LEXTOLKN ; ISL Parse term into words                ; 01-31-97
 ;;2.0;LEXICON UTILITY;**4**;Sep 23, 1996;Build 1
 ;
 ; Returns ^TMP("LEXTKN",$J,#,WORD) containing words
 ;
 ; If LEXIDX is set, then the Excluded Words file is used to 
 ; selectively exclude words from the indexing process.  If
 ; LEXLOOK is set, then the Excluded Words file is used to 
 ; selectively exclude words from the look-up process.  If 
 ; LEXIDX and LEXLOOK do not exist then all words are parsed
 ; parsed.
 ;
PT ; Entry point where DA is defined and X is unknown
 Q:'$D(DA)  S X=^LEX(757.01,DA,0)
PTX ; Entry point to parse string (X must exist)
 N LEXOK,LEXTOKS,LEXTOKS2,LEXTOKI,LEXTOKW,LEXTOLKN
 N LEXOKC,LEXOKN,LEXOKP
 K ^TMP("LEXTKN",$J)
 Q:'$L($G(X))  S LEXTOKS=$TR(X,"-"," ")
 ; Remove leading blanks from string
 F LEXOKP=1:1:$L(LEXTOKS) Q:$E(LEXTOKS,LEXOKP)'[" "
 S LEXTOKS=$E(LEXTOKS,LEXOKP,$L(LEXTOKS))
 ; Remove trailing blanks from string
 F LEXOKP=$L(LEXTOKS):-1:1 Q:$E(LEXTOKS,LEXOKP)'[" "
 S LEXTOKS=$E(LEXTOKS,1,LEXOKP)
 ; Remove Punctuation (less slashes)
 S LEXTOKS=$TR(LEXTOKS,"?`~!@#$%^&*()_-+={}[]\:;,<>","                            ")
 ; Conditionally remove slashes
 S:$D(LEXIDX) LEXTOKS=$TR(LEXTOKS,"/"," ")
 S LEXTOKS=$TR(LEXTOKS,".","")
 S LEXTOKS=$TR(LEXTOKS,"""","")
 ; Swtich to UPPERCASE (lower case is not specified by LEXLOW)
 S:'$D(LEXLOW) LEXTOKS=$$UP^XLFSTR(LEXTOKS)
 ; Store in temporary array (based on space character)
 S LEXOKC=0 F LEXTOKI=1:1:$L(LEXTOKS," ") D
 . N LEXTOKW S LEXTOKW=$P(LEXTOKS," ",LEXTOKI) Q:LEXTOKW=""
 . I LEXTOKW'["/" D
 . . S LEXOKC=LEXOKC+1,LEXTOLKN(LEXOKC)=LEXTOKW
 . . S LEXTOLKN(0)=LEXOKC
 . I LEXTOKW["/"&('$D(^LEX(757.05,"B",LEXTOKW))) D  Q  ; PCH 4
 . . N LEXP S LEXP=0 F  S LEXP=LEXP+1 Q:$P(LEXTOKW,"/",LEXP)=""  D
 . . . S LEXOKC=LEXOKC+1,LEXTOLKN(LEXOKC)=$P(LEXTOKW,"/",LEXP)
 . . . S LEXTOLKN(0)=LEXOKC
 . I LEXTOKW["/"&($D(^LEX(757.05,"B",LEXTOKW))) D
 . . N LEXOKR S LEXOKR=$O(^LEX(757.05,"B",LEXTOKW,0))
 . . I $P($G(^LEX(757.05,LEXOKR,0)),U,3)="R" D
 . . . S LEXOKC=LEXOKC+1,LEXTOLKN(LEXOKC)=LEXTOKW
 . . . S LEXTOLKN(0)=LEXOKC
 K LEXOKC,LEXOKR
 I +($G(LEXTOLKN(0)))=0 K LEXTOLKN S ^TMP("LEXTKN",$J,0)=0 G EXIT
 S LEXTOKW="",LEXOKN=0 F LEXTOKI=1:1:LEXTOLKN(0) D
 . S LEXTOKW=$G(LEXTOLKN(LEXTOKI))
 . ; Remove leading blanks
 . F LEXOKP=1:1:$L(LEXTOKW) Q:$E(LEXTOKW,LEXOKP)'[" "
 . S LEXTOKW=$E(LEXTOKW,LEXOKP,$L(LEXTOKW))
 . ; Remove trailing blanks
 . F LEXOKP=$L(LEXTOKW):-1:1 Q:$E(LEXTOKW,LEXOKP)'[" "
 . S LEXTOKW=$E(LEXTOKW,1,LEXOKP)
 . ; Leading numerics or Zero
 . ;I +LEXTOKW>0 S LEXTOKW="" Q
 . I $A($E(LEXTOKW,1))=48 S LEXTOKW="" Q
 . ; Apostrophies and spaces
 . S LEXTOKW=$TR(LEXTOKW,"'",""),LEXTOKW=$TR(LEXTOKW," ","")
 . ; Numeric only
 . ;S LEXOK=0 F LEXOKP=1:1:$L(LEXTOKW) S:$E(LEXTOKW,LEXOKP)?1U LEXOK=1 Q:LEXOK=1
 . ;I 'LEXOK S LEXTOKW="" Q
 . I $D(LEXIDX) D
 . . I LEXTOKW'="" S:$D(^LEX(757.04,"ACTION",LEXTOKW,"I")) LEXTOKW=""
 . . I LEXTOKW'="" S:$D(^LEX(757.04,"ACTION",LEXTOKW,"B")) LEXTOKW=""
 . I $D(LEXLOOK) D
 . . I LEXTOKW'="" S:$D(^LEX(757.04,"ACTION",LEXTOKW,"L")) LEXTOKW=""
 . . I LEXTOKW'="" S:$D(^LEX(757.04,"ACTION",LEXTOKW,"B")) LEXTOKW=""
 . I $D(LEXOKN),$D(LEXTOKW),LEXTOKW'="" D
 . . S LEXOKN=+(LEXOKN)+1,^TMP("LEXTKN",$J,LEXOKN,LEXTOKW)=""
 . S LEXTOKW=""
 S ^TMP("LEXTKN",$J,0)=LEXOKN
EXIT ; Clean up and quit
 K LEXOK,LEXTOKI,LEXOKN,LEXOKP,LEXOKR,LEXTOKS,LEXTOKS2,LEXTOKW,LEXTOLKN
 Q
