| 1 | MXMLPRSE ;SAIC/DKM - XML Parser ;05/04/2005  15:20 | 
|---|
| 2 | ;;7.3;TOOLKIT;**58,67,89**;Apr 25, 1995 | 
|---|
| 3 | ;================================================================= | 
|---|
| 4 | ; Main entry point. | 
|---|
| 5 | ; DOC = Closed reference to global array containing document | 
|---|
| 6 | ; CBK = Local array containing entry points for callback interface | 
|---|
| 7 | ; OPTION = Option flags; expected values are: | 
|---|
| 8 | ;   D = Debug mode | 
|---|
| 9 | ;   W = Do not report warnings | 
|---|
| 10 | ;   V = Validate (checks only well-formedness by default) | 
|---|
| 11 | ;   0,1 = Terminate on encountering error at specified level | 
|---|
| 12 | EN(DOC,CBK,OPTION) ; | 
|---|
| 13 | N WS,ID,QT,EDC,DTD,LVL,CS,DOCSTK,LLEN,LPOS,CPOS,LCUR,ERR,XML,PFX,SFX,EOD,EOG,ST,PATH,OFX | 
|---|
| 14 | S ID=$T(+0),WS=$C(9,10,13,32),QT="""",(DOCSTK,EOG,EOD,LVL,CS,ST,LPOS,LLEN,LCUR)=0,(CPOS,LVL(0,"N"))=1,OPTION=$G(OPTION),(XML,PFX,SFX)="",PATH=$$PATH(DOC) | 
|---|
| 15 | K ^TMP(ID,$J) | 
|---|
| 16 | I $L($T(TOUCH^XUSCLEAN)) D TOUCH^XUSCLEAN ;Set the keepalive node | 
|---|
| 17 | D INIT^MXMLPRS1,EPOS,CBK("STARTDOCUMENT"),OPNDOC(DOC) | 
|---|
| 18 | F  Q:EOD  D READ,EPOS,@ST^MXMLPRS0:'EOD | 
|---|
| 19 | D UNRESLV^MXMLPRS1,ERROR(17):ST'=2,CBK("ENDDOCUMENT") | 
|---|
| 20 | K ^TMP(ID,$J) | 
|---|
| 21 | Q | 
|---|
| 22 | ; Open a document | 
|---|
| 23 | ; Saves state of current document on stack. | 
|---|
| 24 | ; DOCREF=Closed reference to array containing document | 
|---|
| 25 | ; PREFIX=Optional prefix to prepend to document | 
|---|
| 26 | ; SUFFIX=Optional suffix to append to document | 
|---|
| 27 | OPNDOC(DOCREF,PREFIX,SUFFIX) ; | 
|---|
| 28 | S:$E(DOCREF)'="^" DOCREF=$$EXTRNL(DOCREF) | 
|---|
| 29 | Q:'$L(DOCREF) | 
|---|
| 30 | D SAVRES(1) | 
|---|
| 31 | S DOC=$NA(@DOCREF) | 
|---|
| 32 | I '$D(^TMP(ID,$J,"DOC",DOC)) S ^(DOC)="" | 
|---|
| 33 | E  D ERROR(43) | 
|---|
| 34 | S (LPOS,LLEN,LCUR)=0,CPOS=1,(OFX,XML)="",PFX=$G(PREFIX),SFX=$G(SUFFIX) | 
|---|
| 35 | S LCUR=DOC,DOC=$E(DOC,1,$L(DOC)-1) ;*rwf | 
|---|
| 36 | Q | 
|---|
| 37 | ; Close current document | 
|---|
| 38 | ; Restores state of previous document from stack. | 
|---|
| 39 | CLSDOC K ^TMP(ID,$J,"DOC",DOC_")") ;*rwf | 
|---|
| 40 | D SAVRES(0) | 
|---|
| 41 | Q | 
|---|
| 42 | ; Extract path from filespec | 
|---|
| 43 | PATH(DOC) ; | 
|---|
| 44 | N X | 
|---|
| 45 | Q:U[$E(DOC) "" | 
|---|
| 46 | F X="\","/","]",":","" Q:DOC[X | 
|---|
| 47 | Q $P(DOC,X,1,$L(DOC,X)-1)_X | 
|---|
| 48 | ; Save or restore document state | 
|---|
| 49 | SAVRES(SAVE) ; | 
|---|
| 50 | N X | 
|---|
| 51 | S:'SAVE DOCSTK=DOCSTK-1,EOD=DOCSTK=0 | 
|---|
| 52 | I DOCSTK F X="LLEN","LPOS","CPOS","LCUR","XML","PFX","SFX","OFX","DOC" D | 
|---|
| 53 | .I SAVE S DOCSTK(DOCSTK,X)=@X | 
|---|
| 54 | .E  S @X=DOCSTK(DOCSTK,X) | 
|---|
| 55 | I SAVE S DOCSTK=DOCSTK+1 | 
|---|
| 56 | E  K DOCSTK(DOCSTK) | 
|---|
| 57 | Q | 
|---|
| 58 | ; Retrieve text from document | 
|---|
| 59 | READ Q:((LLEN-CPOS)>50)!EOD  ;Quit if still have 50 char or EOD | 
|---|
| 60 | I (CPOS'<LLEN)&EOG D CLSDOC S EOG=0 Q  ;At end of text in file | 
|---|
| 61 | N TMP,X | 
|---|
| 62 | D SHIFT Q:$L(XML)>50 | 
|---|
| 63 | I EOG!EOD Q  ;Quit at end of document | 
|---|
| 64 | S LPOS=LPOS+1,LCUR=$Q(@LCUR) ;Get next node | 
|---|
| 65 | I LCUR'[DOC S EOG=1 Q  ;At end of global | 
|---|
| 66 | S TMP=@LCUR ;Get next data chunk | 
|---|
| 67 | W:OPTION["D" !,$J(LPOS,3)_":",TMP,! | 
|---|
| 68 | S OFX=OFX_TMP | 
|---|
| 69 | D SHIFT | 
|---|
| 70 | I LLEN<50 G READ | 
|---|
| 71 | Q | 
|---|
| 72 | ;Shift OFX to XML | 
|---|
| 73 | SHIFT ; | 
|---|
| 74 | S XML=$E(XML,CPOS,9999),CPOS=1 ;Drop old | 
|---|
| 75 | I $L(PFX) S OFX=XML_OFX,XML=PFX,PFX="" | 
|---|
| 76 | I $L(OFX) S X=511-$L(XML),XML=XML_$E(OFX,1,X),OFX=$E(OFX,X+1,99999) | 
|---|
| 77 | S LLEN=$L(XML) | 
|---|
| 78 | Q | 
|---|
| 79 | ; Parse name | 
|---|
| 80 | ; ERN=Error to signal if invalid (optional) | 
|---|
| 81 | NAME(ERN) ; | 
|---|
| 82 | N X | 
|---|
| 83 | D EPOS | 
|---|
| 84 | S X=$E(XML,CPOS) | 
|---|
| 85 | I X'?1A,"_:"'[X D:$G(ERN) ERROR(ERN,X) Q "" | 
|---|
| 86 | Q $$NAMETKN(.ERN) | 
|---|
| 87 | ; Parse name token | 
|---|
| 88 | ; ERN=Error to signal if invalid (optional) | 
|---|
| 89 | NAMETKN(ERN) ; | 
|---|
| 90 | N X,Y | 
|---|
| 91 | D EPOS | 
|---|
| 92 | F X=CPOS:1:LLEN+1 S Y=$E(XML,X) I Y'?1AN,".-_:"'[Y Q | 
|---|
| 93 | S Y=$E(XML,CPOS,X-1),CPOS=X | 
|---|
| 94 | I '$L(Y),$G(ERN) D ERROR(ERN,Y) | 
|---|
| 95 | Q Y | 
|---|
| 96 | ; Parse quote-enclosed value | 
|---|
| 97 | ; ERF=If set, signal error if not found | 
|---|
| 98 | ; FLG=Special flag: 0=attribute literal, 1=general entity literal | 
|---|
| 99 | ;     2=parameter entity literal | 
|---|
| 100 | ; Returns value less quotes with normalized whitespace | 
|---|
| 101 | VALUE(ERF,FLG) ; | 
|---|
| 102 | N DLM,CHR,RTN,EXC | 
|---|
| 103 | D WS() | 
|---|
| 104 | S DLM=$S($$NEXT(QT):QT,$$NEXT("'"):"'",1:""),RTN="",FLG=+$G(FLG),EXC=$S(FLG=2:"",1:"<") | 
|---|
| 105 | I DLM="" D:$G(ERF) EPOS,ERROR(11) Q RTN | 
|---|
| 106 | F  S CHR=$E(XML,CPOS) Q:DLM=CHR!(EXC[CHR)!EOD  D | 
|---|
| 107 | .I $$NEXT("&#") S RTN=RTN_$$CHENTITY | 
|---|
| 108 | .E  I 'FLG,$$NEXT("&") S RTN=RTN_$$ENTITY | 
|---|
| 109 | .E  S RTN=RTN_CHR,CPOS=CPOS+1 | 
|---|
| 110 | .D:CPOS>LLEN READ | 
|---|
| 111 | I DLM=CHR S CPOS=CPOS+1 | 
|---|
| 112 | E  D EPOS,ERROR($S('$L(CHR):12,EXC[CHR:13,1:12)) Q "" | 
|---|
| 113 | Q $$NMLWS(RTN) | 
|---|
| 114 | ; Normalize whitespace | 
|---|
| 115 | ; Note: used as input transform for Entity Catalog, so can't depend | 
|---|
| 116 | ; on any environment variables. | 
|---|
| 117 | ; TXT=Text to normalize | 
|---|
| 118 | ; Returns text stripped of leading and trailing whitespace and with | 
|---|
| 119 | ; imbedded contiguous whitespace reduced to single space. | 
|---|
| 120 | NMLWS(TXT,FG) ; | 
|---|
| 121 | N Z,CRLF | 
|---|
| 122 | S CRLF=$C(13,10) | 
|---|
| 123 | ;Normalize CRLF to one SP first | 
|---|
| 124 | F  S Z=$F(TXT,CRLF) Q:'Z  S TXT=$P(TXT,CRLF,1)_" "_$P(TXT,CRLF,2,999) | 
|---|
| 125 | S TXT=$TR(TXT,$C(9,10,13,32),"    ") | 
|---|
| 126 | ;For CDATA or unk, this is where we should stop | 
|---|
| 127 | Q:'$G(FG) TXT | 
|---|
| 128 | F Z=1:1 Q:$E(TXT,Z)'=" " | 
|---|
| 129 | S TXT=$E(TXT,Z,9999) | 
|---|
| 130 | F Z=$L(TXT):-1 Q:$E(TXT,Z)'=" " | 
|---|
| 131 | S TXT=$E(TXT,1,Z) | 
|---|
| 132 | F Z=1:1:$L(TXT) D:$E(TXT,Z)=" " | 
|---|
| 133 | .F  Q:$E(TXT,Z+1)'=" "  S $E(TXT,Z+1)="" | 
|---|
| 134 | Q TXT | 
|---|
| 135 | ; Process parameter entity if found | 
|---|
| 136 | DOPARAM F  D WS() Q:EOD!'$$NEXT("%")  I $$ENTITY(1) | 
|---|
| 137 | Q | 
|---|
| 138 | ; Resolve general/parameter/character entity | 
|---|
| 139 | ; PARAM=1: parameter; PARAM=0: general or character (default) | 
|---|
| 140 | ENTITY(PARAM) ; | 
|---|
| 141 | N NAME,APND | 
|---|
| 142 | S PARAM=+$G(PARAM) | 
|---|
| 143 | I 'PARAM,$$NEXT("#") Q $$CHENTITY | 
|---|
| 144 | S NAME=$S(PARAM:"%",1:"")_$$NAME(2) | 
|---|
| 145 | Q:'$$NEXT(";",3) "" | 
|---|
| 146 | ;Handle the common ones inline | 
|---|
| 147 | S APND=$S(NAME="amp":"&",NAME="lt":"<",NAME="gt":">",NAME="quot":$C(34),NAME="apos":"'",1:"") | 
|---|
| 148 | Q:$L(APND) APND | 
|---|
| 149 | I $D(^TMP(ID,$J,"UNP",NAME)) D ERROR(40,NAME) Q "" | 
|---|
| 150 | I '$D(^TMP(ID,$J,"ENT",NAME)) D ERROR(14,NAME) Q "" | 
|---|
| 151 | S APND=$S(PARAM:" ",1:"") | 
|---|
| 152 | D OPNDOC(^TMP(ID,$J,"ENT",NAME),APND,APND) | 
|---|
| 153 | Q "" | 
|---|
| 154 | ; Parse character entity reference | 
|---|
| 155 | ; Returns character equivalent | 
|---|
| 156 | CHENTITY() ; | 
|---|
| 157 | N DIGIT,BASE,DIGITS,VAL | 
|---|
| 158 | S BASE=$S($$NEXT("x"):16,1:10),DIGITS="0123456789"_$S(BASE=16:"ABCDEF",1:""),VAL=0 | 
|---|
| 159 | F CPOS=CPOS:1:LLEN+1 Q:$$NEXT(";")!EOD  D | 
|---|
| 160 | .S DIGIT=$F(DIGITS,$$UP^XLFSTR($E(XML,CPOS)))-2,VAL=VAL*BASE+DIGIT | 
|---|
| 161 | .D:DIGIT<0 ERROR(19) | 
|---|
| 162 | I VAL<32,WS'[$C(VAL) D ERROR(19) | 
|---|
| 163 | Q $C(VAL) | 
|---|
| 164 | ; Set an entity value | 
|---|
| 165 | SETENT(NAME,VAL) ; | 
|---|
| 166 | K ^TMP(ID,$J,"ENT",NAME) | 
|---|
| 167 | S ^(NAME)=$NA(^(NAME)),^(NAME,1)=VAL | 
|---|
| 168 | Q | 
|---|
| 169 | ; Process all attributes | 
|---|
| 170 | ATTRIBS(ENAME,ATTR) ; | 
|---|
| 171 | N TYP,MOD,DEF,ANAME | 
|---|
| 172 | K ATTR | 
|---|
| 173 | F  Q:'$$ATTRIB(ENAME,.ATTR) | 
|---|
| 174 | I OPTION["V" D | 
|---|
| 175 | .S ANAME="$" | 
|---|
| 176 | .F  S ANAME=$O(^TMP(ID,$J,"ATT",ENAME,ANAME)) Q:'$L(ANAME)  D | 
|---|
| 177 | ..S TYP=^(ANAME),MOD=$P(TYP,"^",2),DEF=$P(TYP,"^",3,9999),TYP=+TYP | 
|---|
| 178 | ..I MOD=1!(MOD=3),'$D(ATTR(ANAME)) D ERROR(36,ANAME) Q | 
|---|
| 179 | ..I MOD=3,ATTR(ANAME)'=DEF D ERROR(37,ATTR(ANAME)) Q | 
|---|
| 180 | ..I MOD=2,'$D(ATTR(ANAME)) Q | 
|---|
| 181 | ..S:'$D(ATTR(ANAME)) ATTR(ANAME)=DEF | 
|---|
| 182 | Q | 
|---|
| 183 | ; Parse attribute=value sequence | 
|---|
| 184 | ; ENAME=Element name to which attribute belongs | 
|---|
| 185 | ; ATTR=Local array (by reference) to receive attribute value. | 
|---|
| 186 | ;      Format is ATTR("<attribute name>")="<attribute value>" | 
|---|
| 187 | ; Returns 1 if successful, 0 if not. | 
|---|
| 188 | ATTRIB(ENAME,ATTR) ; | 
|---|
| 189 | N ANAME | 
|---|
| 190 | D WS() | 
|---|
| 191 | S ANAME=$$NAME | 
|---|
| 192 | Q:ANAME="" 0 | 
|---|
| 193 | I $D(ATTR(ANAME)) D ERROR(4,ANAME) Q 0 | 
|---|
| 194 | D:'$D(^TMP(ID,$J,"ATT",ENAME,ANAME)) ERROR(29,ANAME) | 
|---|
| 195 | D WS() | 
|---|
| 196 | Q:'$$NEXT("=",3) 0 | 
|---|
| 197 | D WS() | 
|---|
| 198 | S ATTR(ANAME)=$$VALUE(1) | 
|---|
| 199 | D CHKVAL^MXMLPRS1(ENAME,ANAME,ATTR(ANAME)) | 
|---|
| 200 | Q 1 | 
|---|
| 201 | ; Parse a processing instruction | 
|---|
| 202 | ; Returns 1 if PI found, 0 if not. | 
|---|
| 203 | PI() N PNAME,ARGS,DONE | 
|---|
| 204 | Q:'$$NEXT("<?") 0 | 
|---|
| 205 | S PNAME=$$NAME(2),ARGS=0 | 
|---|
| 206 | I $$UP^XLFSTR(PNAME)="XML" D ERROR(9) Q 0 | 
|---|
| 207 | D WS(1) | 
|---|
| 208 | F  S DONE=$F(XML,"?>",CPOS) D  Q:DONE!EOD | 
|---|
| 209 | .S ARGS=ARGS+1,ARGS(ARGS)=$E(XML,CPOS,$S(DONE:DONE-3,1:LLEN)) | 
|---|
| 210 | .S CPOS=$S(DONE:DONE,1:LLEN+1) | 
|---|
| 211 | .D READ | 
|---|
| 212 | I EOD D ERROR(7) Q 0 | 
|---|
| 213 | D CBK("PI",PNAME,.ARGS) | 
|---|
| 214 | Q 1 | 
|---|
| 215 | ; Parse a comment | 
|---|
| 216 | ; Returns 1 if comment found, 0 if not. | 
|---|
| 217 | COMMENT() Q $$PARSCT("<!--","--",">","COMMENT") | 
|---|
| 218 | ; Parse a CDATA section | 
|---|
| 219 | ; Returns 1 if found, 0 if not. | 
|---|
| 220 | CDATA() Q $$PARSCT("<![CDATA[","]]>","","CHARACTERS") | 
|---|
| 221 | ; Parse a section (for CDATA and COMMENT) | 
|---|
| 222 | ; BGN=Beginning delimiter | 
|---|
| 223 | ; END=Ending delimiter | 
|---|
| 224 | ; TRL=Trailing delimiter | 
|---|
| 225 | ; TYP=Event type | 
|---|
| 226 | PARSCT(BGN,END,TRL,TYP) ; | 
|---|
| 227 | N X | 
|---|
| 228 | Q:'$$NEXT(BGN) 0 | 
|---|
| 229 | D EPOS | 
|---|
| 230 | I 'LVL,TYP'="COMMENT" D ERROR(6) Q 0 | 
|---|
| 231 | F  S X=$F(XML,END,CPOS) D  Q:X!EOD | 
|---|
| 232 | .D CBK(TYP,$E(XML,CPOS,$S(X:X-$L(END)-1,1:LLEN))) | 
|---|
| 233 | .S CPOS=$S(X:X,1:LLEN+1) | 
|---|
| 234 | .D READ,EPOS | 
|---|
| 235 | I EOD D ERROR(7) Q 0 | 
|---|
| 236 | I $L(TRL),$$NEXT(TRL,3) | 
|---|
| 237 | Q 1 | 
|---|
| 238 | ; Fetch an external entity from file or entity catalog | 
|---|
| 239 | ; SYS=System identifier (i.e., a URL) | 
|---|
| 240 | ; PUB=Public identifier (i.e., Entity Catalog ID) - optional | 
|---|
| 241 | ; GBL=Optional global root to receive entity content | 
|---|
| 242 | ; Returns global reference or null if error | 
|---|
| 243 | EXTRNL(SYS,PUB,GBL) ; | 
|---|
| 244 | N X,Y | 
|---|
| 245 | S PUB=$$NMLWS($G(PUB)),GBL=$G(GBL) | 
|---|
| 246 | I '$L(GBL) D CBK("EXTERNAL",.SYS,.PUB,.GBL) Q:$L(GBL) GBL | 
|---|
| 247 | I $L(PUB) D  Q:X $NA(^MXML(950,X,1)) | 
|---|
| 248 | .S Y=$E(PUB,1,30),X=0 | 
|---|
| 249 | .F  S X=$O(^MXML(950,"B",Y,X)) Q:'X  Q:$G(^MXML(950,X,0))=PUB | 
|---|
| 250 | S:'$L(GBL) GBL=$$TMPGBL | 
|---|
| 251 | S:$$PATH(SYS)="" SYS=PATH_SYS | 
|---|
| 252 | S X=$S($$FTG^%ZISH(SYS,"",$NA(@GBL@(1)),$QL(GBL)+1):GBL,1:"") | 
|---|
| 253 | D:'$L(X) ERROR(30,$S($L(SYS):SYS,1:PUB)) | 
|---|
| 254 | Q X | 
|---|
| 255 | ; Return a unique scratch global reference | 
|---|
| 256 | TMPGBL() N SUB | 
|---|
| 257 | S SUB=$O(^TMP(ID,$J,$C(1)),-1)+1,^(SUB)="" | 
|---|
| 258 | Q $NA(^(SUB)) | 
|---|
| 259 | ; Returns a SYSTEM and/or PUBLIC id | 
|---|
| 260 | ; SYS=Returned SYSTEM id | 
|---|
| 261 | ; PUB=Returned PUBLIC id | 
|---|
| 262 | ; FLG=If set, SYSTEM id is optional after PUBLIC id | 
|---|
| 263 | ; Optional return value: 0=neither, 1=PUBLIC, 2=SYSTEM | 
|---|
| 264 | SYSPUB(SYS,PUB,FLG) ; | 
|---|
| 265 | N RTN | 
|---|
| 266 | I $$NEXT("PUBLIC") D | 
|---|
| 267 | .D WS(1) | 
|---|
| 268 | .S PUB=$$VALUE(1),SYS=$$VALUE('$G(FLG)),RTN=1 | 
|---|
| 269 | E  I $$NEXT("SYSTEM") D | 
|---|
| 270 | .D WS(1) | 
|---|
| 271 | .S PUB="",SYS=$$VALUE(1),RTN=2 | 
|---|
| 272 | E  S (SYS,PUB)="",RTN=0 | 
|---|
| 273 | Q:$Q RTN | 
|---|
| 274 | Q | 
|---|
| 275 | ; Save current document location for error reporting | 
|---|
| 276 | ; See EPOS^MXMLPRS0 | 
|---|
| 277 | EPOS S ERR("XML")=XML,ERR("POS")=CPOS,ERR("LIN")=LPOS | 
|---|
| 278 | Q | 
|---|
| 279 | ; Setup error information | 
|---|
| 280 | ERROR(ERN,ARG) ; | 
|---|
| 281 | N DIHELP,DIMSG,DIERR,MSG | 
|---|
| 282 | D BLD^DIALOG(9500000+ERN,"","","MSG","") | 
|---|
| 283 | S ERR("NUM")=ERN | 
|---|
| 284 | S ERR("SEV")=$S($G(DIHELP):0,$G(DIMSG):1,1:2) | 
|---|
| 285 | S ERR("MSG")=$G(MSG(1)) | 
|---|
| 286 | S ERR("ARG")=$G(ARG) | 
|---|
| 287 | I OPTION'["W"!ERR("SEV"),OPTION["V"!(ERR("SEV")'=1) D CBK("ERROR",.ERR) | 
|---|
| 288 | S:ERR("SEV")=2!(OPTION[ERR("SEV")) EOD=-1                             ; Stop parsing on severe error | 
|---|
| 289 | Q | 
|---|
| 290 | ; Shortcuts to functions/procedures defined elsewhere | 
|---|
| 291 | WS(X) Q:$Q $$WS^MXMLPRS0(.X) | 
|---|
| 292 | D WS^MXMLPRS0(.X) Q | 
|---|
| 293 | CBK(X,Y1,Y2,Y3,Y4) D CBK^MXMLPRS0(.X,.Y1,.Y2,.Y3,.Y4) Q | 
|---|
| 294 | NEXT(X,Y) Q $$NEXT^MXMLPRS0(.X,.Y) | 
|---|