1 | MXMLPRSE ;SAIC/DKM - XML Parser ;05/04/2005 15:20
|
---|
2 | ;;7.3;TOOLKIT;**58,67,89**;Apr 25, 1995
|
---|
3 | ;=================================================================
|
---|
4 | ; Main entry point.
|
---|
5 | ; DOC = Closed reference to global array containing document
|
---|
6 | ; CBK = Local array containing entry points for callback interface
|
---|
7 | ; OPTION = Option flags; expected values are:
|
---|
8 | ; D = Debug mode
|
---|
9 | ; W = Do not report warnings
|
---|
10 | ; V = Validate (checks only well-formedness by default)
|
---|
11 | ; 0,1 = Terminate on encountering error at specified level
|
---|
12 | EN(DOC,CBK,OPTION) ;
|
---|
13 | N WS,ID,QT,EDC,DTD,LVL,CS,DOCSTK,LLEN,LPOS,CPOS,LCUR,ERR,XML,PFX,SFX,EOD,EOG,ST,PATH,OFX
|
---|
14 | S ID=$T(+0),WS=$C(9,10,13,32),QT="""",(DOCSTK,EOG,EOD,LVL,CS,ST,LPOS,LLEN,LCUR)=0,(CPOS,LVL(0,"N"))=1,OPTION=$G(OPTION),(XML,PFX,SFX)="",PATH=$$PATH(DOC)
|
---|
15 | K ^TMP(ID,$J)
|
---|
16 | I $L($T(TOUCH^XUSCLEAN)) D TOUCH^XUSCLEAN ;Set the keepalive node
|
---|
17 | D INIT^MXMLPRS1,EPOS,CBK("STARTDOCUMENT"),OPNDOC(DOC)
|
---|
18 | F Q:EOD D READ,EPOS,@ST^MXMLPRS0:'EOD
|
---|
19 | D UNRESLV^MXMLPRS1,ERROR(17):ST'=2,CBK("ENDDOCUMENT")
|
---|
20 | K ^TMP(ID,$J)
|
---|
21 | Q
|
---|
22 | ; Open a document
|
---|
23 | ; Saves state of current document on stack.
|
---|
24 | ; DOCREF=Closed reference to array containing document
|
---|
25 | ; PREFIX=Optional prefix to prepend to document
|
---|
26 | ; SUFFIX=Optional suffix to append to document
|
---|
27 | OPNDOC(DOCREF,PREFIX,SUFFIX) ;
|
---|
28 | S:$E(DOCREF)'="^" DOCREF=$$EXTRNL(DOCREF)
|
---|
29 | Q:'$L(DOCREF)
|
---|
30 | D SAVRES(1)
|
---|
31 | S DOC=$NA(@DOCREF)
|
---|
32 | I '$D(^TMP(ID,$J,"DOC",DOC)) S ^(DOC)=""
|
---|
33 | E D ERROR(43)
|
---|
34 | S (LPOS,LLEN,LCUR)=0,CPOS=1,(OFX,XML)="",PFX=$G(PREFIX),SFX=$G(SUFFIX)
|
---|
35 | S LCUR=DOC,DOC=$E(DOC,1,$L(DOC)-1) ;*rwf
|
---|
36 | Q
|
---|
37 | ; Close current document
|
---|
38 | ; Restores state of previous document from stack.
|
---|
39 | CLSDOC K ^TMP(ID,$J,"DOC",DOC_")") ;*rwf
|
---|
40 | D SAVRES(0)
|
---|
41 | Q
|
---|
42 | ; Extract path from filespec
|
---|
43 | PATH(DOC) ;
|
---|
44 | N X
|
---|
45 | Q:U[$E(DOC) ""
|
---|
46 | F X="\","/","]",":","" Q:DOC[X
|
---|
47 | Q $P(DOC,X,1,$L(DOC,X)-1)_X
|
---|
48 | ; Save or restore document state
|
---|
49 | SAVRES(SAVE) ;
|
---|
50 | N X
|
---|
51 | S:'SAVE DOCSTK=DOCSTK-1,EOD=DOCSTK=0
|
---|
52 | I DOCSTK F X="LLEN","LPOS","CPOS","LCUR","XML","PFX","SFX","OFX","DOC" D
|
---|
53 | .I SAVE S DOCSTK(DOCSTK,X)=@X
|
---|
54 | .E S @X=DOCSTK(DOCSTK,X)
|
---|
55 | I SAVE S DOCSTK=DOCSTK+1
|
---|
56 | E K DOCSTK(DOCSTK)
|
---|
57 | Q
|
---|
58 | ; Retrieve text from document
|
---|
59 | READ Q:((LLEN-CPOS)>50)!EOD ;Quit if still have 50 char or EOD
|
---|
60 | I (CPOS'<LLEN)&EOG D CLSDOC S EOG=0 Q ;At end of text in file
|
---|
61 | N TMP,X
|
---|
62 | D SHIFT Q:$L(XML)>50
|
---|
63 | I EOG!EOD Q ;Quit at end of document
|
---|
64 | S LPOS=LPOS+1,LCUR=$Q(@LCUR) ;Get next node
|
---|
65 | I LCUR'[DOC S EOG=1 Q ;At end of global
|
---|
66 | S TMP=@LCUR ;Get next data chunk
|
---|
67 | W:OPTION["D" !,$J(LPOS,3)_":",TMP,!
|
---|
68 | S OFX=OFX_TMP
|
---|
69 | D SHIFT
|
---|
70 | I LLEN<50 G READ
|
---|
71 | Q
|
---|
72 | ;Shift OFX to XML
|
---|
73 | SHIFT ;
|
---|
74 | S XML=$E(XML,CPOS,9999),CPOS=1 ;Drop old
|
---|
75 | I $L(PFX) S OFX=XML_OFX,XML=PFX,PFX=""
|
---|
76 | I $L(OFX) S X=511-$L(XML),XML=XML_$E(OFX,1,X),OFX=$E(OFX,X+1,99999)
|
---|
77 | S LLEN=$L(XML)
|
---|
78 | Q
|
---|
79 | ; Parse name
|
---|
80 | ; ERN=Error to signal if invalid (optional)
|
---|
81 | NAME(ERN) ;
|
---|
82 | N X
|
---|
83 | D EPOS
|
---|
84 | S X=$E(XML,CPOS)
|
---|
85 | I X'?1A,"_:"'[X D:$G(ERN) ERROR(ERN,X) Q ""
|
---|
86 | Q $$NAMETKN(.ERN)
|
---|
87 | ; Parse name token
|
---|
88 | ; ERN=Error to signal if invalid (optional)
|
---|
89 | NAMETKN(ERN) ;
|
---|
90 | N X,Y
|
---|
91 | D EPOS
|
---|
92 | F X=CPOS:1:LLEN+1 S Y=$E(XML,X) I Y'?1AN,".-_:"'[Y Q
|
---|
93 | S Y=$E(XML,CPOS,X-1),CPOS=X
|
---|
94 | I '$L(Y),$G(ERN) D ERROR(ERN,Y)
|
---|
95 | Q Y
|
---|
96 | ; Parse quote-enclosed value
|
---|
97 | ; ERF=If set, signal error if not found
|
---|
98 | ; FLG=Special flag: 0=attribute literal, 1=general entity literal
|
---|
99 | ; 2=parameter entity literal
|
---|
100 | ; Returns value less quotes with normalized whitespace
|
---|
101 | VALUE(ERF,FLG) ;
|
---|
102 | N DLM,CHR,RTN,EXC
|
---|
103 | D WS()
|
---|
104 | S DLM=$S($$NEXT(QT):QT,$$NEXT("'"):"'",1:""),RTN="",FLG=+$G(FLG),EXC=$S(FLG=2:"",1:"<")
|
---|
105 | I DLM="" D:$G(ERF) EPOS,ERROR(11) Q RTN
|
---|
106 | F S CHR=$E(XML,CPOS) Q:DLM=CHR!(EXC[CHR)!EOD D
|
---|
107 | .I $$NEXT("&#") S RTN=RTN_$$CHENTITY
|
---|
108 | .E I 'FLG,$$NEXT("&") S RTN=RTN_$$ENTITY
|
---|
109 | .E S RTN=RTN_CHR,CPOS=CPOS+1
|
---|
110 | .D:CPOS>LLEN READ
|
---|
111 | I DLM=CHR S CPOS=CPOS+1
|
---|
112 | E D EPOS,ERROR($S('$L(CHR):12,EXC[CHR:13,1:12)) Q ""
|
---|
113 | Q $$NMLWS(RTN)
|
---|
114 | ; Normalize whitespace
|
---|
115 | ; Note: used as input transform for Entity Catalog, so can't depend
|
---|
116 | ; on any environment variables.
|
---|
117 | ; TXT=Text to normalize
|
---|
118 | ; Returns text stripped of leading and trailing whitespace and with
|
---|
119 | ; imbedded contiguous whitespace reduced to single space.
|
---|
120 | NMLWS(TXT,FG) ;
|
---|
121 | N Z,CRLF
|
---|
122 | S CRLF=$C(13,10)
|
---|
123 | ;Normalize CRLF to one SP first
|
---|
124 | F S Z=$F(TXT,CRLF) Q:'Z S TXT=$P(TXT,CRLF,1)_" "_$P(TXT,CRLF,2,999)
|
---|
125 | S TXT=$TR(TXT,$C(9,10,13,32)," ")
|
---|
126 | ;For CDATA or unk, this is where we should stop
|
---|
127 | Q:'$G(FG) TXT
|
---|
128 | F Z=1:1 Q:$E(TXT,Z)'=" "
|
---|
129 | S TXT=$E(TXT,Z,9999)
|
---|
130 | F Z=$L(TXT):-1 Q:$E(TXT,Z)'=" "
|
---|
131 | S TXT=$E(TXT,1,Z)
|
---|
132 | F Z=1:1:$L(TXT) D:$E(TXT,Z)=" "
|
---|
133 | .F Q:$E(TXT,Z+1)'=" " S $E(TXT,Z+1)=""
|
---|
134 | Q TXT
|
---|
135 | ; Process parameter entity if found
|
---|
136 | DOPARAM F D WS() Q:EOD!'$$NEXT("%") I $$ENTITY(1)
|
---|
137 | Q
|
---|
138 | ; Resolve general/parameter/character entity
|
---|
139 | ; PARAM=1: parameter; PARAM=0: general or character (default)
|
---|
140 | ENTITY(PARAM) ;
|
---|
141 | N NAME,APND
|
---|
142 | S PARAM=+$G(PARAM)
|
---|
143 | I 'PARAM,$$NEXT("#") Q $$CHENTITY
|
---|
144 | S NAME=$S(PARAM:"%",1:"")_$$NAME(2)
|
---|
145 | Q:'$$NEXT(";",3) ""
|
---|
146 | ;Handle the common ones inline
|
---|
147 | S APND=$S(NAME="amp":"&",NAME="lt":"<",NAME="gt":">",NAME="quot":$C(34),NAME="apos":"'",1:"")
|
---|
148 | Q:$L(APND) APND
|
---|
149 | I $D(^TMP(ID,$J,"UNP",NAME)) D ERROR(40,NAME) Q ""
|
---|
150 | I '$D(^TMP(ID,$J,"ENT",NAME)) D ERROR(14,NAME) Q ""
|
---|
151 | S APND=$S(PARAM:" ",1:"")
|
---|
152 | D OPNDOC(^TMP(ID,$J,"ENT",NAME),APND,APND)
|
---|
153 | Q ""
|
---|
154 | ; Parse character entity reference
|
---|
155 | ; Returns character equivalent
|
---|
156 | CHENTITY() ;
|
---|
157 | N DIGIT,BASE,DIGITS,VAL
|
---|
158 | S BASE=$S($$NEXT("x"):16,1:10),DIGITS="0123456789"_$S(BASE=16:"ABCDEF",1:""),VAL=0
|
---|
159 | F CPOS=CPOS:1:LLEN+1 Q:$$NEXT(";")!EOD D
|
---|
160 | .S DIGIT=$F(DIGITS,$$UP^XLFSTR($E(XML,CPOS)))-2,VAL=VAL*BASE+DIGIT
|
---|
161 | .D:DIGIT<0 ERROR(19)
|
---|
162 | I VAL<32,WS'[$C(VAL) D ERROR(19)
|
---|
163 | Q $C(VAL)
|
---|
164 | ; Set an entity value
|
---|
165 | SETENT(NAME,VAL) ;
|
---|
166 | K ^TMP(ID,$J,"ENT",NAME)
|
---|
167 | S ^(NAME)=$NA(^(NAME)),^(NAME,1)=VAL
|
---|
168 | Q
|
---|
169 | ; Process all attributes
|
---|
170 | ATTRIBS(ENAME,ATTR) ;
|
---|
171 | N TYP,MOD,DEF,ANAME
|
---|
172 | K ATTR
|
---|
173 | F Q:'$$ATTRIB(ENAME,.ATTR)
|
---|
174 | I OPTION["V" D
|
---|
175 | .S ANAME="$"
|
---|
176 | .F S ANAME=$O(^TMP(ID,$J,"ATT",ENAME,ANAME)) Q:'$L(ANAME) D
|
---|
177 | ..S TYP=^(ANAME),MOD=$P(TYP,"^",2),DEF=$P(TYP,"^",3,9999),TYP=+TYP
|
---|
178 | ..I MOD=1!(MOD=3),'$D(ATTR(ANAME)) D ERROR(36,ANAME) Q
|
---|
179 | ..I MOD=3,ATTR(ANAME)'=DEF D ERROR(37,ATTR(ANAME)) Q
|
---|
180 | ..I MOD=2,'$D(ATTR(ANAME)) Q
|
---|
181 | ..S:'$D(ATTR(ANAME)) ATTR(ANAME)=DEF
|
---|
182 | Q
|
---|
183 | ; Parse attribute=value sequence
|
---|
184 | ; ENAME=Element name to which attribute belongs
|
---|
185 | ; ATTR=Local array (by reference) to receive attribute value.
|
---|
186 | ; Format is ATTR("<attribute name>")="<attribute value>"
|
---|
187 | ; Returns 1 if successful, 0 if not.
|
---|
188 | ATTRIB(ENAME,ATTR) ;
|
---|
189 | N ANAME
|
---|
190 | D WS()
|
---|
191 | S ANAME=$$NAME
|
---|
192 | Q:ANAME="" 0
|
---|
193 | I $D(ATTR(ANAME)) D ERROR(4,ANAME) Q 0
|
---|
194 | D:'$D(^TMP(ID,$J,"ATT",ENAME,ANAME)) ERROR(29,ANAME)
|
---|
195 | D WS()
|
---|
196 | Q:'$$NEXT("=",3) 0
|
---|
197 | D WS()
|
---|
198 | S ATTR(ANAME)=$$VALUE(1)
|
---|
199 | D CHKVAL^MXMLPRS1(ENAME,ANAME,ATTR(ANAME))
|
---|
200 | Q 1
|
---|
201 | ; Parse a processing instruction
|
---|
202 | ; Returns 1 if PI found, 0 if not.
|
---|
203 | PI() N PNAME,ARGS,DONE
|
---|
204 | Q:'$$NEXT("<?") 0
|
---|
205 | S PNAME=$$NAME(2),ARGS=0
|
---|
206 | I $$UP^XLFSTR(PNAME)="XML" D ERROR(9) Q 0
|
---|
207 | D WS(1)
|
---|
208 | F S DONE=$F(XML,"?>",CPOS) D Q:DONE!EOD
|
---|
209 | .S ARGS=ARGS+1,ARGS(ARGS)=$E(XML,CPOS,$S(DONE:DONE-3,1:LLEN))
|
---|
210 | .S CPOS=$S(DONE:DONE,1:LLEN+1)
|
---|
211 | .D READ
|
---|
212 | I EOD D ERROR(7) Q 0
|
---|
213 | D CBK("PI",PNAME,.ARGS)
|
---|
214 | Q 1
|
---|
215 | ; Parse a comment
|
---|
216 | ; Returns 1 if comment found, 0 if not.
|
---|
217 | COMMENT() Q $$PARSCT("<!--","--",">","COMMENT")
|
---|
218 | ; Parse a CDATA section
|
---|
219 | ; Returns 1 if found, 0 if not.
|
---|
220 | CDATA() Q $$PARSCT("<![CDATA[","]]>","","CHARACTERS")
|
---|
221 | ; Parse a section (for CDATA and COMMENT)
|
---|
222 | ; BGN=Beginning delimiter
|
---|
223 | ; END=Ending delimiter
|
---|
224 | ; TRL=Trailing delimiter
|
---|
225 | ; TYP=Event type
|
---|
226 | PARSCT(BGN,END,TRL,TYP) ;
|
---|
227 | N X
|
---|
228 | Q:'$$NEXT(BGN) 0
|
---|
229 | D EPOS
|
---|
230 | I 'LVL,TYP'="COMMENT" D ERROR(6) Q 0
|
---|
231 | F S X=$F(XML,END,CPOS) D Q:X!EOD
|
---|
232 | .D CBK(TYP,$E(XML,CPOS,$S(X:X-$L(END)-1,1:LLEN)))
|
---|
233 | .S CPOS=$S(X:X,1:LLEN+1)
|
---|
234 | .D READ,EPOS
|
---|
235 | I EOD D ERROR(7) Q 0
|
---|
236 | I $L(TRL),$$NEXT(TRL,3)
|
---|
237 | Q 1
|
---|
238 | ; Fetch an external entity from file or entity catalog
|
---|
239 | ; SYS=System identifier (i.e., a URL)
|
---|
240 | ; PUB=Public identifier (i.e., Entity Catalog ID) - optional
|
---|
241 | ; GBL=Optional global root to receive entity content
|
---|
242 | ; Returns global reference or null if error
|
---|
243 | EXTRNL(SYS,PUB,GBL) ;
|
---|
244 | N X,Y
|
---|
245 | S PUB=$$NMLWS($G(PUB)),GBL=$G(GBL)
|
---|
246 | I '$L(GBL) D CBK("EXTERNAL",.SYS,.PUB,.GBL) Q:$L(GBL) GBL
|
---|
247 | I $L(PUB) D Q:X $NA(^MXML(950,X,1))
|
---|
248 | .S Y=$E(PUB,1,30),X=0
|
---|
249 | .F S X=$O(^MXML(950,"B",Y,X)) Q:'X Q:$G(^MXML(950,X,0))=PUB
|
---|
250 | S:'$L(GBL) GBL=$$TMPGBL
|
---|
251 | S:$$PATH(SYS)="" SYS=PATH_SYS
|
---|
252 | S X=$S($$FTG^%ZISH(SYS,"",$NA(@GBL@(1)),$QL(GBL)+1):GBL,1:"")
|
---|
253 | D:'$L(X) ERROR(30,$S($L(SYS):SYS,1:PUB))
|
---|
254 | Q X
|
---|
255 | ; Return a unique scratch global reference
|
---|
256 | TMPGBL() N SUB
|
---|
257 | S SUB=$O(^TMP(ID,$J,$C(1)),-1)+1,^(SUB)=""
|
---|
258 | Q $NA(^(SUB))
|
---|
259 | ; Returns a SYSTEM and/or PUBLIC id
|
---|
260 | ; SYS=Returned SYSTEM id
|
---|
261 | ; PUB=Returned PUBLIC id
|
---|
262 | ; FLG=If set, SYSTEM id is optional after PUBLIC id
|
---|
263 | ; Optional return value: 0=neither, 1=PUBLIC, 2=SYSTEM
|
---|
264 | SYSPUB(SYS,PUB,FLG) ;
|
---|
265 | N RTN
|
---|
266 | I $$NEXT("PUBLIC") D
|
---|
267 | .D WS(1)
|
---|
268 | .S PUB=$$VALUE(1),SYS=$$VALUE('$G(FLG)),RTN=1
|
---|
269 | E I $$NEXT("SYSTEM") D
|
---|
270 | .D WS(1)
|
---|
271 | .S PUB="",SYS=$$VALUE(1),RTN=2
|
---|
272 | E S (SYS,PUB)="",RTN=0
|
---|
273 | Q:$Q RTN
|
---|
274 | Q
|
---|
275 | ; Save current document location for error reporting
|
---|
276 | ; See EPOS^MXMLPRS0
|
---|
277 | EPOS S ERR("XML")=XML,ERR("POS")=CPOS,ERR("LIN")=LPOS
|
---|
278 | Q
|
---|
279 | ; Setup error information
|
---|
280 | ERROR(ERN,ARG) ;
|
---|
281 | N DIHELP,DIMSG,DIERR,MSG
|
---|
282 | D BLD^DIALOG(9500000+ERN,"","","MSG","")
|
---|
283 | S ERR("NUM")=ERN
|
---|
284 | S ERR("SEV")=$S($G(DIHELP):0,$G(DIMSG):1,1:2)
|
---|
285 | S ERR("MSG")=$G(MSG(1))
|
---|
286 | S ERR("ARG")=$G(ARG)
|
---|
287 | I OPTION'["W"!ERR("SEV"),OPTION["V"!(ERR("SEV")'=1) D CBK("ERROR",.ERR)
|
---|
288 | S:ERR("SEV")=2!(OPTION[ERR("SEV")) EOD=-1 ; Stop parsing on severe error
|
---|
289 | Q
|
---|
290 | ; Shortcuts to functions/procedures defined elsewhere
|
---|
291 | WS(X) Q:$Q $$WS^MXMLPRS0(.X)
|
---|
292 | D WS^MXMLPRS0(.X) Q
|
---|
293 | CBK(X,Y1,Y2,Y3,Y4) D CBK^MXMLPRS0(.X,.Y1,.Y2,.Y3,.Y4) Q
|
---|
294 | NEXT(X,Y) Q $$NEXT^MXMLPRS0(.X,.Y)
|
---|