source: FOIAVistA/trunk/r/XML_PARSER-MXML/MXMLPRSE.m@ 1154

Last change on this file since 1154 was 628, checked in by George Lilly, 15 years ago

initial load of FOIAVistA 6/30/08 version

File size: 9.3 KB
Line 
1MXMLPRSE ;SAIC/DKM - XML Parser ;05/04/2005 15:20
2 ;;7.3;TOOLKIT;**58,67,89**;Apr 25, 1995
3 ;=================================================================
4 ; Main entry point.
5 ; DOC = Closed reference to global array containing document
6 ; CBK = Local array containing entry points for callback interface
7 ; OPTION = Option flags; expected values are:
8 ; D = Debug mode
9 ; W = Do not report warnings
10 ; V = Validate (checks only well-formedness by default)
11 ; 0,1 = Terminate on encountering error at specified level
12EN(DOC,CBK,OPTION) ;
13 N WS,ID,QT,EDC,DTD,LVL,CS,DOCSTK,LLEN,LPOS,CPOS,LCUR,ERR,XML,PFX,SFX,EOD,EOG,ST,PATH,OFX
14 S ID=$T(+0),WS=$C(9,10,13,32),QT="""",(DOCSTK,EOG,EOD,LVL,CS,ST,LPOS,LLEN,LCUR)=0,(CPOS,LVL(0,"N"))=1,OPTION=$G(OPTION),(XML,PFX,SFX)="",PATH=$$PATH(DOC)
15 K ^TMP(ID,$J)
16 I $L($T(TOUCH^XUSCLEAN)) D TOUCH^XUSCLEAN ;Set the keepalive node
17 D INIT^MXMLPRS1,EPOS,CBK("STARTDOCUMENT"),OPNDOC(DOC)
18 F Q:EOD D READ,EPOS,@ST^MXMLPRS0:'EOD
19 D UNRESLV^MXMLPRS1,ERROR(17):ST'=2,CBK("ENDDOCUMENT")
20 K ^TMP(ID,$J)
21 Q
22 ; Open a document
23 ; Saves state of current document on stack.
24 ; DOCREF=Closed reference to array containing document
25 ; PREFIX=Optional prefix to prepend to document
26 ; SUFFIX=Optional suffix to append to document
27OPNDOC(DOCREF,PREFIX,SUFFIX) ;
28 S:$E(DOCREF)'="^" DOCREF=$$EXTRNL(DOCREF)
29 Q:'$L(DOCREF)
30 D SAVRES(1)
31 S DOC=$NA(@DOCREF)
32 I '$D(^TMP(ID,$J,"DOC",DOC)) S ^(DOC)=""
33 E D ERROR(43)
34 S (LPOS,LLEN,LCUR)=0,CPOS=1,(OFX,XML)="",PFX=$G(PREFIX),SFX=$G(SUFFIX)
35 S LCUR=DOC,DOC=$E(DOC,1,$L(DOC)-1) ;*rwf
36 Q
37 ; Close current document
38 ; Restores state of previous document from stack.
39CLSDOC K ^TMP(ID,$J,"DOC",DOC_")") ;*rwf
40 D SAVRES(0)
41 Q
42 ; Extract path from filespec
43PATH(DOC) ;
44 N X
45 Q:U[$E(DOC) ""
46 F X="\","/","]",":","" Q:DOC[X
47 Q $P(DOC,X,1,$L(DOC,X)-1)_X
48 ; Save or restore document state
49SAVRES(SAVE) ;
50 N X
51 S:'SAVE DOCSTK=DOCSTK-1,EOD=DOCSTK=0
52 I DOCSTK F X="LLEN","LPOS","CPOS","LCUR","XML","PFX","SFX","OFX","DOC" D
53 .I SAVE S DOCSTK(DOCSTK,X)=@X
54 .E S @X=DOCSTK(DOCSTK,X)
55 I SAVE S DOCSTK=DOCSTK+1
56 E K DOCSTK(DOCSTK)
57 Q
58 ; Retrieve text from document
59READ Q:((LLEN-CPOS)>50)!EOD ;Quit if still have 50 char or EOD
60 I (CPOS'<LLEN)&EOG D CLSDOC S EOG=0 Q ;At end of text in file
61 N TMP,X
62 D SHIFT Q:$L(XML)>50
63 I EOG!EOD Q ;Quit at end of document
64 S LPOS=LPOS+1,LCUR=$Q(@LCUR) ;Get next node
65 I LCUR'[DOC S EOG=1 Q ;At end of global
66 S TMP=@LCUR ;Get next data chunk
67 W:OPTION["D" !,$J(LPOS,3)_":",TMP,!
68 S OFX=OFX_TMP
69 D SHIFT
70 I LLEN<50 G READ
71 Q
72 ;Shift OFX to XML
73SHIFT ;
74 S XML=$E(XML,CPOS,9999),CPOS=1 ;Drop old
75 I $L(PFX) S OFX=XML_OFX,XML=PFX,PFX=""
76 I $L(OFX) S X=511-$L(XML),XML=XML_$E(OFX,1,X),OFX=$E(OFX,X+1,99999)
77 S LLEN=$L(XML)
78 Q
79 ; Parse name
80 ; ERN=Error to signal if invalid (optional)
81NAME(ERN) ;
82 N X
83 D EPOS
84 S X=$E(XML,CPOS)
85 I X'?1A,"_:"'[X D:$G(ERN) ERROR(ERN,X) Q ""
86 Q $$NAMETKN(.ERN)
87 ; Parse name token
88 ; ERN=Error to signal if invalid (optional)
89NAMETKN(ERN) ;
90 N X,Y
91 D EPOS
92 F X=CPOS:1:LLEN+1 S Y=$E(XML,X) I Y'?1AN,".-_:"'[Y Q
93 S Y=$E(XML,CPOS,X-1),CPOS=X
94 I '$L(Y),$G(ERN) D ERROR(ERN,Y)
95 Q Y
96 ; Parse quote-enclosed value
97 ; ERF=If set, signal error if not found
98 ; FLG=Special flag: 0=attribute literal, 1=general entity literal
99 ; 2=parameter entity literal
100 ; Returns value less quotes with normalized whitespace
101VALUE(ERF,FLG) ;
102 N DLM,CHR,RTN,EXC
103 D WS()
104 S DLM=$S($$NEXT(QT):QT,$$NEXT("'"):"'",1:""),RTN="",FLG=+$G(FLG),EXC=$S(FLG=2:"",1:"<")
105 I DLM="" D:$G(ERF) EPOS,ERROR(11) Q RTN
106 F S CHR=$E(XML,CPOS) Q:DLM=CHR!(EXC[CHR)!EOD D
107 .I $$NEXT("&#") S RTN=RTN_$$CHENTITY
108 .E I 'FLG,$$NEXT("&") S RTN=RTN_$$ENTITY
109 .E S RTN=RTN_CHR,CPOS=CPOS+1
110 .D:CPOS>LLEN READ
111 I DLM=CHR S CPOS=CPOS+1
112 E D EPOS,ERROR($S('$L(CHR):12,EXC[CHR:13,1:12)) Q ""
113 Q $$NMLWS(RTN)
114 ; Normalize whitespace
115 ; Note: used as input transform for Entity Catalog, so can't depend
116 ; on any environment variables.
117 ; TXT=Text to normalize
118 ; Returns text stripped of leading and trailing whitespace and with
119 ; imbedded contiguous whitespace reduced to single space.
120NMLWS(TXT,FG) ;
121 N Z,CRLF
122 S CRLF=$C(13,10)
123 ;Normalize CRLF to one SP first
124 F S Z=$F(TXT,CRLF) Q:'Z S TXT=$P(TXT,CRLF,1)_" "_$P(TXT,CRLF,2,999)
125 S TXT=$TR(TXT,$C(9,10,13,32)," ")
126 ;For CDATA or unk, this is where we should stop
127 Q:'$G(FG) TXT
128 F Z=1:1 Q:$E(TXT,Z)'=" "
129 S TXT=$E(TXT,Z,9999)
130 F Z=$L(TXT):-1 Q:$E(TXT,Z)'=" "
131 S TXT=$E(TXT,1,Z)
132 F Z=1:1:$L(TXT) D:$E(TXT,Z)=" "
133 .F Q:$E(TXT,Z+1)'=" " S $E(TXT,Z+1)=""
134 Q TXT
135 ; Process parameter entity if found
136DOPARAM F D WS() Q:EOD!'$$NEXT("%") I $$ENTITY(1)
137 Q
138 ; Resolve general/parameter/character entity
139 ; PARAM=1: parameter; PARAM=0: general or character (default)
140ENTITY(PARAM) ;
141 N NAME,APND
142 S PARAM=+$G(PARAM)
143 I 'PARAM,$$NEXT("#") Q $$CHENTITY
144 S NAME=$S(PARAM:"%",1:"")_$$NAME(2)
145 Q:'$$NEXT(";",3) ""
146 ;Handle the common ones inline
147 S APND=$S(NAME="amp":"&",NAME="lt":"<",NAME="gt":">",NAME="quot":$C(34),NAME="apos":"'",1:"")
148 Q:$L(APND) APND
149 I $D(^TMP(ID,$J,"UNP",NAME)) D ERROR(40,NAME) Q ""
150 I '$D(^TMP(ID,$J,"ENT",NAME)) D ERROR(14,NAME) Q ""
151 S APND=$S(PARAM:" ",1:"")
152 D OPNDOC(^TMP(ID,$J,"ENT",NAME),APND,APND)
153 Q ""
154 ; Parse character entity reference
155 ; Returns character equivalent
156CHENTITY() ;
157 N DIGIT,BASE,DIGITS,VAL
158 S BASE=$S($$NEXT("x"):16,1:10),DIGITS="0123456789"_$S(BASE=16:"ABCDEF",1:""),VAL=0
159 F CPOS=CPOS:1:LLEN+1 Q:$$NEXT(";")!EOD D
160 .S DIGIT=$F(DIGITS,$$UP^XLFSTR($E(XML,CPOS)))-2,VAL=VAL*BASE+DIGIT
161 .D:DIGIT<0 ERROR(19)
162 I VAL<32,WS'[$C(VAL) D ERROR(19)
163 Q $C(VAL)
164 ; Set an entity value
165SETENT(NAME,VAL) ;
166 K ^TMP(ID,$J,"ENT",NAME)
167 S ^(NAME)=$NA(^(NAME)),^(NAME,1)=VAL
168 Q
169 ; Process all attributes
170ATTRIBS(ENAME,ATTR) ;
171 N TYP,MOD,DEF,ANAME
172 K ATTR
173 F Q:'$$ATTRIB(ENAME,.ATTR)
174 I OPTION["V" D
175 .S ANAME="$"
176 .F S ANAME=$O(^TMP(ID,$J,"ATT",ENAME,ANAME)) Q:'$L(ANAME) D
177 ..S TYP=^(ANAME),MOD=$P(TYP,"^",2),DEF=$P(TYP,"^",3,9999),TYP=+TYP
178 ..I MOD=1!(MOD=3),'$D(ATTR(ANAME)) D ERROR(36,ANAME) Q
179 ..I MOD=3,ATTR(ANAME)'=DEF D ERROR(37,ATTR(ANAME)) Q
180 ..I MOD=2,'$D(ATTR(ANAME)) Q
181 ..S:'$D(ATTR(ANAME)) ATTR(ANAME)=DEF
182 Q
183 ; Parse attribute=value sequence
184 ; ENAME=Element name to which attribute belongs
185 ; ATTR=Local array (by reference) to receive attribute value.
186 ; Format is ATTR("<attribute name>")="<attribute value>"
187 ; Returns 1 if successful, 0 if not.
188ATTRIB(ENAME,ATTR) ;
189 N ANAME
190 D WS()
191 S ANAME=$$NAME
192 Q:ANAME="" 0
193 I $D(ATTR(ANAME)) D ERROR(4,ANAME) Q 0
194 D:'$D(^TMP(ID,$J,"ATT",ENAME,ANAME)) ERROR(29,ANAME)
195 D WS()
196 Q:'$$NEXT("=",3) 0
197 D WS()
198 S ATTR(ANAME)=$$VALUE(1)
199 D CHKVAL^MXMLPRS1(ENAME,ANAME,ATTR(ANAME))
200 Q 1
201 ; Parse a processing instruction
202 ; Returns 1 if PI found, 0 if not.
203PI() N PNAME,ARGS,DONE
204 Q:'$$NEXT("<?") 0
205 S PNAME=$$NAME(2),ARGS=0
206 I $$UP^XLFSTR(PNAME)="XML" D ERROR(9) Q 0
207 D WS(1)
208 F S DONE=$F(XML,"?>",CPOS) D Q:DONE!EOD
209 .S ARGS=ARGS+1,ARGS(ARGS)=$E(XML,CPOS,$S(DONE:DONE-3,1:LLEN))
210 .S CPOS=$S(DONE:DONE,1:LLEN+1)
211 .D READ
212 I EOD D ERROR(7) Q 0
213 D CBK("PI",PNAME,.ARGS)
214 Q 1
215 ; Parse a comment
216 ; Returns 1 if comment found, 0 if not.
217COMMENT() Q $$PARSCT("<!--","--",">","COMMENT")
218 ; Parse a CDATA section
219 ; Returns 1 if found, 0 if not.
220CDATA() Q $$PARSCT("<![CDATA[","]]>","","CHARACTERS")
221 ; Parse a section (for CDATA and COMMENT)
222 ; BGN=Beginning delimiter
223 ; END=Ending delimiter
224 ; TRL=Trailing delimiter
225 ; TYP=Event type
226PARSCT(BGN,END,TRL,TYP) ;
227 N X
228 Q:'$$NEXT(BGN) 0
229 D EPOS
230 I 'LVL,TYP'="COMMENT" D ERROR(6) Q 0
231 F S X=$F(XML,END,CPOS) D Q:X!EOD
232 .D CBK(TYP,$E(XML,CPOS,$S(X:X-$L(END)-1,1:LLEN)))
233 .S CPOS=$S(X:X,1:LLEN+1)
234 .D READ,EPOS
235 I EOD D ERROR(7) Q 0
236 I $L(TRL),$$NEXT(TRL,3)
237 Q 1
238 ; Fetch an external entity from file or entity catalog
239 ; SYS=System identifier (i.e., a URL)
240 ; PUB=Public identifier (i.e., Entity Catalog ID) - optional
241 ; GBL=Optional global root to receive entity content
242 ; Returns global reference or null if error
243EXTRNL(SYS,PUB,GBL) ;
244 N X,Y
245 S PUB=$$NMLWS($G(PUB)),GBL=$G(GBL)
246 I '$L(GBL) D CBK("EXTERNAL",.SYS,.PUB,.GBL) Q:$L(GBL) GBL
247 I $L(PUB) D Q:X $NA(^MXML(950,X,1))
248 .S Y=$E(PUB,1,30),X=0
249 .F S X=$O(^MXML(950,"B",Y,X)) Q:'X Q:$G(^MXML(950,X,0))=PUB
250 S:'$L(GBL) GBL=$$TMPGBL
251 S:$$PATH(SYS)="" SYS=PATH_SYS
252 S X=$S($$FTG^%ZISH(SYS,"",$NA(@GBL@(1)),$QL(GBL)+1):GBL,1:"")
253 D:'$L(X) ERROR(30,$S($L(SYS):SYS,1:PUB))
254 Q X
255 ; Return a unique scratch global reference
256TMPGBL() N SUB
257 S SUB=$O(^TMP(ID,$J,$C(1)),-1)+1,^(SUB)=""
258 Q $NA(^(SUB))
259 ; Returns a SYSTEM and/or PUBLIC id
260 ; SYS=Returned SYSTEM id
261 ; PUB=Returned PUBLIC id
262 ; FLG=If set, SYSTEM id is optional after PUBLIC id
263 ; Optional return value: 0=neither, 1=PUBLIC, 2=SYSTEM
264SYSPUB(SYS,PUB,FLG) ;
265 N RTN
266 I $$NEXT("PUBLIC") D
267 .D WS(1)
268 .S PUB=$$VALUE(1),SYS=$$VALUE('$G(FLG)),RTN=1
269 E I $$NEXT("SYSTEM") D
270 .D WS(1)
271 .S PUB="",SYS=$$VALUE(1),RTN=2
272 E S (SYS,PUB)="",RTN=0
273 Q:$Q RTN
274 Q
275 ; Save current document location for error reporting
276 ; See EPOS^MXMLPRS0
277EPOS S ERR("XML")=XML,ERR("POS")=CPOS,ERR("LIN")=LPOS
278 Q
279 ; Setup error information
280ERROR(ERN,ARG) ;
281 N DIHELP,DIMSG,DIERR,MSG
282 D BLD^DIALOG(9500000+ERN,"","","MSG","")
283 S ERR("NUM")=ERN
284 S ERR("SEV")=$S($G(DIHELP):0,$G(DIMSG):1,1:2)
285 S ERR("MSG")=$G(MSG(1))
286 S ERR("ARG")=$G(ARG)
287 I OPTION'["W"!ERR("SEV"),OPTION["V"!(ERR("SEV")'=1) D CBK("ERROR",.ERR)
288 S:ERR("SEV")=2!(OPTION[ERR("SEV")) EOD=-1 ; Stop parsing on severe error
289 Q
290 ; Shortcuts to functions/procedures defined elsewhere
291WS(X) Q:$Q $$WS^MXMLPRS0(.X)
292 D WS^MXMLPRS0(.X) Q
293CBK(X,Y1,Y2,Y3,Y4) D CBK^MXMLPRS0(.X,.Y1,.Y2,.Y3,.Y4) Q
294NEXT(X,Y) Q $$NEXT^MXMLPRS0(.X,.Y)
Note: See TracBrowser for help on using the repository browser.