[613] | 1 | HDISVM01 ;BPFO/JRP - PARSE XML DOCUMENT USING SAX;12/20/2004
|
---|
| 2 | ;;1.0;HEALTH DATA & INFORMATICS;;Feb 22, 2005
|
---|
| 3 | ;
|
---|
| 4 | SAX(XMLARR,PRSARR) ;Parse XML document using SAX interface
|
---|
| 5 | ; Input: XMLARR - Global array containing XML document (closed root)
|
---|
| 6 | ; PRSARR - Array to output parsed XML document (closed root)
|
---|
| 7 | ;Output: None
|
---|
| 8 | ; @PRSARR@("ESUBS",ESN) = Element name
|
---|
| 9 | ; @PRSARR@("EINDX",ElementName) = Subscript number (ESN)
|
---|
| 10 | ; @PRSARR@("ASUBS",ESN,ASN) = Attribute name
|
---|
| 11 | ; @PRSARR@("AINDX",ESN,AttributeName) = Subscript number (ASN)
|
---|
| 12 | ; @PRSARR@("DATA",ESN,Repetition,"V") = Value of element
|
---|
| 13 | ; @PRSARR@("DATA",ESN,Repetition,"A",ASN) = Value of attribute
|
---|
| 14 | ; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"V") = Value of child
|
---|
| 15 | ; element
|
---|
| 16 | ; @PRSARR@("DATA",ESN1,Rep1,ESN2,Rep2,"A",ASN) = Value of child
|
---|
| 17 | ; attribute
|
---|
| 18 | ; Notes : XMLARR must be a global array (i.e. no local arrays)
|
---|
| 19 | ; : PRSARR is initialized (i.e KILLed) on input
|
---|
| 20 | Q:$G(XMLARR)=""
|
---|
| 21 | Q:'$D(@XMLARR)
|
---|
| 22 | Q:$G(PRSARR)=""
|
---|
| 23 | N HDICBK,SUBNUM,TAGNAME
|
---|
| 24 | N ESUBS,EINDX,ASUBS,AINDX,DATA
|
---|
| 25 | S ESUBS=$NA(@PRSARR@("ESUBS"))
|
---|
| 26 | S EINDX=$NA(@PRSARR@("EINDX"))
|
---|
| 27 | S ASUBS=$NA(@PRSARR@("ASUBS"))
|
---|
| 28 | S AINDX=$NA(@PRSARR@("AINDX"))
|
---|
| 29 | S DATA=$NA(@PRSARR@("DATA"))
|
---|
| 30 | ;Set callbacks
|
---|
| 31 | S HDICBK("STARTDOCUMENT")="STRTDOC^HDISVM01"
|
---|
| 32 | S HDICBK("ENDDOCUMENT")="ENDDOC^HDISVM01"
|
---|
| 33 | S HDICBK("DOCTYPE")="DOCTYPE^HDISVM01"
|
---|
| 34 | S HDICBK("STARTELEMENT")="STRTLMNT^HDISVM01"
|
---|
| 35 | S HDICBK("ENDELEMENT")="ENDLMNT^HDISVM01"
|
---|
| 36 | S HDICBK("CHARACTERS")="CHARS^HDISVM01"
|
---|
| 37 | S HDICBK("PI")="PI^HDISVM01"
|
---|
| 38 | S HDICBK("EXTERNAL")="EXTERN^HDISVM01"
|
---|
| 39 | S HDICBK("NOTATION")="NOTATION^HDISVM01"
|
---|
| 40 | S HDICBK("COMMENT")="COMMENT^HDISVM01"
|
---|
| 41 | S HDICBK("ERROR")="ERROR^HDISVM01"
|
---|
| 42 | ;Parse XML document using SAX
|
---|
| 43 | K @PRSARR
|
---|
| 44 | D EN^MXMLPRSE(XMLARR,.HDICBK,"")
|
---|
| 45 | Q
|
---|
| 46 | ;
|
---|
| 47 | STRTDOC ;Start document
|
---|
| 48 | Q
|
---|
| 49 | ;
|
---|
| 50 | ENDDOC ;End document
|
---|
| 51 | Q
|
---|
| 52 | ;
|
---|
| 53 | DOCTYPE(ROOT,PUBID,SYSID) ;DOCTYPE declaration
|
---|
| 54 | Q
|
---|
| 55 | ;
|
---|
| 56 | STRTLMNT(NAME,ATTRLIST) ;Start element
|
---|
| 57 | N ESN,REP,ATTR,ASN,TMPREF
|
---|
| 58 | ;Determine element subscript number
|
---|
| 59 | S ESN=+$G(@EINDX@(NAME))
|
---|
| 60 | I 'ESN D
|
---|
| 61 | .S ESN=1+$O(@ESUBS@(""),-1)
|
---|
| 62 | .S @ESUBS@(ESN)=NAME
|
---|
| 63 | .S @EINDX@(NAME)=ESN
|
---|
| 64 | ;Determine repetition number
|
---|
| 65 | S REP=1+$O(@DATA@(ESN,""),-1)
|
---|
| 66 | ;Add element subscript number and repetition number to output array
|
---|
| 67 | S TMPREF=$$OREF^DILF(DATA)
|
---|
| 68 | S TMPREF=TMPREF_ESN_","_REP_","
|
---|
| 69 | S DATA=$$CREF^DILF(TMPREF)
|
---|
| 70 | ;Store attributes
|
---|
| 71 | S ATTR=""
|
---|
| 72 | F S ATTR=$O(ATTRLIST(ATTR)) Q:ATTR="" D
|
---|
| 73 | .;Get attribute subscript number
|
---|
| 74 | .S ASN=+$G(@AINDX@(ESN,ATTR))
|
---|
| 75 | .I 'ASN D
|
---|
| 76 | ..S ASN=1+$O(@ASUBS@(""),-1)
|
---|
| 77 | ..S @ASUBS@(ESN,ASN)=ATTR
|
---|
| 78 | ..S @AINDX@(ESN,ATTR)=ASN
|
---|
| 79 | .;Store value
|
---|
| 80 | .S @DATA@("A",ASN)=ATTRLIST(ATTR)
|
---|
| 81 | Q
|
---|
| 82 | ;
|
---|
| 83 | ENDLMNT(NAME) ;End element
|
---|
| 84 | N TMPREF,SUBCNT,SUBCHK
|
---|
| 85 | ;Remove element subscript number and repition number from output array
|
---|
| 86 | S TMPREF=$$OREF^DILF(DATA)
|
---|
| 87 | S SUBCNT=$L(TMPREF,",")
|
---|
| 88 | S SUBCHK=SUBCNT-3
|
---|
| 89 | I SUBCHK>0 S TMPREF=$P(TMPREF,",",1,SUBCHK)_","
|
---|
| 90 | I SUBCHK<1 S TMPREF=$P(TMPREF,"(",1)_"("
|
---|
| 91 | S DATA=$$CREF^DILF(TMPREF)
|
---|
| 92 | Q
|
---|
| 93 | ;
|
---|
| 94 | CHARS(TEXT) ;Non-markup content
|
---|
| 95 | ;Store element value
|
---|
| 96 | S @DATA@("V")=TEXT
|
---|
| 97 | Q
|
---|
| 98 | ;
|
---|
| 99 | PI(TARGET,TEXT) ;Processing instruction
|
---|
| 100 | Q
|
---|
| 101 | ;
|
---|
| 102 | EXTERN(SYSID,PUBID,GLOBAL) ;External entity reference
|
---|
| 103 | Q
|
---|
| 104 | ;
|
---|
| 105 | NOTATION(NAME,SYSID,PUBID) ;Notation declaration
|
---|
| 106 | Q
|
---|
| 107 | ;
|
---|
| 108 | COMMENT(TEXT) ;Comment
|
---|
| 109 | Q
|
---|
| 110 | ;
|
---|
| 111 | ERROR(ERR) ;Error
|
---|
| 112 | Q
|
---|
| 113 | ;
|
---|
| 114 | UNESC(TEXT) ;Convert escaped characters
|
---|
| 115 | ;Assumes TEXT is not corrupt
|
---|
| 116 | N ESCBEG,ESCEND,ESCTXT,ESCCHAR,OUTPUT
|
---|
| 117 | S TEXT=$G(TEXT)
|
---|
| 118 | I TEXT="" Q TEXT
|
---|
| 119 | I TEXT'["&" Q TEXT
|
---|
| 120 | ;Do conversion
|
---|
| 121 | S OUTPUT=""
|
---|
| 122 | F Q:TEXT'["&" D
|
---|
| 123 | .;Find escaped character
|
---|
| 124 | .S ESCBEG=$F(TEXT,"&")
|
---|
| 125 | .S ESCEND=$F(TEXT,";",ESCBEG)
|
---|
| 126 | .S ESCTXT=$E(TEXT,ESCBEG,ESCEND-2)
|
---|
| 127 | .;Convert escaped character
|
---|
| 128 | .S ESCCHAR=""
|
---|
| 129 | .I ESCTXT="amp" S ESCCHAR="&"
|
---|
| 130 | .I ESCTXT="lt" S ESCCHAR="<"
|
---|
| 131 | .I ESCTXT="gt" S ESCCHAR=">"
|
---|
| 132 | .I ESCTXT="apos" S ESCCHAR="'"
|
---|
| 133 | .I ESCTXT="quot" S ESCCHAR=$C(34)
|
---|
| 134 | .;Replace escaped character with actual character
|
---|
| 135 | .S OUTPUT=OUTPUT_$E(TEXT,1,ESCBEG-2)_ESCCHAR
|
---|
| 136 | .;Continue processing rest of string
|
---|
| 137 | .S TEXT=$E(TEXT,ESCEND,$L(TEXT))
|
---|
| 138 | ;Add on remainder of text
|
---|
| 139 | S OUTPUT=OUTPUT_TEXT
|
---|
| 140 | Q OUTPUT
|
---|