source: fmts/trunk/p/C0XMAIN.m@ 1478

Last change on this file since 1478 was 1343, checked in by George Lilly, 13 years ago

beginning some retrieval code

File size: 12.4 KB
RevLine 
[1343]1C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11 17:05
2 ;;0.1;C0X;nopatch;noreleasedate;Build 7
3 ;Copyright 2011 George Lilly. Licensed under the terms of the GNU
4 ;General Public License See attached copy of the License.
5 ;
6 ;This program is free software; you can redistribute it and/or modify
7 ;it under the terms of the GNU General Public License as published by
8 ;the Free Software Foundation; either version 2 of the License, or
9 ;(at your option) any later version.
10 ;
11 ;This program is distributed in the hope that it will be useful,
12 ;but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;GNU General Public License for more details.
15 ;
16 ;You should have received a copy of the GNU General Public License along
17 ;with this program; if not, write to the Free Software Foundation, Inc.,
18 ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 ;
20 Q
21 ;
INITFARY(ZFARY) ; Load the default triple store settings into the array named by ZFARY
 ; ZFARY is passed by name. To support an additional triple store,
 ; build an array with other values and hand it to USEFARY instead.
 ; Once the array is filled, USEFARY is called so that every entry
 ; also exists as a local variable of the same name.
 ;S @ZFARY@("C0XDIR")="/home/glilly/all_smart_patient_data/smart-rdf/"
 S @ZFARY@("C0XDIR")="/home/george/fmts/trunk/samples/" ; default import directory
 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; strings file global name
 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; triples file global name
 S @ZFARY@("C0XSFN")=172.201 ; strings file number
 S @ZFARY@("C0XTFN")=172.101 ; triples file number
 D USEFARY(ZFARY)
 Q
33 ;
USEFARY(ZFARY) ; Copy each entry of the array named by ZFARY into a local variable
 ; ZFARY is passed by name. Every first-level subscript is taken as a
 ; variable name and that variable receives the node's value, e.g.
 ; @ZFARY@("C0XTFN")=172.101 results in C0XTFN=172.101.
 ; The variables are deliberately not NEWed here, so they are still
 ; defined for the caller after this returns.
 ; The assignment uses name indirection rather than building a SET
 ; command as text and XECUTEing it: a value containing a quote is
 ; copied intact, array data is never run as code, and no scratch
 ; variable is left behind.
 N ZI S ZI=""
 F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  S @ZI=@ZFARY@(ZI)
 Q
42 ;
IMPORT(FNAME,FDIR,FURL,FARY) ; Read an RDF file from disk and load it into the triple store
 ; FNAME - name of the file to read
 ; FDIR  - optional directory; when not passed, C0XDIR is used
 ; FURL  - optional name under which the file is recorded in the store;
 ;         when not passed it is FDIR followed by FNAME with every "."
 ;         changed to "_"
 ; FARY  - optional name of the file array describing the triple store
 ; NOTE: this was originally described as an extrinsic returning the
 ; node name of the text triple, but it quits without a value and so
 ; can only be invoked with DO.
 ; C0XSTART and ZRDF are set here without being NEWed.
 I '$D(FARY) S FARY="C0XFARY" D INITFARY(FARY)
 D USEFARY(FARY)
 N ZD,ZTMP
 S:'$D(FDIR) FDIR=C0XDIR ; directory of the RDF file
 S:'$D(FURL) FURL=FDIR_$TR(FNAME,".","_") ; the dot is not wanted in the name
 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; where the incoming file is placed
 K @ZTMP ; make sure that node is empty
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"READING IN: ",FNAME
 I '$$FILEIN(ZTMP,FDIR,FNAME,4) W !,"ERROR READING FILE: ",FDIR,FNAME Q
 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; the same global without the line subscript
 W !,$O(@ZRDF@(""),-1)," LINES READ"
 D INSRDF(ZRDF,FURL,FARY) ; store the text and parse it into triples
 Q
70 ;
WGET(ZURL,FARY) ; Download an RDF document from ZURL and load it into the triple store
 ; ZURL - address of the document; also passed to INSRDF as its name
 ; FARY - optional name of the file array describing the triple store
 ; ZTMP, ZLOC, OK and C0XSTART are set here without being NEWed.
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 ;N ZLOC,ZTMP
 K ZTMP
 S ZLOC=$NA(^TMP("C0X","WGET",$J))
 ; MERGE only adds nodes, so lines left by an earlier download in this
 ; job would otherwise be counted and parsed along with the new ones
 K @ZLOC
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"DOWNLOADING: ",ZURL
 ; NOTE(review): OK is never examined - confirm what httpGET^%zewdGTM
 ; returns on failure and whether the import should stop in that case
 S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
 M @ZLOC=ZTMP
 W !,$O(@ZLOC@(""),-1)," LINES READ"
 D INSRDF(ZLOC,ZURL,FARY)
 Q
88 ;
INSRDF(ZRDF,ZNAME,FARY) ; Record an RDF document in the store, then parse it into triples
 ; ZRDF  - passed by name; the global holding the RDF text
 ; ZNAME - name (file name or URL) recorded for the document
 ; FARY  - optional name of the file array describing the triple store
 ; Two triples are filed under a freshly generated graph name: one
 ; giving ZNAME as fmts:url and one (fmts:rdfSource) naming the text
 ; node that receives the document text. PROCESS then parses the text.
 I '$D(FARY) S FARY="C0XFARY" D INITFARY(FARY)
 D USEFARY(FARY)
 N ZGRAPH,ZSUBJECT,ZTXTNM
 S ZGRAPH="_:G"_$$LKY9 ; random graph name
 S ZSUBJECT=$$ANONS ; random anonymous subject
 D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; name for the text node
 D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY)
 D UPDIE(.C0XFDA) ; file the two triples now
 K C0XCNT ; ADD starts counting from zero again when this is undefined
 D STORETXT(ZRDF,ZTXTNM,FARY)
 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
 ; PROCESS receives ZNAME as its graph and ZGRAPH as its metadata graph
 D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY)
 Q
108 ;
STORETXT(ZTXT,ZNAME,FARY) ; Store text in the triple store under the string entry ZNAME
 ; ZTXT  - handed to WP^DIE as the source of the text; callers in this
 ;         routine pass the name of a global
 ; ZNAME - name of the string entry that receives the text
 ; FARY  - optional name of the file array describing the triple store
 ; ZERR is cleared, passed to WP^DIE as the error array and dumped to
 ; the current device if it comes back defined; it is not NEWed.
 I '$D(FARY) S FARY="C0XFARY" D INITFARY(FARY)
 D USEFARY(FARY)
 N ZIEN
 S ZIEN=$$IENOF(ZNAME,FARY) ; IENOF adds the entry when it is missing
 D CLEAN^DILF
 K ZERR
 D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR")
 I $D(ZERR) ZWR ZERR
 Q
123 ;
GETTXT(ZRTN,ZNAME,FARY) ; Retrieve RDF source or other text stored under ZNAME
 ; ZRTN  - passed by reference; named as the target array in the
 ;         GET1^DIQ call
 ; ZNAME - name of the string entry whose text is wanted
 ; FARY  - optional name of the file array describing the triple store
 ; OK receives the value returned by GET1^DIQ and is not NEWed.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 ; FARY has to be handed on: when IENOF is called without it, IENOF
 ; runs INITFARY, which reloads the default store settings and so
 ; overrides the store selected by the caller
 S ZIEN=$$IENOF(ZNAME,FARY)
 S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN")
 Q
134 ;
WHERETXT(ZNAME,FARY) ; Extrinsic: name of the global node where the text of ZNAME is located
 ; ZNAME - name of the string entry
 ; FARY  - optional name of the file array describing the triple store
 ; Returns the name of node (IEN,1) under the strings file global.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 ; FARY has to be handed on: when IENOF is called without it, IENOF
 ; runs INITFARY, which reloads the default store settings and so
 ; overrides the store selected by the caller
 S ZIEN=$$IENOF(ZNAME,FARY)
 Q $NA(@C0XSN@(ZIEN,1))
144 ;
FILEIN(ZINTMP,ZDIR,ZFNAME,ZLVL) ; Extrinsic: read a file into ZINTMP using FTG^%ZISH
 ; ZINTMP - passed by name and includes the subscript to increment,
 ;          e.g. ^TMP("C0X",$J,"FILEIN",1)
 ; ZDIR   - directory containing the file
 ; ZFNAME - name of the file
 ; ZLVL   - position of the subscript to increment (4 for the example
 ;          above, which is what IMPORT passes)
 ; Returns the result of FTG^%ZISH. OK is set and is not NEWed.
 ; The file name is taken from the ZFNAME parameter; previously the
 ; variable FNAME was used, which only worked because the caller
 ; happened to have a variable of that name in scope.
 S OK=$$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL)
 Q OK
152 ;
TESTPROC ; Run PROCESS against a small RDF file already present in ^TMP
 ; The input node uses a hard-coded job number (12226), so that node
 ; has to have been populated beforehand.
 ; ZIN, ZGRAPH and ZM remain defined afterwards.
 S ZM="/test/rdfFile/meta",ZGRAPH="/test/rdfFile"
 S ZIN=$NA(^TMP("C0X",12226,"FILEIN"))
 D PROCESS(.G,ZIN,ZGRAPH,ZM)
 Q
159 ;
PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; Parse an incoming RDF/XML document and file its triples
 ; ZRTN  - passed by reference; meant to return messages about the
 ;         processing (nothing is stored into it by this code)
 ; ZRDF  - passed by name; the global containing the RDF text
 ; ZGRF  - name of the graph under which the triples are filed
 ; ZMETA - optional name of the graph that receives metadata about
 ;         ZGRF; a random name is generated when it is not passed
 ; FARY  - optional name of the file array describing the triple store
 ; Progress and errors are written to the current device. The C0X*
 ; working variables and ZDOM, ZVOC, ZX, ZY, METAS are not NEWed.
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 ; C0XSTART is set by IMPORT and WGET; default it here so that a direct
 ; call (as TESTPROC makes) can still report the elapsed time
 I '$D(C0XSTART) S C0XSTART=$$NOW^XLFDT
 ; -- first parse the rdf file with the MXML parser
 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
 ; -- assign the MXML dom global name to ZDOM
 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
 W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
 ; -- populate the metagraph to point to the graph with status unfinished
 S METAS=$$ANONS ; anonymous random subject for the metadata triples
 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; random graph name for the metagraph
 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; point the meta to the graph
 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
 S C0XDATE=$$NOW^XLFDT
 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
 D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
 ; --
 ; -- pull out the vocabularies in the RDF statement, marked with xmlns:
 ; -- and keep them in a local array for quick reference
 ; -- TODO: create a graph for vocabularies and validate incoming against it
 ;
 K C0XVOC ; prefixes from a previously processed document must not survive
 S C0XVOC=""
 N ZI,ZJ,ZK S ZI=""
 F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; for each attribute of the root
 . S ZVOC=$P(ZI,"xmlns:",2)
 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
 ;W !,"VOCABS:" ZWR C0XVOC
 ;
 ; -- the first child must be rdf:Description, otherwise this is not an
 ; -- rdf file. A root without children is handled without referencing
 ; -- a null subscript.
 ;
 S ZI=$O(@ZDOM@(1,"C",""))
 S ZX="" I ZI'="" S ZX=$G(@ZDOM@(1,"C",ZI))
 I ZX'="rdf:Description" D  Q  ; not an rdf file
 . W !,"Error. Not an RDF file. Cannot process."
 ;
 ; -- now process the rdf description children
 ;
 S ZI=""
 S (C0XSUB,C0XPRE,C0XOBJ)="" ; initialize subject, predicate and object
 F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
 . ; -- any child that is not rdf:Description is skipped
 . ; -- TODO: check to see if this is right in general
 . I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
 . . W !,"SKIPPING NODE: ",ZI
 . ; -- work out the subject afresh for every node, so that a node
 . ; -- without rdf:about or rdf:nodeID gets its own anonymous subject
 . ; -- instead of inheriting the subject of the node before it
 . S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:about"))
 . I C0XSUB="" S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; another style of subject
 . I C0XSUB="" S C0XSUB=$$ANONS ; default to a blank subject
 . ;
 . ; -- we now have the subject. the children of this node have the rest
 . ;
 . S ZJ=""
 . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
 . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
 . . I C0XPRE[":" D  ; expand using vocabulary
 . . . N ZB,ZA
 . . . S ZB=$P(C0XPRE,":",1)
 . . . S ZA=$P(C0XPRE,":",2)
 . . . ; an empty prefix is never used as a subscript
 . . . I ZB'="",$G(C0XVOC(ZB))'="" S C0XPRE=C0XVOC(ZB)_ZA ; expanded
 . . ; FARY is passed on every ADD below; without it ADD would switch
 . . ; back to the default triple store
 . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
 . . I ZY'="" D  Q  ;
 . . . S C0XOBJ=ZY ; object
 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY)
 . . ; -- reached only when there was no rdf:resource
 . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
 . . I ZX'="" D  Q  ; got one
 . . . S C0XOBJ=ZX ; incoming nodeIDs are used as object/subject
 . . . ; without change... this could be foolish .. look at it again later
 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY)
 . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
 . . I C0XOBJ="" D  Q  ; not a happy situation
 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY)
 W !,"INSERTING ",C0XCNT," TRIPLES"
 ; the FDA is empty when no triple was found above; UPDIE is skipped
 ; then because it raises an error whenever UPDATE^DIE reports one
 I $D(C0XFDA) D UPDIE(.C0XFDA) ; commit the updates to the file
 ; TODO: mark the graph as finished (not implemented)
 S C0XEND=$$NOW^XLFDT
 W !," ENDED AT: ",C0XEND
 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 ; no rate is shown when less than a second elapsed (division by zero)
 I C0XDIFF>0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
 Q
260 ;
SHOW(ZN) ; Debugging aid: dump the parsed XML DOM nodes stored under ZN
 ; Only document handle 1 of the current job is examined.
 ZWRITE ^TMP("MXMLDOM",$J,1,ZN,*)
 Q
264 ;
ANONS() ; Extrinsic: returns a randomly generated anonymous subject name
 ; The result is "_S:" followed by the digits supplied by $$LKY9.
 N ZSFX S ZSFX=$$LKY9
 Q "_S:"_ZSFX
267 ;
NEWG(NGRAPH,NMETA) ; Generate names for a new graph and its metadata graph
 ; NGRAPH and NMETA are passed by reference and receive the results:
 ; NGRAPH is "G" followed by the digits from $$LKY9 and NMETA is
 ; NGRAPH with "A" appended.
 ; NOTE: nothing is filed in the triple store here and nothing is
 ; marked as unfinished; only the two names are produced.
 S NGRAPH="G"_$$LKY9,NMETA=NGRAPH_"A"
 Q
274 ;
ADD(ZG,ZS,ZP,ZO,FARY) ; Queue one triple (graph, subject, predicate, object) for filing
 ; ZG, ZS, ZP, ZO - graph, subject, predicate and object; all are text
 ; FARY - optional name of the file array describing the triple store
 ; The four strings are resolved to entries of the strings file by
 ; IENOFA, which adds the ones that are missing, and an entry for the
 ; triple is placed in C0XFDA. The triples file itself is not updated
 ; until the caller runs UPDIE on C0XFDA.
 ; C0XCNT counts the triples queued so far; ZIENS is not NEWed.
 I '$D(FARY) S FARY="C0XFARY" D INITFARY(FARY)
 D USEFARY(FARY)
 S:'$D(C0XCNT) C0XCNT=0
 N ZNODE,ZNARY,ZSLOT
 S ZNODE="N"_$$LKY17 ; random node name for the .01 field
 S ZNARY("ZG",ZG)="",ZNARY("ZS",ZS)="",ZNARY("ZP",ZP)="",ZNARY("ZO",ZO)=""
 D IENOFA(.ZIENS,.ZNARY,FARY) ; resolve/add the strings
 ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
 ;S ZSIEN=$$IENOF(ZS)
 ;S ZPIEN=$$IENOF(ZP)
 ;S ZOIEN=$$IENOF(ZO)
 ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
 S C0XCNT=C0XCNT+1
 S ZSLOT="?+"_C0XCNT_"," ; IENS placeholder for this triple
 S C0XFDA(C0XTFN,ZSLOT,.01)=ZNODE
 S C0XFDA(C0XTFN,ZSLOT,.02)=$O(ZIENS("IEN","ZG",""))
 S C0XFDA(C0XTFN,ZSLOT,.03)=$O(ZIENS("IEN","ZS",""))
 S C0XFDA(C0XTFN,ZSLOT,.04)=$O(ZIENS("IEN","ZP",""))
 S C0XFDA(C0XTFN,ZSLOT,.05)=$O(ZIENS("IEN","ZO",""))
 ; the caller still has to run UPDIE to file what was queued
 Q
303 ;
LKY9() ; Extrinsic: returns a string of 9 random decimal digits
 ; Used for generating node and graph names. The digits come from
 ; $RANDOM, so two calls can in principle return the same value.
 N ZDIGITS,ZK
 S ZDIGITS=""
 F ZK=1:1:9 S ZDIGITS=ZDIGITS_$R(10)
 Q ZDIGITS
311 ;
LKY17() ; Extrinsic: returns a string of 17 random decimal digits
 ; Used for generating node names. The digits come from $RANDOM, so
 ; two calls can in principle return the same value.
 N ZDIGITS,ZK
 S ZDIGITS=""
 F ZK=1:1:17 S ZDIGITS=ZDIGITS_$R(10)
 Q ZDIGITS
319 ;
IENOF(ZSTRING,FARY) ; Extrinsic: returns the IEN of ZSTRING in the strings file
 ; ZSTRING - the text to look up in the "B" index of the strings file
 ; FARY    - optional name of the file array describing the triple store
 ; When the string is not on file it is added through UPDIE and looked
 ; up again, so the IEN of the new entry is returned.
 ; C0XFDA2 is used as the work FDA and is killed afterwards.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 ; load the store settings named by FARY so that the lookup does not
 ; depend on the caller having set C0XSN and C0XSFN beforehand
 D USEFARY(FARY)
 N ZIEN
 S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
 I ZIEN="" D  ;
 . K C0XFDA2 ; entries left over in the work FDA must not be filed
 . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
 . D UPDIE(.C0XFDA2)
 . S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
 . K C0XFDA2
 Q ZIEN
332 ;
IENOFA(ZOUTARY,ZINARY,FARY) ; Resolve strings to IENs in the strings file, adding missing ones
 ; ZINARY  - passed by reference; looks like ZINARY("VAR","VAL")=""
 ; ZOUTARY - passed by reference; cleared, then returned in the form
 ;           ZOUTARY("IEN","VAR",IEN)=""
 ; FARY    - optional name of the file array describing the triple store
 ; All strings that are not yet on file are added with a single UPDIE
 ; call, then every string is looked up in the "B" index.
 ; C0XFDA2 is used as the work FDA and is killed afterwards.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 ; load the store settings named by FARY so that the lookups do not
 ; depend on the caller having set C0XSN and C0XSFN beforehand
 D USEFARY(FARY)
 K ZOUTARY ; start with clean results
 K C0XFDA2 ; use a separate FDA for this
 N ZI,ZV,ZIEN,ZCNT,ZQUEUED
 S ZI="",ZCNT=0
 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; look for missing strings
 . S ZV=$O(ZINARY(ZI,""))
 . Q:ZV=""  ; nothing stored under this name
 . ; the same new value can appear under two names (for instance as
 . ; subject and object); queue it once so that only one entry is made
 . Q:$D(ZQUEUED(ZV))
 . I $O(@C0XSN@("B",ZV,""))="" D  ;
 . . S ZCNT=ZCNT+1
 . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
 . . S ZQUEUED(ZV)=""
 I $D(C0XFDA2) D  ;
 . D UPDIE(.C0XFDA2) ; add missing strings
 . K C0XFDA2 ; clean up
 ; ZI is "" again here because the loop above ran to the end
 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; now get all iens
 . S ZV=$O(ZINARY(ZI,""))
 . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; they should be there now
 . I ZIEN="" D  ;
 . . W !,"ERROR ADDING STRING: ",ZV
 . . B
 . S ZOUTARY("IEN",ZI,ZIEN)=""
 Q
362 ;
UPDIE(ZFDA) ; Internal: file an FDA array with UPDATE^DIE and check for errors
 ; ZFDA is passed by reference and is killed before returning.
 ; ZERR is cleared first and is named as the error array in the
 ; UPDATE^DIE call; it is not NEWed.
 ;ZWR ZFDA
 ;B
 K ZERR D CLEAN^DILF
 D UPDATE^DIE("","ZFDA","","ZERR")
 ; When an error came back, ZZERR is referenced on purpose: it does
 ; not exist, so the reference raises an error and the error trap is
 ; invoked if running as a task.
 S:$D(ZERR) ZZERR=ZZERR
 ;. W "ERROR",!
 ;. ZWR ZERR
 ;. B
 K ZFDA
 Q
377 ;
Note: See TracBrowser for help on using the repository browser.