source: fmts/trunk/p/C0XNOTS.m@ 1644

Last change on this file since 1644 was 1483, checked in by George Lilly, 12 years ago

updates for the no-triplestore parse

File size: 21.5 KB
Line 
1C0XNOTS ; GPL - RDF processing without the triplestore ;7/5/12 17:05
2 ;;0.1;C0X;nopatch;noreleasedate;Build 7
3 ;Copyright 2011 George Lilly. Licensed under the terms of the GNU
4 ;General Public License See attached copy of the License.
5 ;
6 ;This program is free software; you can redistribute it and/or modify
7 ;it under the terms of the GNU General Public License as published by
8 ;the Free Software Foundation; either version 2 of the License, or
9 ;(at your option) any later version.
10 ;
11 ;This program is distributed in the hope that it will be useful,
12 ;but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;GNU General Public License for more details.
15 ;
16 ;You should have received a copy of the GNU General Public License along
17 ;with this program; if not, write to the Free Software Foundation, Inc.,
18 ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 ;
20 Q
21 ;
22 ; This is based on C0XMAIN but experiments with a fast load for triples
23 ; that will write directly to the fileman global
24 ; The file 172.101 is a F2N design style for triples, which means
25 ; that it is a Flat file with no subfiles, all fields at the root
26 ; ... it is a "2" file solution which means all strings are stored in
27 ; ... strings file and pointed to by the triples file
28 ; ... it is an N file because it has generated Node IDs instead of
29 ; ... DINUM which would use the IEN for the Node ID.
30 ; gpl 11/04/2011
31 ;
INITFARY(ZFARY) ; Seed the file array named by ZFARY with the default triple store settings
 ; ZFARY is passed by name. An array that already exists is left alone
 ; (and USEFARY is then NOT called from here).
 ; Use other values to support additional triple stores.
 I $D(@ZFARY) Q  ; already seeded
 ; file numbers: triples file and its strings file
 S @ZFARY@("C0XTFN")=172.101,@ZFARY@("C0XSFN")=172.201
 ; global roots of those two files
 S @ZFARY@("C0XTN")=$NA(^C0X(101)),@ZFARY@("C0XSN")=$NA(^C0X(201))
 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/smart-new/"
 ; block load supported, fileman style, reply format
 S @ZFARY@("BLKLOAD")=1,@ZFARY@("FMTSSTYLE")="F2N",@ZFARY@("REPLYFMT")="JSON"
 D USEFARY(ZFARY)
 Q
46 ;
USEFARY(ZFARY) ; Copy each top-level node of the array named by ZFARY into a
 ; local variable of the same name, e.g. @ZFARY@("C0XTFN") -> C0XTFN
 ; ZFARY is passed by name. The target variables are deliberately not
 ; NEWed so that they stay visible to the caller.
 ; Name indirection is used instead of building and XECUTEing a SET
 ; command: a value containing a double quote is copied intact and no
 ; scratch variable is left behind in the symbol table.
 N ZI S ZI=""
 F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  S @ZI=@ZFARY@(ZI)
 Q
55 ;
FILEIN ; Interactive entry point: prompt for a directory and a file name, then import the file
 ; Optional input from the symbol table: C0XFN - default file name
 ; Left behind for the caller: C0XDIR, C0XFN, C0XFARY("C0XDIR") (the
 ; answers given) and C0XARY (killed and reset before the import).
 ; The ^DIR work variables are NEWed so the reader does not clobber the
 ; caller's X and Y, and every way of abandoning a prompt (up-arrow,
 ; time-out, empty answer) is caught through DIRUT.
 N DIR,X,Y,DIRUT,DIROUT,DTOUT,DUOUT
 I '$D(C0XFARY) D INITFARY("C0XFARY")
 D USEFARY("C0XFARY")
 S DIR(0)="F^3:240"
 S DIR("A")="File Directory"
 S DIR("B")=C0XDIR
 D ^DIR
 I $D(DIRUT) Q  ; user backed out or timed out
 S C0XDIR=Y
 S C0XFARY("C0XDIR")=Y
 K DIR
 S DIR(0)="F^3:240"
 S DIR("A")="File Name"
 S DIR("B")=$S($D(C0XFN)#2:C0XFN,1:"qds.rdf") ; last file used, else a sample name
 D ^DIR
 I $D(DIRUT) Q  ;
 I Y="" Q  ;
 S C0XFN=Y
 K C0XARY
 S C0XARY=""
 D IMPORT(.C0XARY,C0XFN,C0XDIR)
 K C0XFDA
 Q
79 ;
IMPORT(ZRETURN,FNAME,INDIR) ; Read an RDF file from the host file system and parse it
 ; ZRETURN - passed by reference, handed on to INSRDF
 ; FNAME   - file name
 ; INDIR   - optional directory; C0XDIR is used when it is not passed
 ; Optional inputs taken from the symbol table:
 ;   FARY  - name of the file array to use (defaults to "C0XFARY")
 ;   INURL - address of the file; defaults to INDIR_FNAME, killed on success
 ; Called with DO; no value is returned.
 I '$D(FARY) D INITFARY("C0XFARY") S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZD,ZTMP
 S:'$D(INDIR) INDIR=C0XDIR
 S:$G(INURL)="" INURL=INDIR_FNAME
 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; landing area for the incoming file
 K @ZTMP
 S C0XSTART=$$NOW^XLFDT
 I $D(DEBUG) W !,"STARTED: ",C0XSTART,!,"READING IN: ",FNAME
 ; 4 = position of the subscript FTG^%ZISH increments for each line
 I '$$FILEREAD(ZTMP,INDIR,FNAME,4) W !,"ERROR READING FILE: ",INDIR,FNAME Q
 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; same global without the line subscript
 I $D(DEBUG) W !,$O(@ZRDF@(""),-1)," LINES READ"
 D INSRDF(.ZRETURN,ZRDF)
 K INURL,C0XFDA
 Q
110 ;
WGET(ZRETURN,ZURL) ; Download an RDF document from ZURL and parse it
 ; ZRETURN - passed by reference, handed on to INSRDF
 ; ZURL    - address handed to $$httpGET^%zewdGTM
 ; Optional input from the symbol table: FARY - name of the file array
 ; Progress and timing are always written to the current device.
 ; If nothing was downloaded this is reported and the routine quits,
 ; rather than handing an empty buffer to the parser.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 K ZTMP
 S ZLOC=$NA(^TMP("C0X","WGET",$J))
 K @ZLOC
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"DOWNLOADING: ",ZURL
 S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
 M @ZLOC=ZTMP
 K ZTMP ; the local copy is not needed once the text is in ^TMP
 S C0XLINES=$O(@ZLOC@(""),-1)
 I C0XLINES="" W !,"ERROR DOWNLOADING: ",ZURL Q  ; nothing arrived
 W !,C0XLINES," LINES READ"
 S C0XDLC=$$NOW^XLFDT ; DOWNLOAD COMPLETE
 W !,"DOWNLOAD COMPLETE AT ",C0XDLC
 S C0XDIFF=$$FMDIFF^XLFDT(C0XDLC,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XLINES/C0XDIFF,".")," LINES PER SEC"
 D INSRDF(.ZRETURN,ZLOC)
 Q
135 ;
INSRDF(ZRETURN,ZRDF) ; Set up the batch counters and hand an RDF buffer to PROCESS
 ; ZRETURN - passed by reference, forwarded to PROCESS
 ; ZRDF    - passed by name: the global holding the RDF text
 ; Optional input from the symbol table: FARY - name of the file array
 I '$D(FARY) D INITFARY("C0XFARY") S FARY="C0XFARY"
 D USEFARY(FARY)
 S BATCNT=0,BATMAX=10000 ; batch counter and the batch size to try
 N ZGRAPH,ZSUBJECT
 S ZGRAPH="_:G"_$$LKY9() ; random graph name
 S ZSUBJECT=$$ANONS() ; random anonymous subject
 N ZTXTNM
 K C0XCNT ; reset for next time
 D PROCESS(.ZRETURN,ZRDF)
 Q
158 ;
name2tag(zname) ; extrinsic: tag for a path - its last "/" segment, cut at the first "."
 ; e.g. /home/vista/project.xml ==> project
 n zleaf
 s zleaf=$p(zname,"/",$l(zname,"/")) ; last path segment
 q $p(zleaf,".")
162 ;
FILEREAD(ZINTMP,ZDIR,ZFNAME,ZLVL) ; Extrinsic: read a host file into a global using FTG^%ZISH
 ; ZINTMP - passed by name, including the subscript to increment,
 ;          e.g. ^TMP("C0X","FILEIN",1)
 ; ZDIR   - directory
 ; ZFNAME - file name
 ; ZLVL   - position of the subscript to increment (3 for the example above)
 ; Returns the result of $$FTG^%ZISH
 ; The file name comes from the ZFNAME parameter. Reading FNAME here
 ; only worked while the caller happened to own a variable of that name.
 Q $$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL)
170 ;
TESTPROC ; Ad hoc test: run PROCESS on an RDF buffer already sitting in ^TMP
 ; The buffer is looked for under the hard-coded job number 12226
 S ZIN=$NA(^TMP("C0X",12226,"FILEIN"))
 S ZGRAPH="/test/rdfFile",ZM=ZGRAPH_"/meta"
 D PROCESS(.G,ZIN)
 Q
177 ;
VISTAOWL ; Ad hoc test: run PROCESS on a VistA OWL buffer already sitting in ^TMP
 ; The buffer is looked for under the hard-coded job number 542
 S ZRDF=$NA(^TMP("C0X",542,"FILEIN"))
 S ZNAME="/home/glilly/vistaowl/VistAOWL.owl",ZGRAPH="_:G431590209"
 S FARY="C0XFARY"
 D INITFARY(FARY)
 S C0XDOCID=1,BATCNT=0,BATMAX=10000
 D PROCESS(.G,ZRDF)
 Q
189 ;
FETCH(C0XRARY,FNAME,FDIR) ; Read in an RDF file and return a usable MUMPS array of its contents
 ; C0XRARY - passed by reference
 ; FNAME   - optional file name; a sample patient file is used when absent
 ; FDIR    - optional directory; /home/vista/CCR/ is used when absent
 S:'$D(FNAME) FNAME="dewdrop-patient-32-v2.rdf"
 S:'$D(FDIR) FDIR="/home/vista/CCR/"
 D IMPORT(.C0XRARY,FNAME,FDIR)
 Q
197 ;
PROCESS(ZRTN,ZRDF) ; Parse an incoming RDF/XML buffer and hand each triple to ADD
 ; ZRTN - passed by reference; this code does not write to it
 ; ZRDF - passed by name: the global containing the RDF text. The
 ;        buffer is killed once it has been parsed or rejected.
 ; Optional inputs from the symbol table:
 ;   FARY  - name of the file array (defaults to "C0XFARY")
 ;   DEBUG - when defined, progress and timing are written
 ;   ZMETA - metagraph name; a random one is generated when absent
 ; Every triple is passed to ADD with the subject doubling as graph name.
 ; Timing and work variables (C0XDLC2, C0XPRS, C0XTRP, C0XDIFF, C0XDOCID,
 ; ZDOM, C0XVOC, C0XTYPE, ...) are left in the symbol table.
 ; The FileMan filing code that used to follow the final QUIT was
 ; unreachable and has been removed.
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 ; -- first parse the rdf file with the MXML parser
 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
 ; $G: an empty buffer is rejected instead of raising an undefined error
 I $G(@ZRDF@(1))'["<?xml" D  Q  ;
 . K @ZRDF ; don't need the input buffer
 . W !,"Not an XML file"
 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W") ;
 K @ZRDF ; DON'T NEED INPUT BUFFER ANYMORE
 ; the parser hands back 0 when it could not build a document
 I 'C0XDOCID W !,"Error. XML parse failed. Cannot process." Q
 ; -- assign the MXML dom global name to ZDOM
 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
 S C0XNODE=$O(@ZDOM@(""),-1)
 I $D(DEBUG) W !,C0XNODE," XML NODES PARSED"
 S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
 I $D(DEBUG) W !,"PARSE COMPLETE AT ",C0XPRS
 S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
 I $D(DEBUG) W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 D  ;
 . I $D(DEBUG) W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
 ; -- names for the metagraph (the filing of the metagraph is disabled)
 S METAS=$$ANONS ; GET AN ANONYMOUS RANDOM SUBJECT
 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
 ; ZGRF is only assigned further down, so it may not exist yet
 I $D(DEBUG) W !,"INSERTING GRAPH: ",$G(ZGRF)
 S C0XDATE=$$NOW^XLFDT
 ; --
 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
 ; -- put them in a local variable for quick reference
 ; -- TODO: create a graph for vocabularies and validate incoming against it
 ;
 S C0XVOC=""
 N ZI,ZJ,ZK S ZI=""
 F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
 . S ZVOC=$P(ZI,"xmlns:",2)
 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
 I $D(DEBUG) D  ;
 . W !,"VOCABS:"
 . N ZZ S ZZ=""
 . F  S ZZ=$O(C0XVOC(ZZ)) Q:ZZ=""  W !,ZZ,":",C0XVOC(ZZ)
 ;
 ; -- element names accepted as children of the root
 ;
 S C0XTYPE("rdf:Description")=1
 S C0XTYPE("owl:ObjectProperty")=1
 S C0XTYPE("owl:Ontology")=1
 S C0XTYPE("owl:Class")=1
 S C0XTYPE("rdfs:subClassOf")=1
 S C0XTYPE("rdf:RDF")=1
 S ZI=$O(@ZDOM@(1,"C",""))
 ; a root element with no children holds nothing to process
 I ZI="" W !,"Error. Not an RDF file. Cannot process." Q
 I '$G(C0XTYPE(@ZDOM@(1,"C",ZI))) D  ; unexpected first child: report it and carry on
 . W !,"Unusual RDF file ",@ZDOM@(1,"C",ZI)
 . D SHOW(1)
 ;
 ; -- now process the children of the root
 ;
 S ZI=""
 S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
 F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
 . ; -- we are skipping any child whose element name is not in C0XTYPE
 . ; -- TODO: check to see if this is right in general
 . ;
 . IF '$G(C0XTYPE(@ZDOM@(1,"C",ZI))) D  Q  ;
 . . W !,"SKIPPING NODE: ",ZI
 . ; -- now looking for the subject for the triples
 . ; -- start from empty for every node: a node that names no subject
 . ; -- must get its own anonymous one, not the previous node's
 . S C0XSUB=""
 . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
 . I ZX="" S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
 . S C0XSUB=$S(ZX'="":ZX,1:$$ANONS) ; DEFAULT TO BLANK SUBJECT
 . ;
 . ; -- we now have the subject. the children of this node have the rest
 . ;
 . S ZGRF=C0XSUB ; INVENT A GRAPH NAME BASED ON THE SUBJECT
 . ;
 . S ZJ="" ; for the children of the rdf:Description nodes
 . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
 . . S C0XPRE=$G(@ZDOM@(ZJ)) ; the predicate without a prefix
 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
 . . I C0XPRE[":" D  ; expand using vocabulary
 . . . N ZB,ZA
 . . . S ZB=$P(C0XPRE,":",1)
 . . . S ZA=$P(C0XPRE,":",2)
 . . . I $G(C0XVOC(ZB))'="" D  ;
 . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
 . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
 . . I ZY'="" D  Q  ;
 . . . S C0XOBJ=$$EXT^C0XUTIL(ZY) ; object
 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; finally. our first real triple
 . . ; -- this is an else because of the quit above
 . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
 . . I ZX'="" D  Q  ; got one
 . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
 . . . ; without change... this could be foolish .. look at it again later
 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
 . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
 . . I C0XOBJ="" D  Q  ; not a happy situation
 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
 . . S C0XOBJ=$$EXT^C0XUTIL(C0XOBJ) ; might be namespaced
 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
 S C0XTRP=$$NOW^XLFDT ; TRIPLES COMPLETE
 I $D(DEBUG) W !,"TRIPLES COMPLETE AT ",C0XTRP
 S C0XDIFF=$$FMDIFF^XLFDT(C0XTRP,C0XPRS,2)
 I $D(DEBUG) W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 ; C0XCNT is not maintained by the in-memory ADD, so it may be undefined
 I C0XDIFF'=0 D  ;
 . I $D(DEBUG) W !," APPROXIMATELY ",$P(+$G(C0XCNT)/C0XDIFF,".")," TRIPLES PER SECOND"
 I $D(DEBUG) W !,"INSERTING ",+$G(C0XCNT)," TRIPLES"
 Q
344 ;
SHOW(ZN) ; Write DOM node ZN and everything below it to the current device
 ; ZN - node number inside ^TMP("MXMLDOM",job,1,...)
 ; The job number comes from C0XJOB, which is set to $J (and left
 ; defined) when the caller has not supplied one. Document handle 1
 ; is hard coded.
 I '$D(C0XJOB) S C0XJOB=$J
 N ZD
 S ZD=$NA(^TMP("MXMLDOM",C0XJOB,1,ZN))
 W ZD,"=",$G(@ZD)
 ; stop at the end of the global as well as at the end of this node:
 ; $QUERY returns "" after the last node and $QSUBSCRIPT cannot take ""
 F  S ZD=$Q(@ZD) Q:ZD=""  Q:$QS(ZD,4)'=ZN  W !,ZD,"=",@ZD
 Q
353 ;
ANONS() ; Extrinsic: an anonymous subject - a fixed prefix followed by 9 random digits
 N ZTAIL
 S ZTAIL=$$LKY9()
 Q "iDPsDPss"_ZTAIL
356 ;
NEWG(NGRAPH,NMETA) ; Generate a new graph name and the matching metagraph name
 ; NGRAPH and NMETA are passed by reference and are the return values:
 ;   NGRAPH = "G" followed by 9 random digits
 ;   NMETA  = NGRAPH followed by "A"
 ; Only the two names are produced here; nothing is filed.
 N ZID
 S ZID="G"_$$LKY9()
 S NGRAPH=ZID,NMETA=ZID_"A"
 Q
363 ;
STARTADD ; Clear the pending FDA (C0XFDA) and the batch counter (BATCNT)
 K C0XFDA,BATCNT
 Q
368 ;
ADD(ZG,ZS,ZP,ZO,ZRTN) ; Record one triple in the in-memory result arrays. All values are text
 ; ZG   - graph name (not used by the in-memory path)
 ; ZS   - subject
 ; ZP   - predicate
 ; ZO   - object
 ; ZRTN - optional NAME of an array that also receives (ZS,ZP,ZO)=""
 ; Results, left in the symbol table on purpose:
 ;   C0XARY(ZS,ZP,ZO)=""
 ;   C0XRARY(a,b,ZP)=ZO where a and b are the last two "/" pieces of ZS
 ;       - only set when ZS contains a "/"
 ; A triple with an empty subject, predicate or object is skipped; the
 ; retired FileMan path made the same check before filing.
 ; When ZRTN is not supplied nothing extra is written. Defaulting it to
 ; "ZRTN" only ever filled this routine's own parameter, which is
 ; discarded on QUIT.
 ; The FileMan filing code that used to follow the QUIT was unreachable
 ; and has been removed.
 I ($G(ZS)="")!($G(ZP)="")!($G(ZO)="") D  Q  ;
 . I $D(DEBUG) W !,"Error Empty String ZG:"_$G(ZG)_" ZS:"_$G(ZS)_" ZP:"_$G(ZP)_" ZO"_$G(ZO)
 I $G(ZRTN)'="" S @ZRTN@(ZS,ZP,ZO)=""
 S C0XARY(ZS,ZP,ZO)=""
 N ZNN S ZNN=$L(ZS,"/")
 Q:ZNN<2
 S C0XRARY($P(ZS,"/",ZNN-1),$P(ZS,"/",ZNN),ZP)=ZO
 Q
426 ;
LKY5() ; Extrinsic: a string of 5 random decimal digits (leading zeros possible)
 ; Used for generating node and graph names
 N ZDIGITS,ZK
 S ZDIGITS=""
 F ZK=1:1:5 S ZDIGITS=ZDIGITS_$R(10)
 Q ZDIGITS
434 ;
LKY9() ; Extrinsic: a string of 9 random decimal digits (leading zeros possible)
 ; Used for generating node and graph names
 N ZDIGITS,ZK
 S ZDIGITS=""
 F ZK=1:1:9 S ZDIGITS=ZDIGITS_$R(10)
 Q ZDIGITS
442 ;
LKY17() ; Extrinsic: a string of 17 random decimal digits (leading zeros possible)
 ; Used for generating node and graph names
 N ZDIGITS,ZK
 S ZDIGITS=""
 F ZK=1:1:17 S ZDIGITS=ZDIGITS_$R(10)
 Q ZDIGITS
450 ;
451 ; these routines add the string if it is not found
452 ;
IENOF(ZSTRING,FARY) ; Extrinsic: IEN of ZSTRING in the strings file; the string is added if it is not found
 ; ZSTRING - text to look up
 ; FARY    - optional name of the file array (defaults to "C0XFARY")
 ; Returns the IEN, or "" when ZSTRING is empty
 ; USEFARY is called here, as in the other entry points, so that C0XSN
 ; and C0XSFN are defined even when this is the first call made.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 I $G(ZSTRING)="" Q "" ; NO STRING
 S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
 I ZIEN="" D  ;
 . K C0XFDA2 ; do not file anything left over from an earlier call
 . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
 . D UPDIE(.C0XFDA2)
 . S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
 . K C0XFDA2
 Q ZIEN
466 ;
IENOFA(ZOUTARY,INARY,FARY) ; Resolve strings to IENs in the strings file, adding any that are missing
 ; ZOUTARY - passed by reference; returned as ZOUTARY("IEN",var,ien)=""
 ; INARY   - passed by reference; looks like INARY(var,value)=""
 ; FARY    - optional name of the file array (defaults to "C0XFARY")
 ; Nothing is filed or returned when any var has no value.
 ; A "^" inside a value is stored as "|".
 ; Differences from the earlier code:
 ;  - USEFARY is called so that C0XSN and C0XSFN are always defined
 ;  - a string used by more than one var is queued for adding only once
 ;  - a string that still cannot be found is reported and left out of
 ;    ZOUTARY; there is no BREAK and no null subscript
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 K ZOUTARY ; START WITH CLEAN RESULTS
 K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
 I '$D(C0XVOC) D VOCINIT^C0XUTIL
 N ZINARY
 N ZI S ZI=""
 F  S ZI=$O(INARY(ZI)) Q:ZI=""  D  ;
 . N ZK
 . S ZK=$O(INARY(ZI,""))
 . S ZINARY($$EXT^C0XUTIL(ZI),$$EXT^C0XUTIL(ZK))=""
 N ZV,ZIEN,ABORT,ZSEEN
 S ABORT=0
 N ZCNT S ZCNT=0
 F  S ZI=$O(ZINARY(ZI)) Q:(ZI="")!+ABORT  D  ; LOOK FOR MISSING STRINGS
 . S ZV=$O(ZINARY(ZI,""))
 . I ZV="" S ABORT=1 Q  ; abandon quad -- missing an entry
 . I ZV["^" S ZV=$TR(ZV,"^","|")
 . I $D(ZSEEN(ZV)) Q  ; already queued for another var
 . I $O(@C0XSN@("B",ZV,""))="" D  ;
 . . S ZSEEN(ZV)=""
 . . S ZCNT=ZCNT+1
 . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
 I +ABORT Q  ;
 I $D(C0XFDA2) D  ;
 . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
 . K C0XFDA2 ; CLEAN UP
 S ZI=""
 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOW GET ALL IENS
 . S ZV=$O(ZINARY(ZI,""))
 . I ZV["^" S ZV=$TR(ZV,"^","|")
 . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
 . I ZIEN="" W !,"ERROR ADDING STRING: ",ZV Q  ; leave this var unresolved
 . S ZOUTARY("IEN",ZI,ZIEN)=""
 Q
507 ;
ADDINN(ZG,ZS,ZARY) ; Add if not null: one triple for each non-empty entry of ZARY
 ; ZG   - the graph name, passed by value
 ; ZS   - the subject, passed by value
 ; ZARY - passed by reference; format is ZARY(predicate)=object
 ; The triples are filed through ADD^C0XF2N
 N ZI S ZI=""
 F  S ZI=$O(ZARY(ZI)) Q:ZI=""  D  ;
 . Q:ZARY(ZI)=""  ; nothing to add for this predicate
 . D ADD^C0XF2N(ZG,ZS,ZI,ZARY(ZI))
 . I $D(DEBUG) W !,"ADDING",ZI," ",ZARY(ZI)
 Q
521 ;
BULKLOAD(ZBFDA) ; Bulk loader: file triples into 172.101 with global sets instead of UPDATE^DIE
 ; ZBFDA - passed by reference: ZBFDA(172.101,n,field)=value where n is
 ;         a plain number starting at 1 (not a FileMan IENS string) and
 ;         field is .01 (node name), .02 (graph), .03 (subject),
 ;         .04 (predicate) or .05 (object)
 ; Quits without filing when the FDA is empty, is for another file, or
 ; is not numbered from 1. The last n is taken as the count of entries.
 ;
 ; -- reserves a block of iens by locking the zero node ^C0X(101,0),
 ; -- advancing pieces 3 and 4, then unlocking before the entries are
 ; -- written, to minimize the duration of the lock
 ; NOTE(review): both pieces are advanced by count+1 although only
 ; count entries are written - confirm whether the extra 1 is intended.
 ; NOTE(review): the LOCK has no timeout.
 ;
 I $D(DEBUG) W !,"USING BULKLOAD"
 I '$D(ZBFDA) Q  ; EMPTY FDA
 I $O(ZBFDA(""))'=172.101 Q  ; WRONG FILE
 ; an FDA keyed by "?+n," strings would be added to ZBASE as 0 and
 ; overwrite the last existing entry, so insist on numbering from 1
 I $O(ZBFDA(172.101,""))'=1 Q  ; FIRST ENTRY IS NOT 1
 N U S U="^" ; do not depend on the caller having set up U
 N ZCNT,ZP3,ZP4
 ; -- find the number of nodes to insert
 S ZCNT=$O(ZBFDA(172.101,""),-1)
 I ZCNT'?1.N D  Q  ;
 . W !,"ERROR IN BULK LOAD - INVALID NODE COUNT"
 ; -- lock the zero node and reserve a block of iens to insert
 I $D(DEBUG) W !,"LOCKING ZERO NODE"
 LOCK +^C0X(101,0)
 S ZP3=$P(^C0X(101,0),U,3)
 S ZP4=$P(^C0X(101,0),U,4)
 S $P(^C0X(101,0),U,3)=ZP3+ZCNT+1
 S $P(^C0X(101,0),U,4)=ZP4+ZCNT+1
 LOCK -^C0X(101,0)
 N ZI,ZN,ZG,ZS,ZP,ZO,ZIEN,ZBASE
 S ZBASE=ZP3 ; the last ien in the file
 I $D(DEBUG) W !,"ZERO NODE UNLOCKED, IENS RESERVED=",ZCNT
 I $D(DEBUG) W !,$$NOW^XLFDT
 S ZI=""
 F  S ZI=$O(ZBFDA(172.101,ZI)) Q:ZI=""  D  ;
 . ; an entry with any field missing is reported and skipped
 . S ZN=$G(ZBFDA(172.101,ZI,.01)) ; node name
 . I ZN="" D BLKERR Q  ;
 . S ZG=$G(ZBFDA(172.101,ZI,.02)) ; graph pointer
 . I ZG="" D BLKERR Q  ;
 . S ZS=$G(ZBFDA(172.101,ZI,.03)) ; subject pointer
 . I ZS="" D BLKERR Q  ;
 . S ZP=$G(ZBFDA(172.101,ZI,.04)) ; predicate pointer
 . I ZP="" D BLKERR Q  ;
 . S ZO=$G(ZBFDA(172.101,ZI,.05)) ; object pointer
 . I ZO="" D BLKERR Q  ;
 . S ZIEN=ZI+ZBASE ; the new ien
 . S ^C0X(101,ZIEN,0)=ZN_U_ZG_U_ZS_U_ZP_U_ZO ; set the zero node
 . D INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO)
 Q
571 ;
INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO) ; Hard set every index of ^C0X(101) for one entry
 ; ZIEN - entry number
 ; ZN   - node name
 ; ZG, ZS, ZP, ZO - graph, subject, predicate and object values
 ; the B index and the G (graph) index
 S ^C0X(101,"B",ZN,ZIEN)="",^C0X(101,"G",ZG,ZIEN)=""
 ; the six orderings of subject, predicate and object
 S ^C0X(101,"SPO",ZS,ZP,ZO,ZIEN)="",^C0X(101,"SOP",ZS,ZO,ZP,ZIEN)=""
 S ^C0X(101,"OPS",ZO,ZP,ZS,ZIEN)="",^C0X(101,"OSP",ZO,ZS,ZP,ZIEN)=""
 S ^C0X(101,"PSO",ZP,ZS,ZO,ZIEN)="",^C0X(101,"POS",ZP,ZO,ZS,ZIEN)=""
 ; the same six orderings, led by the graph
 S ^C0X(101,"GOPS",ZG,ZO,ZP,ZS,ZIEN)="",^C0X(101,"GOSP",ZG,ZO,ZS,ZP,ZIEN)=""
 S ^C0X(101,"GPSO",ZG,ZP,ZS,ZO,ZIEN)="",^C0X(101,"GPOS",ZG,ZP,ZO,ZS,ZIEN)=""
 S ^C0X(101,"GSPO",ZG,ZS,ZP,ZO,ZIEN)="",^C0X(101,"GSOP",ZG,ZS,ZO,ZP,ZIEN)=""
 Q
588 ;
REINDEX ; Reindex the ^C0X(101, triple store
 ; Every index is killed, then INDEX is called for each entry.
 ; An entry whose zero node is missing or has an empty piece is reported
 ; (by its IEN) and skipped. Indexing it would otherwise stop the rebuild
 ; part way through, with the old indexes already gone.
 N ZIEN,ZZ,ZN,ZG,ZS,ZP,ZO,ZX
 F ZX="B","G","SPO","SOP","OPS","OSP","PSO","POS" K ^C0X(101,ZX)
 F ZX="GOPS","GOSP","GPSO","GPOS","GSPO","GSOP" K ^C0X(101,ZX)
 S ZIEN=0
 ; numeric subscripts collate first, so the loop ends at the first index name
 F  S ZIEN=$O(^C0X(101,ZIEN)) Q:+ZIEN=0  D  ; FOR EACH NODE
 . S ZZ=$G(^C0X(101,ZIEN,0))
 . S ZN=$P(ZZ,"^",1),ZG=$P(ZZ,"^",2),ZS=$P(ZZ,"^",3)
 . S ZP=$P(ZZ,"^",4),ZO=$P(ZZ,"^",5)
 . I (ZN="")!(ZG="")!(ZS="")!(ZP="")!(ZO="") W !,"ERROR REINDEXING NODE ",ZIEN Q
 . D INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO)
 Q
617 ;
BLKERR ; Report an incomplete entry met during BULKLOAD
 ; Expects ZI, the FDA entry being loaded, from BULKLOAD's symbol table
 ; Sets C0XERR and C0XLOC, then records the error through ^%ZTER
 ; C0XLOC names the FDA entry. ZBFDA(ZI) itself is never defined (the
 ; data sits under ZBFDA(172.101,ZI,field)), so reading it raised an
 ; error before anything could be reported.
 W !,"ERROR IN BULK LOAD"
 S C0XERR="ERROR IN BULK LOAD"
 S C0XLOC="ZBFDA(172.101,"_$G(ZI)_")"
 D ^%ZTER ; report the error
 Q
625 ;
DELGRAPH(ZGRF,FARY) ; Delete a graph from the triplestore (the strings are not deleted)
 ; ZGRF - graph name
 ; FARY - optional name of the file array (defaults to "C0XFARY")
 ; The entries are found with TING and removed by filing "@" in the
 ; .01 field through UPDIE.
 I '$D(FARY) D INITFARY("C0XFARY") S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZGRAPH
 D TING(.ZGRAPH,ZGRF,FARY)
 I '$D(ZGRAPH) W:$D(DEBUG) !,"NO TRIPLES IN GRAPH" Q
 K C0XFDA
 N ZI S ZI=""
 F  S ZI=$O(ZGRAPH(ZI)) Q:ZI=""  S C0XFDA(C0XTFN,ZI_",",.01)="@"
 D UPDIE(.C0XFDA)
 Q
643 ;
TING(ZRTN,ZGRF,FARY) ; Return the iens of the triples in graph ZGRF
 ; ZRTN - passed by reference; killed, then returned as ZRTN(ien)=""
 ; ZGRF - graph name, resolved with $$IENOF^C0XGET1
 ; FARY - optional name of the file array (defaults to "C0XFARY")
 I '$D(FARY) D INITFARY("C0XFARY") S FARY="C0XFARY"
 D USEFARY(FARY)
 K ZRTN
 N ZI,ZG S ZI=""
 S ZG=$$IENOF^C0XGET1(ZGRF)
 I ZG="" W:$D(DEBUG) !,"ERROR GRAPH NOT FOUND" Q
 Q:'$D(@C0XTN@("G",ZG))  ; nothing indexed under this graph
 F  S ZI=$O(@C0XTN@("G",ZG,ZI)) Q:ZI=""  S ZRTN(ZI)=""
 Q
659 ;
SWUPDIE(ZFDA) ; File an FDA through BULKLOAD or UPDIE, depending on BLKLOAD
 ; ZFDA - passed by reference; killed before returning
 ; Command postconditionals are used rather than IF/ELSE: a DO with
 ; arguments does not preserve $TEST, so an ELSE after it could not be trusted.
 D:$G(BLKLOAD)=1 BULKLOAD(.ZFDA) ; bulk load the batch
 D:$G(BLKLOAD)'=1 UPDIE(.ZFDA) ; no bulk load
 K ZFDA
 Q
667 ;
UPDIE(ZFDA) ; Internal routine to call UPDATE^DIE and check for errors
 ; ZFDA - the FDA, passed by reference; killed before returning
 ; When FileMan reports an error, ZERR holds its message array and an
 ; undefined-variable error is forced so that the error trap is invoked.
 N ZZERR ; NEWed so that it is certain to be undefined for the forced error
 K ZERR
 D CLEAN^DILF
 D UPDATE^DIE("","ZFDA","","ZERR")
 I $D(ZERR) S ZZERR=ZZERR ; ZZERR DOESN'T EXIST, INVOKE THE ERROR TRAP IF TASKED
 K ZFDA
 Q
682 ;
Note: See TracBrowser for help on using the repository browser.