source: fmts/trunk/p/C0XF2N.m@ 1279

Last change on this file since 1279 was 1279, checked in by George Lilly, 12 years ago

version with BULKLOAD

File size: 22.4 KB
Line 
C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11 17:05
 ;;0.1;C0X;nopatch;noreleasedate;Build 1
 ;Copyright 2011 George Lilly. Licensed under the terms of the GNU
 ;General Public License See attached copy of the License.
 ;
 ;This program is free software; you can redistribute it and/or modify
 ;it under the terms of the GNU General Public License as published by
 ;the Free Software Foundation; either version 2 of the License, or
 ;(at your option) any later version.
 ;
 ;This program is distributed in the hope that it will be useful,
 ;but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 ;GNU General Public License for more details.
 ;
 ;You should have received a copy of the GNU General Public License along
 ;with this program; if not, write to the Free Software Foundation, Inc.,
 ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ;
 QUIT  ; nothing runs from the top; use one of the labelled entry points
 ;
 ; NOTE(review): the first-line tag is C0XMAIN although this file is
 ; C0XF2N.m - confirm which name is intended.
 ;
 ; This routine is derived from C0XMAIN and experiments with a fast load
 ; for triples that writes directly to the fileman global.
 ; File 172.101 follows the "F2N" design style for triples:
 ;   F - Flat file: no subfiles, every field lives at the root
 ;   2 - two-file solution: all strings are kept in a strings file and
 ;       the triples file only holds pointers to them
 ;   N - Node IDs are generated, rather than using DINUM (which would
 ;       make the IEN serve as the Node ID)
 ; gpl 11/04/2011
 ;
INITFARY(ZFARY) ; Fill the array named by ZFARY with the default triple-store settings
 ; ZFARY is passed by name. Existing nodes other than the ones set here
 ; are left alone. Supply a differently populated array to work with an
 ; additional triple store.
 ; After the array is filled, USEFARY copies each setting into a local
 ; variable of the same name.
 SET @ZFARY@("C0XTFN")=172.101,@ZFARY@("C0XSFN")=172.201 ; triples file number, strings file number
 SET @ZFARY@("C0XTN")=$NA(^C0X(101)),@ZFARY@("C0XSN")=$NA(^C0X(201)) ; triples global name, strings global name
 SET @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/qds/" ; default directory for incoming files
 SET @ZFARY@("BLKLOAD")=1,@ZFARY@("FMTSSTYLE")="F2N" ; block load supported; fileman style
 DO USEFARY(ZFARY)
 QUIT
44 ;
USEFARY(ZFARY) ; INITIALIZES VARIABLES SAVED IN ARRAY ZFARY
 ; ZFARY is passed by name. For every first-level subscript of the array
 ; a local variable with that name is set to the node's value, e.g.
 ; @ZFARY@("C0XTFN")=172.101 results in C0XTFN=172.101.
 ; The variables are intentionally NOT NEWed so that they remain
 ; available to the caller after this returns.
 ; Name indirection is used instead of building and XECUTEing a SET
 ; command string, so values that contain a double quote are copied
 ; intact and no scratch variable is left behind.
 N ZI S ZI=""
 F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  S @ZI=@ZFARY@(ZI)
 Q
53 ;
FILEIN ; INTERACTIVE ENTRY POINT FOR OPTION TO READ IN A FILE
 ; Prompts for a directory and a file name with the FileMan reader
 ; (^DIR) and passes both to IMPORT using the C0XFARY file array.
 ; C0XDIR and C0XFN are deliberately left defined so the next call can
 ; offer them as defaults; the chosen directory is also saved in
 ; C0XFARY("C0XDIR").
 ; The reader's own variables are NEWed so they do not leak, and DIRUT
 ; is tested after each prompt so that "^", "^^", a timeout or an empty
 ; answer all abandon the import.
 N DIR,X,Y,DTOUT,DUOUT,DIRUT,DIROUT
 I '$D(C0XFARY) D INITFARY("C0XFARY")
 D USEFARY("C0XFARY")
 ; -- directory prompt
 S DIR(0)="F^3:240"
 S DIR("A")="File Directory"
 S DIR("B")=C0XDIR
 D ^DIR
 I $D(DIRUT) Q
 S C0XDIR=Y
 S C0XFARY("C0XDIR")=Y
 ; -- file name prompt; start from a clean DIR array
 K DIR
 S DIR(0)="F^3:240"
 S DIR("A")="File Name"
 S DIR("B")=$S($D(C0XFN):C0XFN,1:"qds.rdf")
 D ^DIR
 I $D(DIRUT) Q
 S C0XFN=Y
 D IMPORT(C0XFN,C0XDIR,,"C0XFARY")
 Q
74 ;
IMPORT(FNAME,FDIR,FURL,FARY) ; READ A FILE FROM THE STANDARD DIRECTORY AND
 ; LOAD IT INTO THE TRIPLESTORE
 ; Called with DO; no value is returned.
 ; FNAME IS THE NAME OF THE FILE TO READ
 ; FDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO C0XDIR FROM THE FILE ARRAY)
 ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
 ;      (DEFAULTS TO FDIR FOLLOWED BY FNAME WITH "." CHANGED TO "_")
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; Sets C0XSTART (start time), which PROCESS/PROCESS2 use for timing.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZTMP,ZRDF
 I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
 I '$D(FURL) D  ;
 . N ZN2 S ZN2=$TR(FNAME,".","_") ; REMOVE THE DOT FROM THE NAME
 . S FURL=FDIR_ZN2
 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; ROOT OF THE INCOMING LINES
 S ZTMP=$NA(@ZRDF@(1)) ; FIRST LINE NODE; THE 4TH SUBSCRIPT IS INCREMENTED
 ; Clear the whole array, not just line 1, so that lines left over from
 ; an earlier, longer file cannot be mixed into this one.
 K @ZRDF
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"READING IN: ",FNAME
 I '$$FILEREAD(ZTMP,FDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
 . W !,"ERROR READING FILE: ",FDIR,FNAME
 W !,$O(@ZRDF@(""),-1)," LINES READ"
 D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF
 Q
102 ;
WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
 ; ZURL IS THE ADDRESS TO DOWNLOAD; IT IS ALSO USED AS THE NAME OF THE
 ;      SOURCE WHEN THE TEXT IS INSERTED
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; The download is fetched with $$httpGET^%zewdGTM into a local array,
 ; copied to ^TMP("C0X","WGET",$J) and handed to INSRDF.
 ; Sets OK, C0XSTART, C0XLINES, C0XDLC and C0XDIFF.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZLOC,ZTMP ; keep the download buffer out of the caller's symbol table
 S ZLOC=$NA(^TMP("C0X","WGET",$J))
 K @ZLOC
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"DOWNLOADING: ",ZURL
 S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
 ; nothing came back: report it instead of inserting an empty document
 I '$D(ZTMP) W !,"ERROR: NOTHING DOWNLOADED FROM: ",ZURL Q
 M @ZLOC=ZTMP
 K ZTMP ; the global copy is the one used from here on
 S C0XLINES=$O(@ZLOC@(""),-1)
 W !,C0XLINES," LINES READ"
 S C0XDLC=$$NOW^XLFDT ; DOWNLOAD COMPLETE
 W !,"DOWNLOAD COMPLETE AT ",C0XDLC
 S C0XDIFF=$$FMDIFF^XLFDT(C0XDLC,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XLINES/C0XDIFF,".")," LINES PER SEC"
 D INSRDF(ZLOC,ZURL,FARY)
 Q
127 ;
INSRDF(ZRDF,ZNAME,FARY) ; Store an RDF document as text, then parse it into triples
 ; ZRDF  - passed by name; global holding the lines of the document
 ; ZNAME - name (url) recorded for the document
 ; FARY  - optional; name of the file array of the triple store to use
 ; Two triples are filed in a randomly named graph: one giving the url
 ; and one naming the text node that holds the source. The source text is
 ; then stored and PROCESS2 is called with ZNAME in its graph position
 ; and the random graph in its metadata-graph position.
 I '$D(FARY) D INITFARY("C0XFARY") S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZGRAPH,ZSUBJECT,ZTXTNM
 S ZGRAPH="_:G"_$$LKY9 ; random graph name
 S ZSUBJECT=$$ANONS ; random anonymous subject
 D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; name of the text node
 D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY)
 D UPDIE(.C0XFDA) ; file the two triples queued above
 K C0XCNT ; restart the triple counter for the next caller
 D STORETXT(ZRDF,ZTXTNM,FARY)
 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
 D PROCESS2(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; parse the document and insert its triples
 Q
147 ;
STORETXT(ZTXT,ZNAME,FARY) ; File text into the strings-file entry called ZNAME
 ; ZTXT  - name of the array holding the text (handed to WP^DIE as the source)
 ; ZNAME - string whose entry receives the text; IENOF adds it when absent
 ; FARY  - optional; name of the file array of the triple store to use
 ; The text goes into field 1 of file C0XSFN. Any error array returned
 ; by WP^DIE is dumped to the current device.
 I '$D(FARY) D INITFARY("C0XFARY") S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 S ZIEN=$$IENOF(ZNAME,FARY) ; entry number of the string
 K ZERR
 D CLEAN^DILF
 D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR")
 I $D(ZERR) ZWR ZERR
 Q
162 ;
GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT
 ; ZRTN IS PASSED BY REFERENCE AND RECEIVES THE TEXT (FIELD 1 OF FILE C0XSFN)
 ; ZNAME IS THE NAME OF THE STRING WHOSE TEXT IS WANTED
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; The value returned by $$GET1^DIQ is left in OK.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 ; FARY must be handed on: without it IENOF runs INITFARY, which resets
 ; the C0X* variables to the default store before the lookup.
 S ZIEN=$$IENOF(ZNAME,FARY)
 S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN")
 Q
173 ;
WHERETXT(ZNAME,FARY) ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
 ; WHERE THE TEXT IS LOCATED. ZNAME IS THE NAME OF THE STRING
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; The result is the node (ien,1) under the strings global C0XSN.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 ; FARY must be handed on: without it IENOF runs INITFARY, which resets
 ; the C0X* variables to the default store before the lookup.
 S ZIEN=$$IENOF(ZNAME,FARY)
 Q $NA(@C0XSN@(ZIEN,1))
183 ;
FILEREAD(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
 ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
 ; IE ^TMP("C0X","FILEIN",1)
 ; ZLVL IN THIS CASE WOULD BE 3 INCREMENTING THE 1
 ; ZDIR IS THE DIRECTORY AND ZFNAME THE NAME OF THE FILE TO READ
 ; EXTRINSIC WHICH RETURNS THE RESULT OF FTG^%ZISH
 ; The file name is taken from the ZFNAME parameter; the earlier code
 ; read FNAME, which only existed when the caller happened to define it.
 Q $$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL)
191 ;
TESTPROC ; Run PROCESS against an RDF file already sitting in ^TMP
 ; The input is the FILEIN array saved under the fixed job number 12226;
 ; results go to graph /test/rdfFile with metadata in /test/rdfFile/meta.
 ; ZIN, ZGRAPH, ZM and G are left defined afterwards.
 SET ZIN=$NA(^TMP("C0X",12226,"FILEIN")),ZGRAPH="/test/rdfFile",ZM="/test/rdfFile/meta"
 DO PROCESS(.G,ZIN,ZGRAPH,ZM)
 QUIT
198 ;
PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
 ; ZRTN IS PASSED BY REFERENCE; IT IS RESERVED FOR MESSAGES ABOUT THE
 ;      PROCESSING (NOTHING IS STORED IN IT AT PRESENT)
 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
 ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
 ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ;
 ; The document is parsed with the MXML DOM parser. Every child element
 ; of every top-level rdf:Description becomes one triple queued with
 ; ADD; the queue is filed with UPDIE at the end.
 ; C0XDOCID, ZDOM, METAS, C0XVOC, C0XSUB, C0XPRE, C0XOBJ and the timing
 ; variables are left defined on exit.
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZI,ZJ,ZFIRST,ZVOC,ZX
 ; C0XSTART is normally set by IMPORT or WGET. Default it here so the
 ; elapsed-time report still works for callers (e.g. TESTPROC) that do
 ; not set it.
 I '$D(C0XSTART) S C0XSTART=$$NOW^XLFDT
 ; -- first parse the rdf file with the MXML parser
 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
 I 'C0XDOCID W !,"Error. XML parse failed. Cannot process." Q
 ; -- assign the MXML dom global name to ZDOM
 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
 W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
 ; -- populate the metagraph to point to the graph with status unfinished
 S METAS=$$ANONS ; GET AN ANONYMOUS RANDOM SUBJECT
 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
 S C0XDATE=$$NOW^XLFDT
 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
 D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
 ; --
 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
 ; -- put them in a local variable for quick reference
 ; -- TODO: create a graph for vocabularies and validate incoming against it
 ;
 K C0XVOC S C0XVOC="" ; drop prefixes left over from an earlier document
 S ZI=""
 F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
 . S ZVOC=$P(ZI,"xmlns:",2)
 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
 ;
 ; -- the first child must be rdf:Description, otherwise this is not
 ; -- treated as an rdf file. A root with no children is rejected before
 ; -- the empty subscript can be used in a global reference.
 ;
 S ZFIRST=$O(@ZDOM@(1,"C",""))
 I ZFIRST="" W !,"Error. Not an RDF file. Cannot process." Q
 I $G(@ZDOM@(1,"C",ZFIRST))'="rdf:Description" W !,"Error. Not an RDF file. Cannot process." Q
 ;
 ; -- now process the rdf description children
 ;
 S ZI=""
 S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
 F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
 . ; -- we are skipping any child that is not rdf:Description
 . ; -- TODO: check to see if this is right in general
 . I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" W !,"SKIPPING NODE: ",ZI Q
 . ; -- subject: rdf:about, else rdf:nodeID, else a new anonymous subject.
 . ; -- It is worked out afresh for every description so that a node
 . ; -- without either attribute does not inherit the previous subject.
 . S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:about"))
 . I C0XSUB="" S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:nodeID"))
 . I C0XSUB="" S C0XSUB=$$ANONS
 . ;
 . ; -- we now have the subject. the children of this node have the rest
 . ;
 . S ZJ="" ; for the children of the rdf:Description nodes
 . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
 . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
 . . I C0XPRE[":" D  ; expand using vocabulary
 . . . N ZB,ZA
 . . . S ZB=$P(C0XPRE,":",1)
 . . . S ZA=$P(C0XPRE,":",2)
 . . . I $G(C0XVOC(ZB))'="" S C0XPRE=C0XVOC(ZB)_ZA ; expanded
 . . ; -- object: rdf:resource, else rdf:nodeID (used unchanged), else
 . . ; -- the first text node of the element
 . . S C0XOBJ=$G(@ZDOM@(ZJ,"A","rdf:resource"))
 . . I C0XOBJ="" S C0XOBJ=$G(@ZDOM@(ZJ,"A","rdf:nodeID"))
 . . I C0XOBJ="" S C0XOBJ=$G(@ZDOM@(ZJ,"T",1))
 . . I C0XOBJ="" W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ Q
 . . ; FARY is handed on so ADD does not fall back to the default store
 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY)
 W !,"INSERTING ",C0XCNT," TRIPLES"
 I $D(C0XFDA) D UPDIE(.C0XFDA) ; commit the updates to the file, if any were queued
 ; next, mark the graph as finished
 S C0XEND=$$NOW^XLFDT
 W !," ENDED AT: ",C0XEND
 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
 Q
300 ;
PROCESS2(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
 ; ZRTN IS PASSED BY REFERENCE; IT IS RESERVED FOR MESSAGES ABOUT THE
 ;      PROCESSING (NOTHING IS STORED IN IT AT PRESENT)
 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
 ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
 ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ;
 ; Batching variant of PROCESS: triples are queued with ADD2, which
 ; files a batch every BATMAX triples; whatever is left is filed here
 ; with BULKLOAD when BLKLOAD is set, otherwise with UPDIE.
 ; BATCNT, BATMAX, C0XDOCID, ZDOM, METAS, C0XVOC, C0XSUB, C0XPRE, C0XOBJ
 ; and the timing variables are left defined on exit.
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZI,ZJ,ZFIRST,ZVOC,ZX
 S BATCNT=0 ; BATCH COUNTER
 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
 ; C0XSTART is normally set by IMPORT or WGET. Default it here so the
 ; total elapsed-time report still works for other callers.
 I '$D(C0XSTART) S C0XSTART=$$NOW^XLFDT
 ; -- first parse the rdf file with the MXML parser
 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
 I 'C0XDOCID W !,"Error. XML parse failed. Cannot process." Q
 ; -- assign the MXML dom global name to ZDOM
 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
 S C0XNODE=$O(@ZDOM@(""),-1)
 W !,C0XNODE," XML NODES PARSED"
 S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
 W !,"PARSE COMPLETE AT ",C0XPRS
 S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
 ; -- populate the metagraph to point to the graph with status unfinished
 S METAS=$$ANONS ; GET AN ANONYMOUS RANDOM SUBJECT
 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
 S C0XDATE=$$NOW^XLFDT
 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
 D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
 ; --
 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
 ; -- put them in a local variable for quick reference
 ; -- TODO: create a graph for vocabularies and validate incoming against it
 ;
 K C0XVOC S C0XVOC="" ; drop prefixes left over from an earlier document
 S ZI=""
 F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
 . S ZVOC=$P(ZI,"xmlns:",2)
 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
 ;
 ; -- the first child must be rdf:Description, otherwise this is not
 ; -- treated as an rdf file. A root with no children is rejected before
 ; -- the empty subscript can be used in a global reference.
 ;
 S ZFIRST=$O(@ZDOM@(1,"C",""))
 I ZFIRST="" W !,"Error. Not an RDF file. Cannot process." Q
 I $G(@ZDOM@(1,"C",ZFIRST))'="rdf:Description" W !,"Error. Not an RDF file. Cannot process." Q
 ;
 ; -- now process the rdf description children
 ;
 S ZI=""
 S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
 F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
 . ; -- we are skipping any child that is not rdf:Description
 . ; -- TODO: check to see if this is right in general
 . I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" W !,"SKIPPING NODE: ",ZI Q
 . ; -- subject: rdf:about, else rdf:nodeID, else a new anonymous subject.
 . ; -- It is worked out afresh for every description so that a node
 . ; -- without either attribute does not inherit the previous subject.
 . S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:about"))
 . I C0XSUB="" S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:nodeID"))
 . I C0XSUB="" S C0XSUB=$$ANONS
 . ;
 . ; -- we now have the subject. the children of this node have the rest
 . ;
 . S ZJ="" ; for the children of the rdf:Description nodes
 . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
 . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
 . . I C0XPRE[":" D  ; expand using vocabulary
 . . . N ZB,ZA
 . . . S ZB=$P(C0XPRE,":",1)
 . . . S ZA=$P(C0XPRE,":",2)
 . . . I $G(C0XVOC(ZB))'="" S C0XPRE=C0XVOC(ZB)_ZA ; expanded
 . . ; -- object: rdf:resource, else rdf:nodeID (used unchanged), else
 . . ; -- the first text node of the element
 . . S C0XOBJ=$G(@ZDOM@(ZJ,"A","rdf:resource"))
 . . I C0XOBJ="" S C0XOBJ=$G(@ZDOM@(ZJ,"A","rdf:nodeID"))
 . . I C0XOBJ="" S C0XOBJ=$G(@ZDOM@(ZJ,"T",1))
 . . I C0XOBJ="" W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ Q
 . . ; FARY is handed on so ADD2 does not fall back to the default store
 . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY)
 S C0XTRP=$$NOW^XLFDT ; TRIPLES COMPLETE
 W !,"TRIPLES COMPLETE AT ",C0XTRP
 S C0XDIFF=$$FMDIFF^XLFDT(C0XTRP,C0XPRS,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
 W !,"INSERTING ",C0XCNT," TRIPLES"
 ; -- file whatever is left of the last, partly filled batch
 I $D(C0XFDA) D  ;
 . I $G(BLKLOAD) D BULKLOAD(.C0XFDA) Q
 . D UPDIE(.C0XFDA) ; commit the updates to the file
 ; BULKLOAD does not clear the array it is given; remove the rows so
 ; they cannot be mixed into the next FDA that is built
 K C0XFDA
 ; next, mark the graph as finished
 S C0XINS=$$NOW^XLFDT ; INSERTION COMPLETE
 W !,"INSERTION COMPLETE AT ",C0XINS
 S C0XDIFF=$$FMDIFF^XLFDT(C0XINS,C0XTRP,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 ; a step that takes less than a second gives C0XDIFF=0: skip the rate
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"
 S C0XEND=$$NOW^XLFDT
 W !," ENDED AT: ",C0XEND
 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
 Q
428 ;
SHOW(ZN) ; Dump everything stored under node ZN of this job's MXML DOM
 ; Reads ^TMP("MXMLDOM",$J,1,ZN,...) - the document handle is fixed at 1.
 ZWRITE ^TMP("MXMLDOM",$J,1,ZN,*)
 QUIT
432 ;
ANONS() ; Extrinsic: build a name for an anonymous subject
 ; The name is "_S:" followed by the nine random digits from $$LKY9.
 N ZDIGITS
 S ZDIGITS=$$LKY9
 Q "_S:"_ZDIGITS
435 ;
NEWG(NGRAPH,NMETA) ; Generate a graph name and the matching metagraph name
 ; NGRAPH and NMETA are passed by reference and are the return values:
 ;   NGRAPH = "G" followed by nine random digits
 ;   NMETA  = NGRAPH followed by "A"
 ; Only the names are produced here; nothing is written to the store.
 SET NGRAPH="G"_$$LKY9,NMETA=NGRAPH_"A"
 QUIT
442 ;
ADD(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
 ; ZG, ZS, ZP, ZO ARE THE GRAPH, SUBJECT, PREDICATE AND OBJECT STRINGS
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; The four strings are resolved to strings-file entries by IENOFA
 ; (which adds any that are missing) and one row is queued in C0XFDA
 ; under IENS "?+n," where n is the running counter C0XCNT.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 I '$D(C0XCNT) S C0XCNT=0
 ; ZIENS is NEWed so the lookup results stay out of the caller's symbol table
 N ZNODE,ZNARY,ZIENS,ZROW
 S ZNODE="N"_$$LKY17 ; generated node id
 S ZNARY("ZG",ZG)=""
 S ZNARY("ZS",ZS)=""
 S ZNARY("ZP",ZP)=""
 S ZNARY("ZO",ZO)=""
 D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
 S C0XCNT=C0XCNT+1
 S ZROW="?+"_C0XCNT_"," ; IENS of the row being queued
 S C0XFDA(C0XTFN,ZROW,.01)=ZNODE
 S C0XFDA(C0XTFN,ZROW,.02)=$O(ZIENS("IEN","ZG",""))
 S C0XFDA(C0XTFN,ZROW,.03)=$O(ZIENS("IEN","ZS",""))
 S C0XFDA(C0XTFN,ZROW,.04)=$O(ZIENS("IEN","ZP",""))
 S C0XFDA(C0XTFN,ZROW,.05)=$O(ZIENS("IEN","ZO",""))
 ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
 Q
471 ;
ADD2(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
 ; Batching version of ADD.
 ; ZG, ZS, ZP, ZO ARE THE GRAPH, SUBJECT, PREDICATE AND OBJECT STRINGS
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; A row is queued in C0XFDA. When BLKLOAD=1 the row is subscripted by
 ; the plain batch counter (the layout BULKLOAD expects); otherwise by
 ; the IENS "?+n," used with UPDIE. When the batch reaches BATMAX rows
 ; it is filed, C0XFDA is cleared and BATCNT restarts at 0.
 ; A partly filled batch is NOT filed here - the caller must do that.
 ; BATCNT and BATMAX are normally set by PROCESS2; when they are not,
 ; BATCNT starts from 0 and a batch size of 10000 is used.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 I '$D(C0XCNT) S C0XCNT=0
 ; ZIENS is NEWed so the lookup results stay out of the caller's symbol table
 N ZNODE,ZNARY,ZIENS,ZROW,ZBULK,ZMAX
 S ZNODE="N"_$$LKY17 ; generated node id
 S ZNARY("ZG",ZG)=""
 S ZNARY("ZS",ZS)=""
 S ZNARY("ZP",ZP)=""
 S ZNARY("ZO",ZO)=""
 D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
 S BATCNT=$G(BATCNT)+1
 S C0XCNT=C0XCNT+1
 S ZMAX=$G(BATMAX,10000)
 ; one flag decides both the row layout and the filing method, so the
 ; two can never disagree
 S ZBULK=($G(BLKLOAD)=1)
 S ZROW=$S(ZBULK:BATCNT,1:"?+"_BATCNT_",")
 S C0XFDA(C0XTFN,ZROW,.01)=ZNODE
 S C0XFDA(C0XTFN,ZROW,.02)=$O(ZIENS("IEN","ZG",""))
 S C0XFDA(C0XTFN,ZROW,.03)=$O(ZIENS("IEN","ZS",""))
 S C0XFDA(C0XTFN,ZROW,.04)=$O(ZIENS("IEN","ZP",""))
 S C0XFDA(C0XTFN,ZROW,.05)=$O(ZIENS("IEN","ZO",""))
 I BATCNT=ZMAX D  ; BATCH IS DONE
 . I ZBULK D BULKLOAD(.C0XFDA) ; bulk load the batch
 . I 'ZBULK D UPDIE(.C0XFDA) ; no bulk load
 . K C0XFDA
 . S BATCNT=0 ; RESET COUNTER
 ; REMEMBER TO FILE THE LAST BATCH WHEN YOU'RE DONE
 Q
515 ;
LKY9() ; Extrinsic: returns a string of 9 random decimal digits
 ; Used to make node and graph names that are unlikely to collide.
 ; Leading zeros are kept, so the result is always 9 characters long.
 N ZDIGITS,ZK
 S ZDIGITS=""
 F ZK=1:1:9 S ZDIGITS=ZDIGITS_$R(10)
 Q ZDIGITS
523 ;
LKY17() ; Extrinsic: returns a string of 17 random decimal digits
 ; Used to make node names that are unlikely to collide.
 ; Leading zeros are kept, so the result is always 17 characters long.
 N ZDIGITS,ZK
 S ZDIGITS=""
 F ZK=1:1:17 S ZDIGITS=ZDIGITS_$R(10)
 Q ZDIGITS
531 ;
IENOF(ZSTRING,FARY) ; EXTRINSIC WHICH RETURNS THE IEN OF ZSTRING IN THE STRINGS FILE
 ; ZSTRING IS THE TEXT TO LOOK UP; IT IS ADDED TO THE FILE WHEN NOT FOUND
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; The lookup goes through the "B" index of the strings global C0XSN.
 ; Uses (and clears) C0XFDA2 when an entry has to be added.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 ; load C0XSN/C0XSFN from the file array, as every other entry point
 ; does, instead of relying on the caller having done so
 D USEFARY(FARY)
 N ZIEN
 S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
 I ZIEN="" D  ;
 . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
 . D UPDIE(.C0XFDA2)
 . S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
 . K C0XFDA2
 Q ZIEN
544 ;
IENOFA(ZOUTARY,ZINARY,FARY) ; RESOLVE STRINGS TO IEN IN STRINGS FILE
 ; OR ADD THEM IF
 ; MISSING. ZINARY AND ZOUTARY ARE PASSED BY REFERENCE
 ; ZINARY LOOKS LIKE ZINARY("VAR","VAL")=""
 ; RETURNS IN ZOUTARY OF THE FORM ZOUTARY("IEN","VAR",IEN)=""
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; The lookup goes through the "B" index of the strings global C0XSN.
 ; Uses (and clears) C0XFDA2.
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 K ZOUTARY ; START WITH CLEAN RESULTS
 K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
 N ZI,ZV,ZIEN,ZCNT,ZSEEN
 S ZI="",ZCNT=0
 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; LOOK FOR MISSING STRINGS
 . S ZV=$O(ZINARY(ZI,""))
 . ; the same text may appear under two VARs (e.g. subject and object);
 . ; queue it once only so the strings file does not get duplicates
 . I $D(ZSEEN(ZV)) Q
 . I $O(@C0XSN@("B",ZV,""))'="" Q  ; already on file
 . S ZSEEN(ZV)=""
 . S ZCNT=ZCNT+1
 . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
 I $D(C0XFDA2) D  ;
 . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
 . K C0XFDA2 ; CLEAN UP
 S ZI=""
 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOW GET ALL IENS
 . S ZV=$O(ZINARY(ZI,""))
 . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
 . I ZIEN="" D  ;
 . . W !,"ERROR ADDING STRING: ",ZV
 . . B
 . S ZOUTARY("IEN",ZI,ZIEN)=""
 Q
574 ;
BULKLOAD(ZBFDA) ; BULK LOADER FOR LOADING TRIPLES INTO FILE 172.101
 ; USING GLOBAL SETS INSTEAD OF UPDATE^DIE
 ; QUITS IF FILE IS NOT 172.101
 ; EXPECTS AN FDA WITHOUT STRINGS FOR THE IENS, STARTING AT 1
 ; QUITS IF FIRST ENTRY IS NOT IENS 1
 ; ASSUMES THAT THE LAST IENS IS THE COUNT OF ENTRIES
 ; ZBFDA IS PASSED BY REFERENCE; IT IS NOT CLEARED HERE
 ;
 ; -- reserves a block of iens from file 172.101 by locking the zero node
 ; -- ^C0X(101,0) and adding the count of entries to piece 3 and 4
 ; -- then unlocking to minimize the duration of the lock
 ; -- exactly ZCNT iens are reserved: the rows are written to
 ; -- ZBASE+1 ... ZBASE+ZCNT, where ZBASE is the old piece 3
 ; -- NOTE(review): the LOCK has no timeout and will wait indefinitely
 ;
 W !,"USING BULKLOAD"
 I '$D(ZBFDA) Q  ; EMPTY FDA
 I $O(ZBFDA(""))'=172.101 Q  ; WRONG FILE
 I $O(ZBFDA(172.101,""))'=1 W !,"ERROR IN BULK LOAD - FIRST ENTRY IS NOT 1" Q
 N ZCNT,ZP3,ZP4
 ; -- find the number of nodes to insert
 S ZCNT=$O(ZBFDA(172.101,""),-1)
 I ZCNT="" D  Q  ;
 . W !,"ERROR IN BULK LOAD - INVALID NODE COUNT"
 . B
 ; -- lock the zero node and reserve a block of iens to insert
 W !,"LOCKING ZERO NODE"
 LOCK +^C0X(101,0)
 S ZP3=$P(^C0X(101,0),"^",3)
 S ZP4=$P(^C0X(101,0),"^",4)
 S $P(^C0X(101,0),"^",3)=ZP3+ZCNT ; last ien handed out
 S $P(^C0X(101,0),"^",4)=ZP4+ZCNT ; number of entries
 LOCK -^C0X(101,0)
 N ZI,ZN,ZG,ZS,ZP,ZO,ZIEN,ZBASE
 S ZBASE=ZP3 ; the last ien in the file
 W !,"ZERO NODE UNLOCKED, IENS RESERVED=",ZCNT
 W !,$$NOW^XLFDT
 S ZI=""
 F  S ZI=$O(ZBFDA(172.101,ZI)) Q:ZI=""  D  ;
 . ; a row with any empty field is reported and skipped
 . S ZN=$G(ZBFDA(172.101,ZI,.01)) ; node name
 . I ZN="" D BLKERR Q  ;
 . S ZG=$G(ZBFDA(172.101,ZI,.02)) ; graph pointer
 . I ZG="" D BLKERR Q  ;
 . S ZS=$G(ZBFDA(172.101,ZI,.03)) ; subject pointer
 . I ZS="" D BLKERR Q  ;
 . S ZP=$G(ZBFDA(172.101,ZI,.04)) ; predicate pointer
 . I ZP="" D BLKERR Q  ;
 . S ZO=$G(ZBFDA(172.101,ZI,.05)) ; object pointer
 . I ZO="" D BLKERR Q  ;
 . S ZIEN=ZI+ZBASE ; the new ien
 . S ^C0X(101,ZIEN,0)=ZN_"^"_ZG_"^"_ZS_"^"_ZP_"^"_ZO ; set the zero node
 . S ^C0X(101,"B",ZN,ZIEN)="" ; the B index
 . S ^C0X(101,"G",ZG,ZIEN)="" ; the G for Graph index
 . S ^C0X(101,"SPO",ZS,ZP,ZO)=""
 . S ^C0X(101,"SOP",ZS,ZO,ZP)=""
 . S ^C0X(101,"OPS",ZO,ZP,ZS)=""
 . S ^C0X(101,"OSP",ZO,ZS,ZP)=""
 . S ^C0X(101,"GOPS",ZG,ZO,ZP,ZS)=""
 . S ^C0X(101,"GOSP",ZG,ZO,ZS,ZP)=""
 . S ^C0X(101,"GPSO",ZG,ZP,ZS,ZO)=""
 . S ^C0X(101,"GSPO",ZG,ZS,ZP,ZO)=""
 Q
633 ;
BLKERR ; Report a bad row found by BULKLOAD, then break
 ; Called from inside BULKLOAD's loop and relies on its variables:
 ; ZBFDA is the FDA being loaded and ZI the number of the row in error.
 ; The FDA is subscripted (172.101,row,field), so that is the node to dump.
 W !,"ERROR IN BULK LOAD",! ZWR ZBFDA(172.101,ZI,*)
 B
 Q
638 ;
UPDIE(ZFDA) ; Internal: file an FDA with UPDATE^DIE and stop on any error
 ; ZFDA is passed by reference and is killed once it has been filed.
 ; Errors come back in ZERR, which is cleared on entry and left defined.
 DO CLEAN^DILF
 KILL ZERR
 DO UPDATE^DIE("","ZFDA","","ZERR")
 ; ZZERR is expected not to exist: referencing it on purpose raises an
 ; undefined-variable error so the error trap is invoked when tasked
 IF $D(ZERR) SET ZZERR=ZZERR
 KILL ZFDA
 QUIT
653 ;
Note: See TracBrowser for help on using the repository browser.