Changeset 1294 for fmts/trunk/p/C0XF2N.m
- Timestamp:
- Nov 12, 2011, 12:15:56 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
fmts/trunk/p/C0XF2N.m
r1279 r1294 1 C0X MAIN ; GPL - Fileman Triples entry point routine ;10/13/11 17:052 ;;0.1;C0X;nopatch;noreleasedate;Build 11 C0XF2N ; GPL - Fileman Triples entry point routine ;10/13/11 17:05 2 ;;0.1;C0X;nopatch;noreleasedate;Build 5 3 3 ;Copyright 2011 George Lilly. Licensed under the terms of the GNU 4 4 ;General Public License See attached copy of the License. … … 33 33 ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL 34 34 ; TRIPLE STORES 35 I $D(@ZFARY) Q ; ALREADY INITIALIZED 35 36 S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER 36 37 S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER 37 38 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME 38 39 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME 39 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/ qds/"40 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/" 40 41 S @ZFARY@("BLKLOAD")=1 ; this file supports block load 41 42 S @ZFARY@("FMTSSTYLE")="F2N" ; fileman style … … 71 72 S C0XFN=Y 72 73 D IMPORT(C0XFN,C0XDIR,,"C0XFARY") 73 Q 74 ; 75 IMPORT(FNAME,FDIR,FURL,FARY) ; EXTRINSIC THAT READS A FILE FROM THE STANDARD 74 K C0XFDA 75 Q 76 ; 77 IMPORT(FNAME,INDIR,INURL,FARY) ; EXTRINSIC THAT READS A FILE FROM THE STANDARD 76 78 ; DIRECTORY, LOADS IT INTO THE TRIPLESTORE AS TEXT, AND RETURNS THE 77 79 ; NODE NAME OF THE TEXT TRIPLE 78 ; FDIR IS THE OPTIONAL DIRECTORY (DEFAUTS TO STANDARD DIR)79 ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE80 ; INDIR IS THE OPTIONAL DIRECTORY (DEFAUTS TO STANDARD DIR) 81 ; INURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE 80 82 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE 81 83 I '$D(FARY) D ; … … 84 86 D USEFARY(FARY) 85 87 N ZD,ZTMP 86 I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE 87 I '$D(FURL) D ; 88 . N ZN2 S ZN2=$TR(FNAME,".","_") ; REMOVE THE DOT FROM THE NAME 89 . S FURL=FDIR_ZN2 88 I '$D(INDIR) S INDIR=C0XDIR ; DIRECTORY OF THE RDF FILE 89 I $G(INURL)="" D ; 90 . ;N ZN2 S ZN2=$P(FNAME,".",1)_"_"_$P(FNAME,".",2) ; REMOVE THE DOT 91 . ;S INURL=FDIR_ZN2 92 . S INURL=INDIR_FNAME 90 93 N ZTMP 91 94 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE … … 94 97 W !,"STARTED: ",C0XSTART 95 98 W !,"READING IN: ",FNAME 96 I '$$FILEREAD(ZTMP, FDIR,FNAME,4) D Q ; QUIT IF NO SUCCESS97 . W !,"ERROR READING FILE: ", FDIR,FNAME99 I '$$FILEREAD(ZTMP,INDIR,FNAME,4) D Q ; QUIT IF NO SUCCESS 100 . W !,"ERROR READING FILE: ",INDIR,FNAME 98 101 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT 99 102 W !,$O(@ZRDF@(""),-1)," LINES READ" 100 D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF 103 D INSRDF(ZRDF,INURL,FARY) ; IMPORT AND PROCESS THE RDF 104 K INURL 105 K C0XFDA 106 K ^TMP("MXMLDOM",$J) 101 107 Q 102 108 ; … … 132 138 . S FARY="C0XFARY" 133 139 D USEFARY(FARY) 140 S BATCNT=0 ; BATCH COUNTER 141 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE 134 142 N ZGRAPH,ZSUBJECT 135 143 S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME … … 139 147 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE 140 148 D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY) 141 D UPDIE(.C0XFDA) ; TRY IT OUT149 D SWUPDIE(.C0XFDA) ; TRY IT OUT 142 150 K C0XCNT ;RESET FOR NEXT TIME 143 151 D STORETXT(ZRDF,ZTXTNM,FARY) 144 152 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM 145 D PROCESS 2(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF153 D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF 146 154 Q 147 155 ; … … 207 215 . S FARY="C0XFARY" 208 216 D USEFARY(FARY) 217 ;N BATCNT 218 ;N BATMAX 209 219 ; -- first parse the rdf file with the MXML parser 210 220 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML 221 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE 211 222 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W") 223 K @ZRDF ; DON'T NEED INPUT BUFFER ANYMORE 212 224 ; -- assign the MXLM dom global name to ZDOM 213 225 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID)) 214 W !,$O(@ZDOM@(""),-1)," XML NODES PARSED" 226 S C0XNODE=$O(@ZDOM@(""),-1) 227 W !,C0XNODE," XML NODES PARSED" 228 S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE 229 W !,"PARSE COMPLETE AT ",C0XPRS 230 S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2) 231 W !," ELAPSED TIME: ",C0XDIFF," SECONDS" 232 I C0XDIFF'=0 D ; 233 . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND" 215 234 ; -- populate the metagraph to point to the graph with status unfinished 216 235 S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT … … 218 237 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH 219 238 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished 239 W !,"INSERTING GRAPH: ",ZGRF 220 240 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT") 221 241 S C0XDATE=$$NOW^XLFDT 222 242 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY) 223 D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store243 D SWUPDIE(.C0XFDA) ; commit the metagraph changes to the triple store 224 244 ; -- 225 245 ; -- pull out the vocabularies in the RDF statement. marked with xmlns: … … 288 308 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ 289 309 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node 290 W !,"INSERTING ",C0XCNT," TRIPLES"291 D UPDIE(.C0XFDA) ; commit the updates to the file292 ; next, mark the graph as finished293 S C0XEND=$$NOW^XLFDT294 W !," ENDED AT: ",C0XEND295 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)296 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"297 I C0XDIFF'=0 D ;298 . W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"299 Q300 ;301 PROCESS2(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE302 ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING303 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE304 ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS305 ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA306 ;307 I '$D(FARY) D ;308 . D INITFARY("C0XFARY")309 . S FARY="C0XFARY"310 D USEFARY(FARY)311 ;N BATCNT312 ;N BATMAX313 S BATCNT=0 ; BATCH COUNTER314 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE315 ; -- first parse the rdf file with the MXML parser316 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML317 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE318 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")319 ; -- assign the MXLM dom global name to ZDOM320 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))321 S C0XNODE=$O(@ZDOM@(""),-1)322 W !,C0XNODE," XML NODES PARSED"323 S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE324 W !,"PARSE COMPLETE AT ",C0XPRS325 S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)326 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"327 I C0XDIFF'=0 D ;328 . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"329 ; -- populate the metagraph to point to the graph with status unfinished330 S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT331 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH332 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH333 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished334 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")335 S C0XDATE=$$NOW^XLFDT336 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)337 D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store338 ; --339 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:340 ; -- put them in a local variable for quick reference341 ; -- TODO: create a graph for vocabularies and validate incoming against it342 ;343 S C0XVOC=""344 N ZI,ZJ,ZK S ZI=""345 F S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI="" D ; FOR EACH xmlns346 . S ZVOC=$P(ZI,"xmlns:",2)347 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))348 ;W !,"VOCABS:" ZWR C0XVOC349 ;350 ; -- look for children called rdf:Description. quit if none. not an rdf file351 ;352 S ZI=$O(@ZDOM@(1,"C",""))353 I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D Q ; not an rdf file354 . W !,"Error. Not an RDF file. Cannot process."355 ;356 ; -- now process the rdf description children357 ;358 S ZI=""359 S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate360 F S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI="" D ;361 . ; -- we are skipping any child that is not rdf:Description362 . ; -- TODO: check to see if this is right in general363 . ;364 . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D Q ;365 . . W !,"SKIPPING NODE: ",ZI366 . ; -- now looking for the subject for the triples367 . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))368 . I ZX'="" D ; we have the subject369 . . ;W " about: ",ZX370 . . S C0XSUB=ZX371 . E D ;372 . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject373 . . I ZX'="" D ;374 . . . S C0XSUB=ZX375 . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT376 . ;377 . ; -- we now have the subject. the children of this node have the rest378 . ;379 . S ZJ="" ; for the children of the rdf:Description nodes380 . F S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ="" D ; for each child381 . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix382 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space383 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix384 . . I C0XPRE[":" D ; expand using vocabulary385 . . . N ZB,ZA386 . . . S ZB=$P(C0XPRE,":",1)387 . . . S ZA=$P(C0XPRE,":",2)388 . . . I $G(C0XVOC(ZB))'="" D ;389 . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded390 . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object391 . . I ZY'="" D Q ;392 . . . S C0XOBJ=ZY ; object393 . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; finally. our first real triple394 . . ; -- this is an else because of the quit above395 . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object396 . . I ZX'="" D Q ; got one397 . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject398 . . . ; without change... this could be foolish .. look at it again later399 . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node400 . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here401 . . I C0XOBJ="" D Q ; not a happy situation402 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ403 . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node404 310 S C0XTRP=$$NOW^XLFDT ; PARSE COMPLETE 405 311 W !,"TRIPLES COMPLETE AT ",C0XTRP … … 419 325 S C0XDIFF=$$FMDIFF^XLFDT(C0XINS,C0XTRP,2) 420 326 W !," ELAPSED TIME: ",C0XDIFF," SECONDS" 421 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"327 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND" 422 328 S C0XEND=$$NOW^XLFDT 423 329 W !," ENDED AT: ",C0XEND 424 330 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2) 425 331 W !," ELAPSED TIME: ",C0XDIFF," SECONDS" 426 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"332 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND" 427 333 Q 428 334 ; … … 432 338 ; 433 339 ANONS() ; RETURNS AN ANONOMOUS SUBJECT 434 Q "_ S:"_$$LKY9340 Q "_:S"_$$LKY9 435 341 ; 436 342 NEWG(NGRAPH,NMETA) ; CREATES A NEW META GRAPH, MARKS IT AS UNFINISHED … … 442 348 ; 443 349 ADD(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT 444 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE445 I '$D(FARY) D ;446 . D INITFARY("C0XFARY")447 . S FARY="C0XFARY"448 D USEFARY(FARY)449 I '$D(C0XCNT) S C0XCNT=0450 N ZNODE451 S ZNODE="N"_$$LKY17452 N ZNARY ; GET READY TO CALL IENOFA453 S ZNARY("ZG",ZG)=""454 S ZNARY("ZS",ZS)=""455 S ZNARY("ZP",ZP)=""456 S ZNARY("ZO",ZO)=""457 D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS458 ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN459 ;S ZSIEN=$$IENOF(ZS)460 ;S ZPIEN=$$IENOF(ZP)461 ;S ZOIEN=$$IENOF(ZO)462 ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED463 S C0XCNT=C0XCNT+1464 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE465 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG",""))466 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS",""))467 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP",""))468 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO",""))469 ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE470 Q471 ;472 ADD2(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT473 350 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE 474 351 I '$D(FARY) D ; … … 637 514 Q 638 515 ; 516 SWUPDIE(ZFDA) ; SWITCH BETWEEN UPDIE AND BULKLOAD 517 . I $G(BLKLOAD) D ; bulk load 518 . . D BULKLOAD(.ZFDA) ; bulk load the batch 519 . E D ; no bulk load 520 . . D UPDIE(.ZFDA) 521 . K ZFDA 522 Q 523 ; 639 524 UPDIE(ZFDA) ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS 640 525 ; ZFDA IS PASSED BY REFERENCE
Note:
See TracChangeset
for help on using the changeset viewer.