- Timestamp:
- Nov 2, 2011, 8:38:42 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
fmts/trunk/p/C0XMAIN.m
r1273 r1274
 K ZTMP
 S ZLOC=$NA(^TMP("C0X","WGET",$J))
 K @ZLOC
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
… …
 D STORETXT(ZRDF,ZTXTNM,FARY)
 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
 ;D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; r1273 CALL, SUPERSEDED BY THE LINE BELOW
 D PROCESS2(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF
 Q
 ;
… …
 Q
 ;
PROCESS2(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
 ; Parses the RDF/XML held in @ZRDF with the MXML DOM parser, records a
 ; metagraph entry (fmts:about / fmts:status / fmts:dateTime) for the
 ; graph, then walks every rdf:Description child of the root element and
 ; queues one triple per child element through ADD2.
 ;
 ; ZRTN IS PASSED BY REFERENCE AND IS DOCUMENTED AS RETURNING MESSAGES
 ;      ABOUT THE PROCESSING. NOTE(review): nothing below sets it.
 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
 ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
 ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
 ;      (a random "_:G" name is generated when it is not passed)
 ; FARY IS OPTIONAL AND NAMES THE FILE ARRAY; DEFAULTS TO "C0XFARY"
 ;
 ; Variables left in the symbol table (not NEWed here): BATCNT, BATMAX,
 ; C0XDOCID, ZDOM, METAS, C0XDATE, C0XVOC, C0XSUB, C0XPRE, C0XOBJ,
 ; C0XCNT, C0XEND, C0XDIFF.
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 ; C0XSTART is set elsewhere in this routine (next to the "STARTED: "
 ; message). Default it so a direct call does not fail with an
 ; undefined variable when the elapsed time is computed below.
 I '$D(C0XSTART) S C0XSTART=$$NOW^XLFDT
 ;N BATCNT
 ;N BATMAX
 S BATCNT=0 ; BATCH COUNTER
 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
 ; -- first parse the rdf file with the MXML parser
 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
 ; -- the parser returns 0 instead of a document handle when it fails;
 ; -- stop before any metagraph entry is written for an unusable file
 I 'C0XDOCID W !,"Error. XML parse failed. Cannot process." Q
 ; -- assign the MXML dom global name to ZDOM
 ; NOTE(review): the DOM is left in ^TMP("MXMLDOM",$J) on exit; confirm
 ; whether that is intentional (SHOW reads it) before adding a delete.
 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
 W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
 ; -- populate the metagraph to point to the graph with status unfinished
 S METAS=$$ANONS ; GET AN ANONYMOUS RANDOM SUBJECT
 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
 S C0XDATE=$$NOW^XLFDT
 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
 D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
 ; --
 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
 ; -- put them in a local variable for quick reference
 ; -- TODO: create a graph for vocabularies and validate incoming against it
 ;
 K C0XVOC ; drop prefixes left over from a previously processed file
 S C0XVOC=""
 N ZI,ZJ,ZK S ZI=""
 F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
 . S ZVOC=$P(ZI,"xmlns:",2)
 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
 ;W !,"VOCABS:" ZWR C0XVOC
 ;
 ; -- look for children called rdf:Description. quit if none. not an rdf file
 ;
 S ZI=$O(@ZDOM@(1,"C",""))
 ; -- no children at all: report it here rather than referencing the
 ; -- global with a null subscript in the test that follows
 I ZI="" W !,"Error. Not an RDF file. Cannot process." Q
 I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ; not an rdf file
 . W !,"Error. Not an RDF file. Cannot process."
 ;
 ; -- now process the rdf description children
 ;
 S ZI=""
 S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
 F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
 . ; -- we are skipping any child that is not rdf:Description
 . ; -- TODO: check to see if this is right in general
 . ;
 . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
 . . W !,"SKIPPING NODE: ",ZI
 . ; -- now looking for the subject for the triples.
 . ; -- the subject is resolved afresh for every rdf:Description so a
 . ; -- node with neither rdf:about nor rdf:nodeID gets its own anonymous
 . ; -- subject instead of silently reusing the previous node's subject
 . S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:about"))
 . ;W " about: ",C0XSUB
 . I C0XSUB="" S C0XSUB=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
 . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT
 . ;
 . ; -- we now have the subject. the children of this node have the rest
 . ;
 . S ZJ="" ; for the children of the rdf:Description nodes
 . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
 . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
 . . I C0XPRE[":" D  ; expand using vocabulary
 . . . N ZB,ZA
 . . . S ZB=$P(C0XPRE,":",1)
 . . . S ZA=$P(C0XPRE,":",2)
 . . . I $G(C0XVOC(ZB))'="" D  ;
 . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
 . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
 . . I ZY'="" D  Q  ;
 . . . S C0XOBJ=ZY ; object
 . . . ; FARY is handed on so the triple is filed in the same file
 . . . ; array as the metagraph entries added above
 . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; finally. our first real triple
 . . ; -- this is an else because of the quit above
 . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
 . . I ZX'="" D  Q  ; got one
 . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
 . . . ; without change... this could be foolish .. look at it again later
 . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
 . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
 . . I C0XOBJ="" D  Q  ; not a happy situation
 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
 . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
 ; ADD2 is what creates C0XCNT; define it when no triple was queued so
 ; the report below cannot fail on an undefined variable
 I '$D(C0XCNT) S C0XCNT=0
 W !,"INSERTING ",C0XCNT," TRIPLES"
 I $D(C0XFDA) D UPDIE(.C0XFDA) ; commit the updates to the file
 ; next, mark the graph as finished
 ; NOTE(review): no code here changes fmts:status from "unfinished"
 S C0XEND=$$NOW^XLFDT
 W !," ENDED AT: ",C0XEND
 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 ; a run that ends within the same second gives C0XDIFF=0; skip the
 ; rate line then instead of dividing by zero
 I C0XDIFF>0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
 Q
 ;
SHOW(ZN) ;
 ZWR ^TMP("MXMLDOM",$J,1,ZN,*)
… …
 Q
 ;
ADD2(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
 ;
 ; ZG, ZS, ZP, ZO ARE THE GRAPH, SUBJECT, PREDICATE AND OBJECT STRINGS
 ; FARY IS OPTIONAL AND NAMES THE FILE ARRAY; DEFAULTS TO "C0XFARY"
 ;
 ; Each call queues one entry in C0XFDA under file C0XTFN (presumably
 ; set up by USEFARY - confirm) and bumps C0XCNT. Every BATMAX calls the
 ; queued entries are filed with UPDIE and C0XFDA is cleared; whatever
 ; remains after the last call must be filed by the caller.
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 I '$D(C0XCNT) S C0XCNT=0
 ; PROCESS2 sets the batch variables; default them so ADD2 can also be
 ; called on its own without an undefined-variable error
 I '$D(BATCNT) S BATCNT=0
 I '$D(BATMAX) S BATMAX=10000
 N ZNODE
 S ZNODE="N"_$$LKY17
 N ZNARY ; GET READY TO CALL IENOFA
 S ZNARY("ZG",ZG)=""
 S ZNARY("ZS",ZS)=""
 S ZNARY("ZP",ZP)=""
 S ZNARY("ZO",ZO)=""
 ; start from an empty result array on every call. NOTE(review): IENOFA
 ; is not visible here; if it does not clear its output, IENs left from
 ; an earlier triple could otherwise be returned by the $O calls below
 N ZIENS
 D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
 ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
 ;S ZSIEN=$$IENOF(ZS)
 ;S ZPIEN=$$IENOF(ZP)
 ;S ZOIEN=$$IENOF(ZO)
 ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
 S C0XCNT=C0XCNT+1
 N ZFIENS S ZFIENS="?+"_C0XCNT_"," ; IENS placeholder for this new entry
 S C0XFDA(C0XTFN,ZFIENS,.01)=ZNODE
 S C0XFDA(C0XTFN,ZFIENS,.02)=$O(ZIENS("IEN","ZG",""))
 S C0XFDA(C0XTFN,ZFIENS,.03)=$O(ZIENS("IEN","ZS",""))
 S C0XFDA(C0XTFN,ZFIENS,.04)=$O(ZIENS("IEN","ZP",""))
 S C0XFDA(C0XTFN,ZFIENS,.05)=$O(ZIENS("IEN","ZO",""))
 S BATCNT=BATCNT+1
 ; "not less than" rather than "equals" so a counter that has somehow
 ; passed BATMAX still triggers a flush
 I BATCNT'<BATMAX D  ; BATCH IS DONE
 . D UPDIE(.C0XFDA)
 . K C0XFDA
 . S BATCNT=0 ; RESET COUNTER
 ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
 Q
 ;
LKY9() ;EXTRINIC THAT RETURNS A RANDOM 9 DIGIT NUMBER. USED FOR GENERATING
 ; UNIQUE NODE AND GRAPH NAMES
Note:
See TracChangeset
for help on using the changeset viewer.