Changeset 1294 for fmts/trunk/p/C0XF2N.m


Ignore:
Timestamp:
Nov 12, 2011, 12:15:56 PM (12 years ago)
Author:
George Lilly
Message:

new initialization routine INITC0XINIT and some bug fixes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • fmts/trunk/p/C0XF2N.m

    r1279 r1294  
    1 C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
    2  ;;0.1;C0X;nopatch;noreleasedate;Build 1
     1C0XF2N ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
     2 ;;0.1;C0X;nopatch;noreleasedate;Build 5
    33 ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
    44 ;General Public License See attached copy of the License.
     
    3333 ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
    3434 ; TRIPLE STORES
     35 I $D(@ZFARY) Q  ; ALREADY INITIALIZED
    3536 S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
    3637 S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
    3738 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
    3839 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
    39  S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/qds/"
     40 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/"
    4041 S @ZFARY@("BLKLOAD")=1 ; this file supports block load
    4142 S @ZFARY@("FMTSSTYLE")="F2N" ; fileman style
     
    7172 S C0XFN=Y
    7273 D IMPORT(C0XFN,C0XDIR,,"C0XFARY")
    73  Q
    74  ;
    75 IMPORT(FNAME,FDIR,FURL,FARY) ; EXTRINSIC THAT READS A FILE FROM THE STANDARD
     74 K C0XFDA
     75 Q
     76 ;
     77IMPORT(FNAME,INDIR,INURL,FARY) ; EXTRINSIC THAT READS A FILE FROM THE STANDARD
    7678 ; DIRECTORY, LOADS IT INTO THE TRIPLESTORE AS TEXT, AND RETURNS THE
    7779 ; NODE NAME OF THE TEXT TRIPLE
    78  ; FDIR IS THE OPTIONAL DIRECTORY (DEFAUTS TO STANDARD DIR)
    79  ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
     80 ; INDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO STANDARD DIR)
     81 ; INURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
    8082 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
    8183 I '$D(FARY) D  ;
     
    8486 D USEFARY(FARY)
    8587 N ZD,ZTMP
    86  I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
    87  I '$D(FURL) D  ;
    88  . N ZN2 S ZN2=$TR(FNAME,".","_") ; REMOVE THE DOT FROM THE NAME
    89  . S FURL=FDIR_ZN2
     88 I '$D(INDIR) S INDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
     89 I $G(INURL)="" D  ;
     90 . ;N ZN2 S ZN2=$P(FNAME,".",1)_"_"_$P(FNAME,".",2) ; REMOVE THE DOT
     91 . ;S INURL=FDIR_ZN2
     92 . S INURL=INDIR_FNAME
    9093 N ZTMP
    9194 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
     
    9497 W !,"STARTED: ",C0XSTART
    9598 W !,"READING IN: ",FNAME
    96  I '$$FILEREAD(ZTMP,FDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
    97  . W !,"ERROR READING FILE: ",FDIR,FNAME
     99 I '$$FILEREAD(ZTMP,INDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
     100 . W !,"ERROR READING FILE: ",INDIR,FNAME
    98101 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
    99102 W !,$O(@ZRDF@(""),-1)," LINES READ"
    100  D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF
     103 D INSRDF(ZRDF,INURL,FARY) ; IMPORT AND PROCESS THE RDF
     104 K INURL
     105 K C0XFDA
     106 K ^TMP("MXMLDOM",$J)
    101107 Q
    102108 ;
     
    132138 . S FARY="C0XFARY"
    133139 D USEFARY(FARY)
     140 S BATCNT=0 ; BATCH COUNTER
     141 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
    134142 N ZGRAPH,ZSUBJECT
    135143 S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
     
    139147 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
    140148 D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY)
    141  D UPDIE(.C0XFDA) ; TRY IT OUT
     149 D SWUPDIE(.C0XFDA) ; TRY IT OUT
    142150 K C0XCNT ;RESET FOR NEXT TIME
    143151 D STORETXT(ZRDF,ZTXTNM,FARY)
    144152 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
    145  D PROCESS2(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF
     153 D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF
    146154 Q
    147155 ;
     
    207215 . S FARY="C0XFARY"
    208216 D USEFARY(FARY)
     217 ;N BATCNT
     218 ;N BATMAX
    209219 ; -- first parse the rdf file with the MXML parser
    210220 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
     221 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
    211222 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
     223 K @ZRDF ; DON'T NEED INPUT BUFFER ANYMORE
    212224 ; -- assign the MXML DOM global name to ZDOM
    213225 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    214  W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
     226 S C0XNODE=$O(@ZDOM@(""),-1)
     227 W !,C0XNODE," XML NODES PARSED"
     228 S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
     229 W !,"PARSE COMPLETE AT ",C0XPRS
     230 S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
     231 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     232 I C0XDIFF'=0 D  ;
     233 . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
    215234 ; -- populate the metagraph to point to the graph with status unfinished
    216235 S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
     
    218237 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    219238 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
     239 W !,"INSERTING GRAPH: ",ZGRF
    220240 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    221241 S C0XDATE=$$NOW^XLFDT
    222242 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    223  D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
     243 D SWUPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    224244 ; --
    225245 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
     
    288308 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    289309 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
    290  W !,"INSERTING ",C0XCNT," TRIPLES"
    291  D UPDIE(.C0XFDA) ; commit the updates to the file
    292  ; next, mark the graph as finished
    293  S C0XEND=$$NOW^XLFDT
    294  W !," ENDED AT: ",C0XEND
    295  S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
    296  W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    297  I C0XDIFF'=0 D  ;
    298  . W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
    299  Q
    300  ;
    301 PROCESS2(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
    302  ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING
    303  ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
    304  ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
    305  ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
    306  ;
    307  I '$D(FARY) D  ;
    308  . D INITFARY("C0XFARY")
    309  . S FARY="C0XFARY"
    310  D USEFARY(FARY)
    311  ;N BATCNT
    312  ;N BATMAX
    313  S BATCNT=0 ; BATCH COUNTER
    314  S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
    315  ; -- first parse the rdf file with the MXML parser
    316  ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
    317  S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
    318  S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
    319  ; -- assign the MXLM dom global name to ZDOM
    320  S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    321  S C0XNODE=$O(@ZDOM@(""),-1)
    322  W !,C0XNODE," XML NODES PARSED"
    323  S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
    324  W !,"PARSE COMPLETE AT ",C0XPRS
    325  S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
    326  W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    327  I C0XDIFF'=0 D  ;
    328  . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
    329  ; -- populate the metagraph to point to the graph with status unfinished
    330  S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
    331  I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
    332  D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    333  D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
    334  ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    335  S C0XDATE=$$NOW^XLFDT
    336  D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    337  D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    338  ; --
    339  ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
    340  ; -- put them in a local variable for quick reference
    341  ; -- TODO: create a graph for vocabularies and validate incoming against it
    342  ;
    343  S C0XVOC=""
    344  N ZI,ZJ,ZK S ZI=""
    345  F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
    346  . S ZVOC=$P(ZI,"xmlns:",2)
    347  . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
    348  ;W !,"VOCABS:" ZWR C0XVOC
    349  ;
    350  ; -- look for children called rdf:Description. quit if none. not an rdf file
    351  ;
    352  S ZI=$O(@ZDOM@(1,"C",""))
    353  I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ; not an rdf file
    354  . W !,"Error. Not an RDF file. Cannot process."
    355  ;
    356  ; -- now process the rdf description children
    357  ;
    358  S ZI=""
    359  S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
    360  F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
    361  . ; -- we are skipping any child that is not rdf:Description
    362  . ; -- TODO: check to see if this is right in general
    363  . ;
    364  . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
    365  . . W !,"SKIPPING NODE: ",ZI
    366  . ; -- now looking for the subject for the triples
    367  . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
    368  . I ZX'="" D  ; we have the subject
    369  . . ;W " about: ",ZX
    370  . . S C0XSUB=ZX
    371  . E  D  ;
    372  . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
    373  . . I ZX'="" D  ;
    374  . . . S C0XSUB=ZX
    375  . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT
    376  . ;
    377  . ; -- we now have the subject. the children of this node have the rest
    378  . ;
    379  . S ZJ="" ; for the children of the rdf:Description nodes
    380  . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
    381  . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
    382  . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
    383  . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
    384  . . I C0XPRE[":" D  ; expand using vocabulary
    385  . . . N ZB,ZA
    386  . . . S ZB=$P(C0XPRE,":",1)
    387  . . . S ZA=$P(C0XPRE,":",2)
    388  . . . I $G(C0XVOC(ZB))'="" D  ;
    389  . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
    390  . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
    391  . . I ZY'="" D  Q ;
    392  . . . S C0XOBJ=ZY ; object
    393  . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; finally. our first real triple
    394  . . ; -- this is an else because of the quit above
    395  . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
    396  . . I ZX'="" D  Q  ; got one
    397  . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
    398  . . . ; without change... this could be foolish .. look at it again later
    399  . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
    400  . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
    401  . . I C0XOBJ="" D  Q  ; not a happy situation
    402  . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    403  . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
    404310 S C0XTRP=$$NOW^XLFDT ; PARSE COMPLETE
    405311 W !,"TRIPLES COMPLETE AT ",C0XTRP
     
    419325 S C0XDIFF=$$FMDIFF^XLFDT(C0XINS,C0XTRP,2)
    420326 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    421  W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"
     327 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"
    422328 S C0XEND=$$NOW^XLFDT
    423329 W !," ENDED AT: ",C0XEND
    424330 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
    425331 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    426  W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
     332 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
    427333 Q
    428334 ;
     
    432338 ;
    433339ANONS() ; RETURNS AN ANONYMOUS SUBJECT
    434  Q "_S:"_$$LKY9
     340 Q "_:S"_$$LKY9 ; the literal "_:S" followed by the value returned by $$LKY9
    435341 ;
    436342NEWG(NGRAPH,NMETA) ; CREATES A NEW META GRAPH, MARKS IT AS UNFINISHED
     
    442348 ;
    443349ADD(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
    444  ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
    445  I '$D(FARY) D  ;
    446  . D INITFARY("C0XFARY")
    447  . S FARY="C0XFARY"
    448  D USEFARY(FARY)
    449  I '$D(C0XCNT) S C0XCNT=0
    450  N ZNODE
    451  S ZNODE="N"_$$LKY17
    452  N ZNARY ; GET READY TO CALL IENOFA
    453  S ZNARY("ZG",ZG)=""
    454  S ZNARY("ZS",ZS)=""
    455  S ZNARY("ZP",ZP)=""
    456  S ZNARY("ZO",ZO)=""
    457  D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
    458  ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
    459  ;S ZSIEN=$$IENOF(ZS)
    460  ;S ZPIEN=$$IENOF(ZP)
    461  ;S ZOIEN=$$IENOF(ZO)
    462  ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
    463  S C0XCNT=C0XCNT+1
    464  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE
    465  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG",""))
    466  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS",""))
    467  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP",""))
    468  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO",""))
    469  ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
    470  Q
    471  ;
    472 ADD2(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
    473350 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
    474351 I '$D(FARY) D  ;
     
    637514 Q
    638515 ;
     516SWUPDIE(ZFDA) ; SWITCH BETWEEN UPDIE AND BULKLOAD
     517 . I $G(BLKLOAD) D  ; bulk load
     518 . . D BULKLOAD(.ZFDA) ; bulk load the batch
     519 . E  D  ; no bulk load
     520 . . D UPDIE(.ZFDA)
     521 . K ZFDA
     522 Q
     523 ;
    639524UPDIE(ZFDA) ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS
    640525 ; ZFDA IS PASSED BY REFERENCE
Note: See TracChangeset for help on using the changeset viewer.