Changeset 1294 for fmts


Ignore:
Timestamp:
Nov 12, 2011, 12:15:56 PM (13 years ago)
Author:
George Lilly
Message:

new initialization routine INITC0XINIT and some bug fixes

Location:
fmts/trunk/p
Files:
1 added
3 edited

Legend:

Unmodified
Added
Removed
  • fmts/trunk/p/C0XF2N.m

    r1279 r1294  
    1 C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
    2  ;;0.1;C0X;nopatch;noreleasedate;Build 1
     1C0XF2N ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
     2 ;;0.1;C0X;nopatch;noreleasedate;Build 5
    33 ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
    44 ;General Public License See attached copy of the License.
     
    3333 ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
    3434 ; TRIPLE STORES
     35 I $D(@ZFARY) Q  ; ALREADY INITIALIZED
    3536 S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
    3637 S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
    3738 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
    3839 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
    39  S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/qds/"
     40 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/"
    4041 S @ZFARY@("BLKLOAD")=1 ; this file supports block load
    4142 S @ZFARY@("FMTSSTYLE")="F2N" ; fileman style
     
    7172 S C0XFN=Y
    7273 D IMPORT(C0XFN,C0XDIR,,"C0XFARY")
    73  Q
    74  ;
    75 IMPORT(FNAME,FDIR,FURL,FARY) ; EXTRINSIC THAT READS A FILE FROM THE STANDARD
     74 K C0XFDA
     75 Q
     76 ;
     77IMPORT(FNAME,INDIR,INURL,FARY) ; EXTRINSIC THAT READS A FILE FROM THE STANDARD
    7678 ; DIRECTORY, LOADS IT INTO THE TRIPLESTORE AS TEXT, AND RETURNS THE
    7779 ; NODE NAME OF THE TEXT TRIPLE
    78  ; FDIR IS THE OPTIONAL DIRECTORY (DEFAUTS TO STANDARD DIR)
    79  ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
     80 ; INDIR IS THE OPTIONAL DIRECTORY (DEFAUTS TO STANDARD DIR)
     81 ; INURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
    8082 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
    8183 I '$D(FARY) D  ;
     
    8486 D USEFARY(FARY)
    8587 N ZD,ZTMP
    86  I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
    87  I '$D(FURL) D  ;
    88  . N ZN2 S ZN2=$TR(FNAME,".","_") ; REMOVE THE DOT FROM THE NAME
    89  . S FURL=FDIR_ZN2
     88 I '$D(INDIR) S INDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
     89 I $G(INURL)="" D  ;
     90 . ;N ZN2 S ZN2=$P(FNAME,".",1)_"_"_$P(FNAME,".",2) ; REMOVE THE DOT
     91 . ;S INURL=FDIR_ZN2
     92 . S INURL=INDIR_FNAME
    9093 N ZTMP
    9194 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
     
    9497 W !,"STARTED: ",C0XSTART
    9598 W !,"READING IN: ",FNAME
    96  I '$$FILEREAD(ZTMP,FDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
    97  . W !,"ERROR READING FILE: ",FDIR,FNAME
     99 I '$$FILEREAD(ZTMP,INDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
     100 . W !,"ERROR READING FILE: ",INDIR,FNAME
    98101 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
    99102 W !,$O(@ZRDF@(""),-1)," LINES READ"
    100  D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF
     103 D INSRDF(ZRDF,INURL,FARY) ; IMPORT AND PROCESS THE RDF
     104 K INURL
     105 K C0XFDA
     106 K ^TMP("MXMLDOM",$J)
    101107 Q
    102108 ;
     
    132138 . S FARY="C0XFARY"
    133139 D USEFARY(FARY)
     140 S BATCNT=0 ; BATCH COUNTER
     141 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
    134142 N ZGRAPH,ZSUBJECT
    135143 S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
     
    139147 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
    140148 D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY)
    141  D UPDIE(.C0XFDA) ; TRY IT OUT
     149 D SWUPDIE(.C0XFDA) ; TRY IT OUT
    142150 K C0XCNT ;RESET FOR NEXT TIME
    143151 D STORETXT(ZRDF,ZTXTNM,FARY)
    144152 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
    145  D PROCESS2(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF
     153 D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF
    146154 Q
    147155 ;
     
    207215 . S FARY="C0XFARY"
    208216 D USEFARY(FARY)
     217 ;N BATCNT
     218 ;N BATMAX
    209219 ; -- first parse the rdf file with the MXML parser
    210220 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
     221 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
    211222 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
     223 K @ZRDF ; DON'T NEED INPUT BUFFER ANYMORE
    212224 ; -- assign the MXLM dom global name to ZDOM
    213225 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    214  W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
     226 S C0XNODE=$O(@ZDOM@(""),-1)
     227 W !,C0XNODE," XML NODES PARSED"
     228 S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
     229 W !,"PARSE COMPLETE AT ",C0XPRS
     230 S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
     231 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     232 I C0XDIFF'=0 D  ;
     233 . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
    215234 ; -- populate the metagraph to point to the graph with status unfinished
    216235 S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
     
    218237 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    219238 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
     239 W !,"INSERTING GRAPH: ",ZGRF
    220240 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    221241 S C0XDATE=$$NOW^XLFDT
    222242 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    223  D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
     243 D SWUPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    224244 ; --
    225245 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
     
    288308 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    289309 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
    290  W !,"INSERTING ",C0XCNT," TRIPLES"
    291  D UPDIE(.C0XFDA) ; commit the updates to the file
    292  ; next, mark the graph as finished
    293  S C0XEND=$$NOW^XLFDT
    294  W !," ENDED AT: ",C0XEND
    295  S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
    296  W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    297  I C0XDIFF'=0 D  ;
    298  . W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
    299  Q
    300  ;
    301 PROCESS2(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
    302  ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING
    303  ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
    304  ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
    305  ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
    306  ;
    307  I '$D(FARY) D  ;
    308  . D INITFARY("C0XFARY")
    309  . S FARY="C0XFARY"
    310  D USEFARY(FARY)
    311  ;N BATCNT
    312  ;N BATMAX
    313  S BATCNT=0 ; BATCH COUNTER
    314  S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
    315  ; -- first parse the rdf file with the MXML parser
    316  ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
    317  S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
    318  S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
    319  ; -- assign the MXLM dom global name to ZDOM
    320  S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    321  S C0XNODE=$O(@ZDOM@(""),-1)
    322  W !,C0XNODE," XML NODES PARSED"
    323  S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
    324  W !,"PARSE COMPLETE AT ",C0XPRS
    325  S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
    326  W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    327  I C0XDIFF'=0 D  ;
    328  . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
    329  ; -- populate the metagraph to point to the graph with status unfinished
    330  S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
    331  I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
    332  D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    333  D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
    334  ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    335  S C0XDATE=$$NOW^XLFDT
    336  D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    337  D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    338  ; --
    339  ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
    340  ; -- put them in a local variable for quick reference
    341  ; -- TODO: create a graph for vocabularies and validate incoming against it
    342  ;
    343  S C0XVOC=""
    344  N ZI,ZJ,ZK S ZI=""
    345  F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
    346  . S ZVOC=$P(ZI,"xmlns:",2)
    347  . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
    348  ;W !,"VOCABS:" ZWR C0XVOC
    349  ;
    350  ; -- look for children called rdf:Description. quit if none. not an rdf file
    351  ;
    352  S ZI=$O(@ZDOM@(1,"C",""))
    353  I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ; not an rdf file
    354  . W !,"Error. Not an RDF file. Cannot process."
    355  ;
    356  ; -- now process the rdf description children
    357  ;
    358  S ZI=""
    359  S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
    360  F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
    361  . ; -- we are skipping any child that is not rdf:Description
    362  . ; -- TODO: check to see if this is right in general
    363  . ;
    364  . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
    365  . . W !,"SKIPPING NODE: ",ZI
    366  . ; -- now looking for the subject for the triples
    367  . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
    368  . I ZX'="" D  ; we have the subject
    369  . . ;W " about: ",ZX
    370  . . S C0XSUB=ZX
    371  . E  D  ;
    372  . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
    373  . . I ZX'="" D  ;
    374  . . . S C0XSUB=ZX
    375  . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT
    376  . ;
    377  . ; -- we now have the subject. the children of this node have the rest
    378  . ;
    379  . S ZJ="" ; for the children of the rdf:Description nodes
    380  . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
    381  . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
    382  . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
    383  . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
    384  . . I C0XPRE[":" D  ; expand using vocabulary
    385  . . . N ZB,ZA
    386  . . . S ZB=$P(C0XPRE,":",1)
    387  . . . S ZA=$P(C0XPRE,":",2)
    388  . . . I $G(C0XVOC(ZB))'="" D  ;
    389  . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
    390  . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
    391  . . I ZY'="" D  Q ;
    392  . . . S C0XOBJ=ZY ; object
    393  . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; finally. our first real triple
    394  . . ; -- this is an else because of the quit above
    395  . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
    396  . . I ZX'="" D  Q  ; got one
    397  . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
    398  . . . ; without change... this could be foolish .. look at it again later
    399  . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
    400  . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
    401  . . I C0XOBJ="" D  Q  ; not a happy situation
    402  . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    403  . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
    404310 S C0XTRP=$$NOW^XLFDT ; PARSE COMPLETE
    405311 W !,"TRIPLES COMPLETE AT ",C0XTRP
     
    419325 S C0XDIFF=$$FMDIFF^XLFDT(C0XINS,C0XTRP,2)
    420326 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    421  W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"
     327 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"
    422328 S C0XEND=$$NOW^XLFDT
    423329 W !," ENDED AT: ",C0XEND
    424330 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
    425331 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    426  W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
     332 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
    427333 Q
    428334 ;
     
    432338 ;
    433339ANONS() ; RETURNS AN ANONOMOUS SUBJECT
    434  Q "_S:"_$$LKY9
     340 Q "_:S"_$$LKY9
    435341 ;
    436342NEWG(NGRAPH,NMETA) ; CREATES A NEW META GRAPH, MARKS IT AS UNFINISHED
     
    442348 ;
    443349ADD(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
    444  ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
    445  I '$D(FARY) D  ;
    446  . D INITFARY("C0XFARY")
    447  . S FARY="C0XFARY"
    448  D USEFARY(FARY)
    449  I '$D(C0XCNT) S C0XCNT=0
    450  N ZNODE
    451  S ZNODE="N"_$$LKY17
    452  N ZNARY ; GET READY TO CALL IENOFA
    453  S ZNARY("ZG",ZG)=""
    454  S ZNARY("ZS",ZS)=""
    455  S ZNARY("ZP",ZP)=""
    456  S ZNARY("ZO",ZO)=""
    457  D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
    458  ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
    459  ;S ZSIEN=$$IENOF(ZS)
    460  ;S ZPIEN=$$IENOF(ZP)
    461  ;S ZOIEN=$$IENOF(ZO)
    462  ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
    463  S C0XCNT=C0XCNT+1
    464  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE
    465  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG",""))
    466  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS",""))
    467  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP",""))
    468  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO",""))
    469  ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
    470  Q
    471  ;
    472 ADD2(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
    473350 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
    474351 I '$D(FARY) D  ;
     
    637514 Q
    638515 ;
     516SWUPDIE(ZFDA) ; SWITCH BETWEEN UPDIE AND BULKLOAD
     517 . I $G(BLKLOAD) D  ; bulk load
     518 . . D BULKLOAD(.ZFDA) ; bulk load the batch
     519 . E  D  ; no bulk load
     520 . . D UPDIE(.ZFDA)
     521 . K ZFDA
     522 Q
     523 ;
    639524UPDIE(ZFDA) ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS
    640525 ; ZFDA IS PASSED BY REFERENCE
  • fmts/trunk/p/C0XMAIN.m

    r1274 r1294  
    1 C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
    2  ;;0.1;C0X;nopatch;noreleasedate;Build 1
    3  ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
    4  ;General Public License See attached copy of the License.
    5  ;
    6  ;This program is free software; you can redistribute it and/or modify
    7  ;it under the terms of the GNU General Public License as published by
    8  ;the Free Software Foundation; either version 2 of the License, or
    9  ;(at your option) any later version.
    10  ;
    11  ;This program is distributed in the hope that it will be useful,
    12  ;but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  ;GNU General Public License for more details.
    15  ;
    16  ;You should have received a copy of the GNU General Public License along
    17  ;with this program; if not, write to the Free Software Foundation, Inc.,
    18  ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    19  ;
    20  Q
    21  ;
    22 INITFARY(ZFARY) ; INITIALIZE FILE NUMBERS AND OTHER USEFUL THINGS
    23  ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
    24  ; TRIPLE STORES. ZFARY IS THE NAME OF THE ARRAY TO FILL (USED VIA INDIRECTION)
    25  S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
    26  S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
    27  S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
    28  S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
    29  S @ZFARY@("C0XDIR")="/home/glilly/all_smart_patient_data/smart-rdf/" ; DEFAULT IMPORT DIRECTORY (IMPORT USES C0XDIR WHEN FDIR IS NOT PASSED)
    30  D USEFARY(ZFARY) ; COPY EACH ENTRY INTO A LOCAL VARIABLE NAMED BY ITS SUBSCRIPT
    31  Q
    32  ;
    33 USEFARY(ZFARY) ; INITIALIZES VARIABLES SAVED IN ARRAY ZFARY (PASSED BY NAME)
    34  N ZI S ZI=""
    35  F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  D  ; ONE PASS PER SAVED VARIABLE NAME
    36  . ; ZI IS THE VARIABLE NAME AND THE NODE VALUE IS WHAT IT IS SET TO.
    37  . ; NAME INDIRECTION IS USED INSTEAD OF BUILDING AND XECUTING A SET STRING, SO
    38  . ; VALUES HOLDING QUOTE CHARACTERS ARE STORED VERBATIM AND NO SCRATCH VARIABLE LEAKS
    39  . S @ZI=@ZFARY@(ZI)
    40  Q
    41  ;
    42 IMPORT(FNAME,FDIR,FURL,FARY) ; READS A FILE FROM THE STANDARD
    43  ; DIRECTORY AND HANDS IT TO INSRDF, WHICH STORES IT AS TEXT AND PROCESSES IT.
    44  ; NOTE: THIS QUITS WITHOUT A VALUE, SO CALL IT WITH DO, NOT AS $$IMPORT
    45  ; FDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO STANDARD DIR C0XDIR)
    46  ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
    47  ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
    48  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    49  . D INITFARY("C0XFARY")
    50  . S FARY="C0XFARY"
    51  D USEFARY(FARY)
    52  N ZD,ZTMP
    53  I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
    54  I '$D(FURL) D  ; DEFAULT URI IS THE DIRECTORY PLUS THE FILE NAME WITH DOTS AS "_"
    55  . N ZN2 S ZN2=$TR(FNAME,".","_") ; REMOVE THE DOT FROM THE NAME
    56  . S FURL=FDIR_ZN2
    57  ; (ZTMP IS ALREADY NEWED ABOVE; THE SECOND NEW WAS REDUNDANT)
    58  S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
    59  K ^TMP("C0X",$J,"FILEIN") ; CLEAR EVERY LINE OF THE BUFFER, NOT ONLY LINE 1, SO NO OLD FILE LINES SURVIVE
    60  S C0XSTART=$$NOW^XLFDT ; NOT NEWED: READ LATER FOR THE ELAPSED TIME REPORT
    61  W !,"STARTED: ",C0XSTART
    62  W !,"READING IN: ",FNAME
    63  I '$$FILEIN(ZTMP,FDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
    64  . W !,"ERROR READING FILE: ",FDIR,FNAME
    65  S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
    66  W !,$O(@ZRDF@(""),-1)," LINES READ"
    67  D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF
    68  Q
    69  ;
    70 WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
    71  ; ZURL IS THE ADDRESS TO FETCH; IT IS ALSO PASSED TO INSRDF AS THE NAME OF THE FILE
    72  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    73  . D INITFARY("C0XFARY")
    74  . S FARY="C0XFARY"
    75  D USEFARY(FARY)
    76  ;N ZLOC,ZTMP
    77  K ZTMP ; START WITH AN EMPTY LOCAL RECEIVE BUFFER
    78  S ZLOC=$NA(^TMP("C0X","WGET",$J)) ; GLOBAL COPY OF THE DOWNLOAD, KEYED BY JOB
    79  K @ZLOC
    80  S C0XSTART=$$NOW^XLFDT ; NOT NEWED: PROCESS2 READS IT FOR THE ELAPSED TIME REPORT
    81  W !,"STARTED: ",C0XSTART
    82  W !,"DOWNLOADING: ",ZURL
    83  S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP) ; NOTE(review): OK IS NEVER CHECKED BELOW - CONFIRM HOW FAILURES SHOULD BE HANDLED
    84  M @ZLOC=ZTMP ; COPY THE DOWNLOADED LINES INTO THE GLOBAL BUFFER
    85  W !,$O(@ZLOC@(""),-1)," LINES READ"
    86  D INSRDF(ZLOC,ZURL,FARY)
    87  Q
    88  ;
    89 INSRDF(ZRDF,ZNAME,FARY) ; INSERT AN RDF FILE INTO THE STORE AND PROCESS
    90  ; ZRDF IS PASSED BY NAME. ZNAME IS THE URL/NAME RECORDED FOR THE FILE
    91  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    92  . D INITFARY("C0XFARY")
    93  . S FARY="C0XFARY"
    94  D USEFARY(FARY)
    95  N ZGRAPH,ZSUBJECT
    96  S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
    97  S ZSUBJECT=$$ANONS ; RANDOM ANONYMOUS SUBJECT
    98  D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY) ; RECORD WHERE THE FILE CAME FROM
    99  N ZTXTNM
    100  S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
    101  D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY) ; POINT THE SUBJECT AT THE TEXT NODE
    102  D UPDIE(.C0XFDA) ; TRY IT OUT
    103  K C0XCNT ;RESET FOR NEXT TIME
    104  D STORETXT(ZRDF,ZTXTNM,FARY) ; FILE THE RAW TEXT UNDER THE TEXT NODE NAME
    105  W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
    106  D PROCESS2(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF: ZNAME IS USED AS THE GRAPH, ZGRAPH AS THE METAGRAPH
    107  Q
    108  ;
    109 STORETXT(ZTXT,ZNAME,FARY) ; STORE TEXT IN THE TRIPLESTORE AT ZNAME
    110  ; ZTXT IS HANDED TO WP^DIE AS THE TEXT SOURCE; ZNAME IS THE STRING ENTRY TO STORE IT UNDER
    111  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    112  . D INITFARY("C0XFARY")
    113  . S FARY="C0XFARY"
    114  D USEFARY(FARY)
    115  N ZIEN
    116  S ZIEN=$$IENOF(ZNAME,FARY) ; GET THE IEN
    117  D CLEAN^DILF
    118  K ZERR
    119  D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR") ; FIELD 1 OF THE STRINGS FILE; ERRORS ARE RETURNED IN ZERR
    120  I $D(ZERR) D  ; SHOW WHATEVER WP^DIE REPORTED
    121  . ZWR ZERR
    122  Q
    123  ;
    124 GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT STORED UNDER ZNAME
    125  ; ZRTN IS PASSED BY REFERENCE
    126  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    127  . D INITFARY("C0XFARY")
    128  . S FARY="C0XFARY"
    129  D USEFARY(FARY)
    130  N ZIEN
    131  S ZIEN=$$IENOF(ZNAME) ; NOTE(review): STORETXT PASSES FARY TO IENOF, THIS CALL DOES NOT - CONFIRM
    132  S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN") ; FIELD 1 OF THE STRINGS FILE INTO ZRTN. OK IS NOT NEWED
    133  Q
    134  ;
    135 WHERETXT(ZNAME,FARY) ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
    136  ; WHERE THE TEXT IS LOCATED. NAME IS THE NAME OF THE STRING
    137  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    138  . D INITFARY("C0XFARY")
    139  . S FARY="C0XFARY"
    140  D USEFARY(FARY)
    141  N ZIEN
    142  S ZIEN=$$IENOF(ZNAME) ; NOTE(review): STORETXT PASSES FARY TO IENOF, THIS CALL DOES NOT - CONFIRM
    143  Q $NA(@C0XSN@(ZIEN,1)) ; NODE 1 UNDER THE ENTRY IN THE STRINGS GLOBAL
    144  ;
    144  ;
    145 FILEIN(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
    146  ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
    147  ; IE ^TMP("C0X","FILEIN",1)
    148  ; ZLVL IN THIS CASE WOULD BE 3 INCREMENTING THE 1
    149  ; ZDIR IS THE DIRECTORY, ZFNAME THE FILE NAME. EXTRINSIC WHICH RETURNS THE RESULT OF FTG^%ZISH
    150  S OK=$$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL) ; USE THE ZFNAME PARAMETER, NOT WHATEVER FNAME THE CALLER HAPPENS TO HAVE
    151  Q OK
    152  ;
    153 TESTPROC ; TEST PROCESS WITH EXISTING SMALL RDF FILE
    154  S ZIN=$NA(^TMP("C0X",12226,"FILEIN")) ; HARD-CODED JOB NUMBER SUBSCRIPT - presumably left by an earlier import, confirm before use
    155  S ZGRAPH="/test/rdfFile" ; PASSED TO PROCESS AS THE GRAPH NAME (ZGRF)
    156  S ZM="/test/rdfFile/meta" ; PASSED TO PROCESS AS THE METAGRAPH NAME (ZMETA)
    157  D PROCESS(.G,ZIN,ZGRAPH,ZM) ; FARY IS OMITTED, SO PROCESS SETS UP THE DEFAULT STORE
    158  Q
    159  ;
    160 PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
    161  ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING (NOTHING BELOW SETS IT YET)
    162  ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
    163  ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
    164  ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
    165  ; FARY IS OPTIONAL AND NAMES THE TRIPLE STORE ARRAY; IT IS PASSED ON TO EVERY ADD
    166  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    167  . D INITFARY("C0XFARY")
    168  . S FARY="C0XFARY"
    169  D USEFARY(FARY)
    170  ; -- first parse the rdf file with the MXML parser
    171  ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
    172  S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
    173  ; -- assign the MXLM dom global name to ZDOM
    174  S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    175  W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
    176  ; -- populate the metagraph to point to the graph with status unfinished
    177  S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
    178  I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
    179  D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    180  D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
    181  ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    182  S C0XDATE=$$NOW^XLFDT
    183  D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    184  D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    185  ; --
    186  ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
    187  ; -- put them in a local variable for quick reference
    188  ; -- TODO: create a graph for vocabularies and validate incoming against it
    189  ;
    190  S C0XVOC=""
    191  N ZI,ZJ,ZK S ZI=""
    192  F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
    193  . S ZVOC=$P(ZI,"xmlns:",2)
    194  . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
    195  ;W !,"VOCABS:" ZWR C0XVOC
    196  ;
    197  ; -- look for children called rdf:Description. quit if none. not an rdf file
    198  ;
    199  S ZI=$O(@ZDOM@(1,"C",""))
    200  I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ; not an rdf file
    201  . W !,"Error. Not an RDF file. Cannot process."
    202  ;
    203  ; -- now process the rdf description children
    204  ;
    205  S ZI=""
    206  S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
    207  F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
    208  . ; -- we are skipping any child that is not rdf:Description
    209  . ; -- TODO: check to see if this is right in general
    210  . ;
    211  . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
    212  . . W !,"SKIPPING NODE: ",ZI
    213  . ; -- now looking for the subject for the triples
    214  . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
    215  . I ZX'="" D  ; we have the subject
    216  . . ;W " about: ",ZX
    217  . . S C0XSUB=ZX
    218  . E  D  ;
    219  . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
    220  . . I ZX'="" D  ;
    221  . . . S C0XSUB=ZX
    222  . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT. NOTE(review): C0XSUB IS NOT CLEARED PER NODE, SO A LATER NODE WITH NO rdf:about/nodeID KEEPS THE PREVIOUS SUBJECT - CONFIRM INTENDED
    223  . ;
    224  . ; -- we now have the subject. the children of this node have the rest
    225  . ;
    226  . S ZJ="" ; for the children of the rdf:Description nodes
    227  . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
    228  . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
    229  . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
    230  . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
    231  . . I C0XPRE[":" D  ; expand using vocabulary
    232  . . . N ZB,ZA
    233  . . . S ZB=$P(C0XPRE,":",1)
    234  . . . S ZA=$P(C0XPRE,":",2)
    235  . . . I $G(C0XVOC(ZB))'="" D  ;
    236  . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
    237  . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
    238  . . I ZY'="" D  Q ;
    239  . . . S C0XOBJ=ZY ; object
    240  . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; finally. our first real triple (FARY passed so the caller's store is used)
    241  . . ; -- this is an else because of the quit above
    242  . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
    243  . . I ZX'="" D  Q  ; got one
    244  . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
    245  . . . ; without change... this could be foolish .. look at it again later
    246  . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
    247  . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
    248  . . I C0XOBJ="" D  Q  ; not a happy situation
    249  . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    250  . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
    251  W !,"INSERTING ",C0XCNT," TRIPLES"
    252  D UPDIE(.C0XFDA) ; commit the updates to the file
    253  ; next, mark the graph as finished
    254  S C0XEND=$$NOW^XLFDT
    255  W !," ENDED AT: ",C0XEND
    256  S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2) ; C0XSTART MUST HAVE BEEN SET BY THE CALLER (IMPORT AND WGET DO)
    257  W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    258  I C0XDIFF>0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND" ; SKIP THE RATE WHEN UNDER A SECOND ELAPSED (NO DIVIDE BY ZERO)
    259  Q
    260  ;
    261 PROCESS2(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE, FILING TRIPLES IN BATCHES THROUGH ADD2
    262  ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING (NOTHING BELOW SETS IT YET)
    263  ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
    264  ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
    265  ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
    266  ; FARY IS OPTIONAL AND NAMES THE TRIPLE STORE ARRAY; IT IS PASSED ON TO EVERY ADD/ADD2
    267  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    268  . D INITFARY("C0XFARY")
    269  . S FARY="C0XFARY"
    270  D USEFARY(FARY)
    271  ;N BATCNT
    272  ;N BATMAX
    273  S BATCNT=0 ; BATCH COUNTER
    274  S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
    275  ; -- first parse the rdf file with the MXML parser
    276  ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
    277  S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
    278  ; -- assign the MXLM dom global name to ZDOM
    279  S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    280  W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
    281  ; -- populate the metagraph to point to the graph with status unfinished
    282  S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
    283  I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
    284  D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    285  D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
    286  ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    287  S C0XDATE=$$NOW^XLFDT
    288  D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    289  D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    290  ; --
    291  ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
    292  ; -- put them in a local variable for quick reference
    293  ; -- TODO: create a graph for vocabularies and validate incoming against it
    294  ;
    295  S C0XVOC=""
    296  N ZI,ZJ,ZK S ZI=""
    297  F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
    298  . S ZVOC=$P(ZI,"xmlns:",2)
    299  . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
    300  ;W !,"VOCABS:" ZWR C0XVOC
    301  ;
    302  ; -- look for children called rdf:Description. quit if none. not an rdf file
    303  ;
    304  S ZI=$O(@ZDOM@(1,"C",""))
    305  I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ; not an rdf file
    306  . W !,"Error. Not an RDF file. Cannot process."
    307  ;
    308  ; -- now process the rdf description children
    309  ;
    310  S ZI=""
    311  S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
    312  F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
    313  . ; -- we are skipping any child that is not rdf:Description
    314  . ; -- TODO: check to see if this is right in general
    315  . ;
    316  . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
    317  . . W !,"SKIPPING NODE: ",ZI
    318  . ; -- now looking for the subject for the triples
    319  . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
    320  . I ZX'="" D  ; we have the subject
    321  . . ;W " about: ",ZX
    322  . . S C0XSUB=ZX
    323  . E  D  ;
    324  . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
    325  . . I ZX'="" D  ;
    326  . . . S C0XSUB=ZX
    327  . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT. NOTE(review): C0XSUB IS NOT CLEARED PER NODE, SO A LATER NODE WITH NO rdf:about/nodeID KEEPS THE PREVIOUS SUBJECT - CONFIRM INTENDED
    328  . ;
    329  . ; -- we now have the subject. the children of this node have the rest
    330  . ;
    331  . S ZJ="" ; for the children of the rdf:Description nodes
    332  . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
    333  . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
    334  . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
    335  . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
    336  . . I C0XPRE[":" D  ; expand using vocabulary
    337  . . . N ZB,ZA
    338  . . . S ZB=$P(C0XPRE,":",1)
    339  . . . S ZA=$P(C0XPRE,":",2)
    340  . . . I $G(C0XVOC(ZB))'="" D  ;
    341  . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
    342  . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
    343  . . I ZY'="" D  Q ;
    344  . . . S C0XOBJ=ZY ; object
    345  . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; finally. our first real triple (FARY passed so the caller's store is used)
    346  . . ; -- this is an else because of the quit above
    347  . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
    348  . . I ZX'="" D  Q  ; got one
    349  . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
    350  . . . ; without change... this could be foolish .. look at it again later
    351  . . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
    352  . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
    353  . . I C0XOBJ="" D  Q  ; not a happy situation
    354  . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    355  . . D ADD2(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
    356  W !,"INSERTING ",C0XCNT," TRIPLES"
    357  I $D(C0XFDA) D UPDIE(.C0XFDA) ; commit the updates to the file (only if a partial batch is left over)
    358  ; next, mark the graph as finished
    359  S C0XEND=$$NOW^XLFDT
    360  W !," ENDED AT: ",C0XEND
    361  S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2) ; C0XSTART MUST HAVE BEEN SET BY THE CALLER (IMPORT AND WGET DO)
    362  W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    363  I C0XDIFF>0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND" ; SKIP THE RATE WHEN UNDER A SECOND ELAPSED (NO DIVIDE BY ZERO)
    364  Q
    365  ;
    366 SHOW(ZN) ; DISPLAY NODE ZN OF THE PARSED XML DOCUMENT HELD IN ^TMP("MXMLDOM",$J,1)
    367  ZWR ^TMP("MXMLDOM",$J,1,ZN,*) ; DOCUMENT HANDLE IS FIXED AT 1 HERE, NOT TAKEN FROM C0XDOCID
    368  Q
    369  ;
    370 ANONS() ; RETURNS AN ANONYMOUS SUBJECT: "_:S" FOLLOWED BY THE VALUE OF $$LKY9
    371  Q "_:S"_$$LKY9 ; "_:" COMES FIRST, MATCHING THE "_:G" GRAPH NAMES BUILT ELSEWHERE IN THIS ROUTINE
    372  ;
    373 NEWG(NGRAPH,NMETA) ; CREATES A NEW META GRAPH, MARKS IT AS UNFINISHED
    374  ; THEN CREATES A NEW GRAPH AND POINTS THE METAGRAPH TO IT
    375  ; NGRAPH AND NMETA ARE PASSED BY REFERENCE AND ARE THE RETURN. NOTE: AS WRITTEN ONLY THE TWO NAMES ARE GENERATED; NOTHING IS FILED HERE
    376  S NGRAPH="G"_$$LKY9 ; GRAPH NAME: "G" PLUS THE VALUE OF $$LKY9
    377  S NMETA=NGRAPH_"A" ; METAGRAPH NAME IS THE GRAPH NAME WITH "A" APPENDED
    378  Q
    379  ;
    380 ADD(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
    381  ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
    382  I '$D(FARY) D  ; NO STORE PASSED: SET UP AND USE THE DEFAULT ONE
    383  . D INITFARY("C0XFARY")
    384  . S FARY="C0XFARY"
    385  D USEFARY(FARY)
    386  I '$D(C0XCNT) S C0XCNT=0 ; RUNNING TRIPLE COUNT, SHARED WITH THE CALLER (NOT NEWED)
    387  N ZNODE
    388  S ZNODE="N"_$$LKY17 ; NODE NAME THAT GOES INTO THE .01 FIELD
    389  N ZNARY ; GET READY TO CALL IENOFA
    390  S ZNARY("ZG",ZG)="" ; GRAPH
    391  S ZNARY("ZS",ZS)="" ; SUBJECT
    392  S ZNARY("ZP",ZP)="" ; PREDICATE
    393  S ZNARY("ZO",ZO)="" ; OBJECT
    394  D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS. NOTE(review): ZIENS IS NOT NEWED OR KILLED HERE - CONFIRM IENOFA CLEARS IT
    395  ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
    396  ;S ZSIEN=$$IENOF(ZS)
    397  ;S ZPIEN=$$IENOF(ZP)
    398  ;S ZOIEN=$$IENOF(ZO)
    399  ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
    400  S C0XCNT=C0XCNT+1
    401  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE ; ONE "?+n," IENS ENTRY PER TRIPLE, n IS THE RUNNING COUNT
    402  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG","")) ; FIRST IEN LISTED FOR THE GRAPH STRING
    403  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS","")) ; SUBJECT
    404  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP","")) ; PREDICATE
    405  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO","")) ; OBJECT
    406  ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
    407  Q
    408  ;
    409 ADD2(ZG,ZS,ZP,ZO,FARY) ; QUEUE ONE TRIPLE LIKE ADD, BUT FILE IN BATCHES OF BATMAX
    410  ; THE FDA IS FILED EVERY BATMAX CALLS. CALL UPDIE AT THE END FOR ANY REMAINDER
    411  I '$D(FARY) D  ; NO FILE ARRAY PASSED, USE THE DEFAULT ONE
    412  . D INITFARY("C0XFARY")
    413  . S FARY="C0XFARY"
    414  D USEFARY(FARY)
    415  I '$D(C0XCNT) S C0XCNT=0 ; C0XCNT COUNTS QUEUED TRIPLES AND IS SHARED WITH THE CALLER
    416  N ZNODE
    417  S ZNODE="N"_$$LKY17 ; NODE NAME IS "N" PLUS 17 RANDOM DIGITS
    418  N ZNARY ; GET READY TO CALL IENOFA
    419  S ZNARY("ZG",ZG)=""
    420  S ZNARY("ZS",ZS)=""
    421  S ZNARY("ZP",ZP)=""
    422  S ZNARY("ZO",ZO)=""
    423  D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS; ZIENS IS NOT NEWED HERE
    424  ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
    425  ;S ZSIEN=$$IENOF(ZS)
    426  ;S ZPIEN=$$IENOF(ZP)
    427  ;S ZOIEN=$$IENOF(ZO)
    428  ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
    429  S C0XCNT=C0XCNT+1
    430  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE ; ONE FDA ENTRY PER TRIPLE, KEYED BY C0XCNT
    431  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG","")) ; STRING IEN FOR ZG
    432  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS","")) ; STRING IEN FOR ZS
    433  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP","")) ; STRING IEN FOR ZP
    434  S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO","")) ; STRING IEN FOR ZO
    435  S BATCNT=BATCNT+1 ; NOTE(REVIEW): BATCNT AND BATMAX ARE NOT SET HERE - CONFIRM THE CALLER SETS THEM
    436  I BATCNT=BATMAX D  ; BATCH IS DONE
    437  . D UPDIE(.C0XFDA)
    438  . K C0XFDA ; UPDIE ALSO KILLS ITS BY-REFERENCE FDA
    439  . S BATCNT=0 ; RESET COUNTER
    440  ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
    441  Q
    442  ;
    443 LKY9() ;EXTRINSIC THAT RETURNS A STRING OF 9 RANDOM DIGITS. USED FOR GENERATING
    444  ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
    445  N ZN,ZI
    446  S ZN=""
    447  F ZI=1:1:9 D  ; APPEND ONE DIGIT PER PASS
    448  . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
    449  Q ZN
    450  ;
    451 LKY17() ;EXTRINSIC THAT RETURNS A STRING OF 17 RANDOM DIGITS. USED FOR GENERATING
    452  ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
    453  N ZN,ZI
    454  S ZN=""
    455  F ZI=1:1:17 D  ; APPEND ONE DIGIT PER PASS
    456  . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
    457  Q ZN
    458  ;
    459 IENOF(ZSTRING,FARY) ; EXTRINSIC WHICH RETURNS THE IEN OF ZSTRING IN THE STRINGS FILE
    460  I '$D(FARY) D  ; NO FILE ARRAY PASSED, USE THE DEFAULT ONE
    461  . D INITFARY("C0XFARY")
    462  . S FARY="C0XFARY"
    463  N ZIEN ; NOTE(REVIEW): USEFARY IS NOT CALLED HERE; C0XSN AND C0XSFN MUST ALREADY BE SET - CONFIRM
    464  S ZIEN=$O(@C0XSN@("B",ZSTRING,"")) ; LOOK THE STRING UP IN THE "B" INDEX
    465  I ZIEN="" D  ; NOT ON FILE YET, SO ADD IT (LAYGO)
    466  . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
    467  . D UPDIE(.C0XFDA2)
    468  . S ZIEN=$O(@C0XSN@("B",ZSTRING,"")) ; LOOK UP AGAIN TO GET THE NEW IEN
    469  . K C0XFDA2
    470  Q ZIEN
    471  ;
    472 IENOFA(ZOUTARY,ZINARY,FARY) ; RESOLVE STRINGS TO IEN IN STRINGS FILE
    473  ; OR ADD THEM IF MISSING. A STRING THAT APPEARS UNDER SEVERAL KEYS IS ADDED ONCE
    474  ; ZINARY AND ZOUTARY ARE PASSED BY REFERENCE
    475  ; ZINARY LOOKS LIKE ZINARY("VAR","VAL")=""
    476  ; RETURNS IN ZOUTARY OF THE FORM ZOUTARY("IEN","VAR",IEN)=""
    477  I '$D(FARY) D  ; NO FILE ARRAY PASSED, USE THE DEFAULT ONE
    478  . D INITFARY("C0XFARY")
    479  . S FARY="C0XFARY"
    480  K ZOUTARY ; START WITH CLEAN RESULTS
    481  K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
    482  N ZI S ZI=""
    483  N ZV,ZIEN,ZSEEN ; ZSEEN(VALUE) MARKS STRINGS ALREADY QUEUED FOR ADDING
    484  N ZCNT S ZCNT=0
    485  F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; LOOK FOR MISSING STRINGS
    486  . S ZV=$O(ZINARY(ZI,""))
    487  . I $O(@C0XSN@("B",ZV,""))="",'$D(ZSEEN(ZV)) D  ; NOT ON FILE AND NOT YET QUEUED
    488  . . S ZCNT=ZCNT+1,ZSEEN(ZV)="" ; QUEUE EACH DISTINCT STRING ONLY ONCE
    489  . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
    490  I $D(C0XFDA2) D  ;
    491  . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
    492  . K C0XFDA2 ; CLEAN UP
    493  F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOW GET ALL IENS (ZI IS "" AGAIN HERE)
    494  . S ZV=$O(ZINARY(ZI,""))
    495  . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
    496  . I ZIEN="" D  Q  ; REPORT AND SKIP: A NULL IEN MUST NOT BE USED AS A SUBSCRIPT
    497  . . W !,"ERROR ADDING STRING: ",ZV
    498  . . B
    499  . S ZOUTARY("IEN",ZI,ZIEN)=""
    500  Q
    501  ;
    502 UPDIE(ZFDA) ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS
    503  ; ZFDA IS PASSED BY REFERENCE AND IS KILLED BEFORE RETURNING
    504  ;ZWR ZFDA
    505  ;B
    506  K ZERR ; CLEAR ANY ERROR ARRAY LEFT FROM AN EARLIER CALL (ZERR IS NOT NEWED)
    507  D CLEAN^DILF
    508  D UPDATE^DIE("","ZFDA","","ZERR") ; FDA AND ERROR ARRAY ARE PASSED BY NAME
    509  I $D(ZERR) S ZZERR=ZZERR ; DELIBERATE: ZZERR DOESN'T EXIST, SO THIS RAISES AN ERROR
    510  ; THAT INVOKES THE ERROR TRAP IF TASKED
    511  ;. W "ERROR",!
    512  ;. ZWR ZERR
    513  ;. B
    514  K ZFDA ; EMPTY THE CALLER'S FDA SO IT CAN BE REUSED
    515  Q
    516  ;
     1C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
     2        ;;0.1;C0X;nopatch;noreleasedate;Build 5
     3        ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
     4        ;General Public License See attached copy of the License.
     5        ;
     6        ;This program is free software; you can redistribute it and/or modify
     7        ;it under the terms of the GNU General Public License as published by
     8        ;the Free Software Foundation; either version 2 of the License, or
     9        ;(at your option) any later version.
     10        ;
     11        ;This program is distributed in the hope that it will be useful,
     12        ;but WITHOUT ANY WARRANTY; without even the implied warranty of
     13        ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14        ;GNU General Public License for more details.
     15        ;
     16        ;You should have received a copy of the GNU General Public License along
     17        ;with this program; if not, write to the Free Software Foundation, Inc.,
     18        ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
     19        ;
     20        Q
     21        ;
     22INITFARY(ZFARY) ; INITIALIZE FILE NUMBERS AND OTHER USEFUL THINGS
     23        ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
     24        ; TRIPLE STORES. ZFARY IS PASSED BY NAME AND IS FILLED VIA INDIRECTION
     25        S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
     26        S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
     27        S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
     28        S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
     29        ;S @ZFARY@("C0XDIR")="/home/glilly/all_smart_patient_data/smart-rdf/"
     30        S @ZFARY@("C0XDIR")="/home/george/fmts/trunk/samples/" ; DEFAULT IMPORT DIRECTORY (HARD-CODED PATH)
     31        D USEFARY(ZFARY) ; COPY EACH ENTRY INTO A VARIABLE OF THE SAME NAME
     32        Q
     33        ;
     34USEFARY(ZFARY)  ; INITIALIZES VARIABLES SAVED IN ARRAY ZFARY
     35        N ZI S ZI=""
     36        F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  D 
     37        . ;N ZX
     38        . S ZX="S "_ZI_"="""_@ZFARY@(ZI)_""""
     39        . ;W !,ZX
     40        . X ZX
     41        Q
     42        ;
     43IMPORT(FNAME,FDIR,FURL,FARY)    ; READS A FILE FROM THE STANDARD
     44        ; DIRECTORY, LOADS IT INTO THE TRIPLESTORE AS TEXT, AND PROCESSES IT AS RDF
     45        ; NOTE: DESPITE EARLIER COMMENTS THIS TAG RETURNS NO VALUE (ARGUMENTLESS QUIT)
     46        ; FDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO STANDARD DIR)
     47        ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
     48        ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
     49        I '$D(FARY) D  ; NO FILE ARRAY PASSED, USE THE DEFAULT ONE
     50        . D INITFARY("C0XFARY")
     51        . S FARY="C0XFARY"
     52        D USEFARY(FARY)
     53        N ZD,ZTMP
     54        I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
     55        I '$D(FURL) D  ; DEFAULT URI IS THE DIRECTORY PLUS THE FILE NAME
     56        . N ZN2 S ZN2=$TR(FNAME,".","_") ; REPLACE EACH DOT IN THE NAME WITH AN UNDERSCORE
     57        . S FURL=FDIR_ZN2
     58        N ZTMP
     59        S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
     60        K @ZTMP ; MAKE SURE IT'S CLEAR
     61        S C0XSTART=$$NOW^XLFDT ; START TIME, USED LATER BY PROCESS FOR ELAPSED TIME
     62        W !,"STARTED: ",C0XSTART
     63        W !,"READING IN: ",FNAME
     64        I '$$FILEIN(ZTMP,FDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
     65        . W !,"ERROR READING FILE: ",FDIR,FNAME
     66        S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
     67        W !,$O(@ZRDF@(""),-1)," LINES READ"
     68        D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF
     69        Q
     70        ;
     71WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
     72        ;
     73        I '$D(FARY) D  ;
     74        . D INITFARY("C0XFARY")
     75        . S FARY="C0XFARY"
     76        D USEFARY(FARY)
     77        ;N ZLOC,ZTMP
     78        K ZTMP
     79        S ZLOC=$NA(^TMP("C0X","WGET",$J))
     80        S C0XSTART=$$NOW^XLFDT
     81        W !,"STARTED: ",C0XSTART
     82        W !,"DOWNLOADING: ",ZURL
     83        S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
     84        M @ZLOC=ZTMP
     85        W !,$O(@ZLOC@(""),-1)," LINES READ"
     86        D INSRDF(ZLOC,ZURL,FARY)
     87        Q
     88        ;
     89INSRDF(ZRDF,ZNAME,FARY) ; INSERT AN RDF FILE INTO THE STORE AND PROCESS
     90        ; ZRDF IS PASSED BY NAME; ZNAME IS THE URL/NAME RECORDED FOR THE FILE
     91        I '$D(FARY) D  ; NO FILE ARRAY PASSED, USE THE DEFAULT ONE
     92        . D INITFARY("C0XFARY")
     93        . S FARY="C0XFARY"
     94        D USEFARY(FARY)
     95        N ZGRAPH,ZSUBJECT
     96        S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
     97        S ZSUBJECT=$$ANONS ; RANDOM ANONYMOUS SUBJECT
     98        D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
     99        N ZTXTNM
     100        S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
     101        D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY)
     102        D UPDIE(.C0XFDA) ; FILE THE TWO TRIPLES QUEUED ABOVE
     103        K C0XCNT ;RESET FOR NEXT TIME
     104        D STORETXT(ZRDF,ZTXTNM,FARY) ; KEEP THE RAW TEXT UNDER THE TEXT NODE NAME
     105        W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
     106        D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT; ZNAME IS PASSED AS ZGRF, ZGRAPH AS ZMETA
     107        Q
     108        ;
     109STORETXT(ZTXT,ZNAME,FARY)       ; STORE TEXT IN THE TRIPLESTORE AT ZNAME
     110        ; ZTXT IS HANDED TO WP^DIE AS THE TEXT SOURCE; ZNAME IS THE STRING ENTRY TO ATTACH IT TO
     111        I '$D(FARY) D  ; NO FILE ARRAY PASSED, USE THE DEFAULT ONE
     112        . D INITFARY("C0XFARY")
     113        . S FARY="C0XFARY"
     114        D USEFARY(FARY)
     115        N ZIEN
     116        S ZIEN=$$IENOF(ZNAME,FARY) ; GET THE IEN (IENOF ADDS THE STRING IF MISSING)
     117        D CLEAN^DILF
     118        K ZERR
     119        D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR") ; FILE THE TEXT INTO FIELD 1 OF THAT ENTRY
     120        I $D(ZERR) D  ; SHOW THE ERROR ARRAY; PROCESSING CONTINUES
     121        . ZWR ZERR
     122        Q
     123        ;
     124GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT
     125        ; ZRTN IS PASSED BY REFERENCE
     126        I '$D(FARY) D  ;
     127        . D INITFARY("C0XFARY")
     128        . S FARY="C0XFARY"
     129        D USEFARY(FARY)
     130        N ZIEN
     131        S ZIEN=$$IENOF(ZNAME)
     132        S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN")
     133        Q
     134        ;
     135WHERETXT(ZNAME,FARY)    ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
     136        ; WHERE THE TEXT IS LOCATED. NAME IS THE NAME OF THE STRING
     137        I '$D(FARY) D  ;
     138        . D INITFARY("C0XFARY")
     139        . S FARY="C0XFARY"
     140        D USEFARY(FARY)
     141        N ZIEN
     142        S ZIEN=$$IENOF(ZNAME)
     143        Q $NA(@C0XSN@(ZIEN,1))
     144        ;
     145FILEIN(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
     146        ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
     147        ; IE ^TMP("C0X","FILEIN",1)
     148        ; ZLVL IN THIS CASE WOULD BE 3 INCREMENTING THE 1
     149        ; EXTRINSIC WHICH RETURNS THE RESULT OF FTG^%ZISH
     150        S OK=$$FTG^%ZISH(ZDIR,FNAME,ZINTMP,ZLVL)
     151        Q OK
     152        ;
     153TESTPROC        ; TEST PROCESS WITH EXISTING SMALL RDF FILE
     154        S ZIN=$NA(^TMP("C0X",12226,"FILEIN")) ; HARD-CODED SUBSCRIPT 12226; PRESUMABLY AN OLD JOB NUMBER - CONFIRM
     155        S ZGRAPH="/test/rdfFile" ; GRAPH NAME FOR THE TEST
     156        S ZM="/test/rdfFile/meta" ; METAGRAPH NAME FOR THE TEST
     157        D PROCESS(.G,ZIN,ZGRAPH,ZM) ; FARY IS OMITTED, SO THE DEFAULT STORE IS USED
     158        Q
     159        ;
     160PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY)      ; PROCESS AN INCOMING RDF FILE
     161        ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING
     162        ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
     163        ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
     164        ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
     165        ;
     166        I '$D(FARY) D  ;
     167        . D INITFARY("C0XFARY")
     168        . S FARY="C0XFARY"
     169        D USEFARY(FARY)
     170        ; -- first parse the rdf file with the MXML parser
     171        ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
     172        S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
     173        ; -- assign the MXLM dom global name to ZDOM
     174        S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
     175        W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
     176        ; -- populate the metagraph to point to the graph with status unfinished
     177        S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
     178        I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
     179        D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
     180        D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
     181        ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
     182        S C0XDATE=$$NOW^XLFDT
     183        D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
     184        D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
     185        ; --
     186        ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
     187        ; -- put them in a local variable for quick reference
     188        ; -- TODO: create a graph for vocabularies and validate incoming against it
     189        ;
     190        S C0XVOC=""
     191        N ZI,ZJ,ZK S ZI=""
     192        F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
     193        . S ZVOC=$P(ZI,"xmlns:",2)
     194        . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
     195        ;W !,"VOCABS:" ZWR C0XVOC
     196        ;
     197        ; -- look for children called rdf:Description. quit if none. not an rdf file
     198        ;
     199        S ZI=$O(@ZDOM@(1,"C",""))
     200        I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ; not an rdf file
     201        . W !,"Error. Not an RDF file. Cannot process."
     202        ;
     203        ; -- now process the rdf description children
     204        ;
     205        S ZI=""
     206        S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
     207        F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
     208        . ; -- we are skipping any child that is not rdf:Description
     209        . ; -- TODO: check to see if this is right in general
     210        . ;
     211        . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
     212        . . W !,"SKIPPING NODE: ",ZI
     213        . ; -- now looking for the subject for the triples
     214        . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
     215        . I ZX'="" D  ; we have the subject
     216        . . ;W " about: ",ZX
     217        . . S C0XSUB=ZX
     218        . E  D  ;
     219        . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
     220        . . I ZX'="" D  ;
     221        . . . S C0XSUB=ZX
     222        . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT
     223        . ;
     224        . ; -- we now have the subject. the children of this node have the rest
     225        . ;
     226        . S ZJ="" ; for the children of the rdf:Description nodes
     227        . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
     228        . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
     229        . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
     230        . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
     231        . . I C0XPRE[":" D  ; expand using vocabulary
     232        . . . N ZB,ZA
     233        . . . S ZB=$P(C0XPRE,":",1)
     234        . . . S ZA=$P(C0XPRE,":",2)
     235        . . . I $G(C0XVOC(ZB))'="" D  ;
     236        . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
     237        . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
     238        . . I ZY'="" D  Q ;
     239        . . . S C0XOBJ=ZY ; object
     240        . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; finally. our first real triple
     241        . . ; -- this is an else because of the quit above
     242        . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
     243        . . I ZX'="" D  Q  ; got one
     244        . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
     245        . . . ; without change... this could be foolish .. look at it again later
     246        . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
     247        . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
     248        . . I C0XOBJ="" D  Q  ; not a happy situation
     249        . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
     250        . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
     251        W !,"INSERTING ",C0XCNT," TRIPLES"
     252        D UPDIE(.C0XFDA) ; commit the updates to the file
     253        ; next, mark the graph as finished
     254        S C0XEND=$$NOW^XLFDT
     255        W !," ENDED AT: ",C0XEND
     256        S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
     257        W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     258        W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
     259        Q
     260        ;
     261SHOW(ZN)        ; DEBUG AID: ZWRITE MXML DOM NODE ZN OF DOCUMENT 1 FOR THIS JOB
     262        ZWR ^TMP("MXMLDOM",$J,1,ZN,*) ; THE DOCUMENT ID IS HARD-CODED TO 1
     263        Q
     264        ;
     265ANONS() ; EXTRINSIC: RETURNS AN ANONYMOUS SUBJECT NAME, "_S:" PLUS 9 RANDOM DIGITS
     266        Q "_S:"_$$LKY9
     267        ;
     268NEWG(NGRAPH,NMETA)      ; GENERATES A NEW GRAPH NAME AND A MATCHING METAGRAPH NAME
     269        ; ONLY THE TWO NAMES ARE BUILT HERE; NOTHING IS WRITTEN TO THE TRIPLE STORE
     270        ; NGRAPH AND NMETA ARE PASSED BY REFERENCE AND ARE THE RETURN VALUES
     271        S NGRAPH="G"_$$LKY9 ; "G" PLUS 9 RANDOM DIGITS
     272        S NMETA=NGRAPH_"A" ; METAGRAPH NAME IS THE GRAPH NAME WITH "A" APPENDED
     273        Q
     274        ;
     275ADD(ZG,ZS,ZP,ZO,FARY)   ; QUEUE ONE TRIPLE FOR THE TRIPLESTORE. ALL VALUES ARE TEXT
     276        ; THE FDA (C0XFDA) IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
     277        I '$D(FARY) D  ; NO FILE ARRAY PASSED, USE THE DEFAULT ONE
     278        . D INITFARY("C0XFARY")
     279        . S FARY="C0XFARY"
     280        D USEFARY(FARY)
     281        I '$D(C0XCNT) S C0XCNT=0 ; C0XCNT COUNTS QUEUED TRIPLES AND IS SHARED WITH THE CALLER
     282        N ZNODE
     283        S ZNODE="N"_$$LKY17 ; NODE NAME IS "N" PLUS 17 RANDOM DIGITS
     284        N ZNARY ; GET READY TO CALL IENOFA
     285        S ZNARY("ZG",ZG)=""
     286        S ZNARY("ZS",ZS)=""
     287        S ZNARY("ZP",ZP)=""
     288        S ZNARY("ZO",ZO)=""
     289        D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS; ZIENS IS NOT NEWED HERE
     290        ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
     291        ;S ZSIEN=$$IENOF(ZS)
     292        ;S ZPIEN=$$IENOF(ZP)
     293        ;S ZOIEN=$$IENOF(ZO)
     294        ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
     295        S C0XCNT=C0XCNT+1
     296        S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE ; ONE FDA ENTRY PER TRIPLE, KEYED BY C0XCNT
     297        S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG","")) ; STRING IEN FOR ZG
     298        S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS","")) ; STRING IEN FOR ZS
     299        S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP","")) ; STRING IEN FOR ZP
     300        S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO","")) ; STRING IEN FOR ZO
     301        ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
     302        Q
     303        ;
     304LKY9()  ;EXTRINSIC THAT RETURNS A STRING OF 9 RANDOM DIGITS. USED FOR GENERATING
     305        ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
     306        N ZN,ZI
     307        S ZN=""
     308        F ZI=1:1:9 D  ; APPEND ONE DIGIT PER PASS
     309        . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
     310        Q ZN
     311        ;
     312LKY17() ;EXTRINSIC THAT RETURNS A STRING OF 17 RANDOM DIGITS. USED FOR GENERATING
     313        ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
     314        N ZN,ZI
     315        S ZN=""
     316        F ZI=1:1:17 D  ; APPEND ONE DIGIT PER PASS
     317        . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
     318        Q ZN
     319        ;
     320IENOF(ZSTRING,FARY)     ; EXTRINSIC WHICH RETURNS THE IEN OF ZSTRING IN THE STRINGS FILE
     321        I '$D(FARY) D  ; NO FILE ARRAY PASSED: INITFARY ALSO LOADS THE DEFAULT C0X* VARIABLES
     322        . D INITFARY("C0XFARY")
     323        . S FARY="C0XFARY"
     324        N ZIEN ; WHEN FARY IS PASSED, USEFARY IS NOT CALLED: C0XSN AND C0XSFN MUST ALREADY BE SET
     325        S ZIEN=$O(@C0XSN@("B",ZSTRING,"")) ; LOOK THE STRING UP IN THE "B" INDEX
     326        I ZIEN="" D  ; NOT ON FILE YET, SO ADD IT (LAYGO)
     327        . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
     328        . D UPDIE(.C0XFDA2)
     329        . S ZIEN=$O(@C0XSN@("B",ZSTRING,"")) ; LOOK UP AGAIN TO GET THE NEW IEN
     330        . K C0XFDA2
     331        Q ZIEN
     332        ;
     333IENOFA(ZOUTARY,ZINARY,FARY)     ; RESOLVE STRINGS TO IEN IN STRINGS FILE
     334        ; OR ADD THEM IF
     335        ; MISSING. ZINARY AND ZOUTARY ARE PASSED BY REFERENCE
     336        ; ZINARY LOOKS LIKE ZINARY("VAR","VAL")=""
     337        ; RETURNS IN ZOUTARY OF THE FORM ZOUTARY("IEN","VAR",IEN)=""
     338        I '$D(FARY) D  ;
     339        . D INITFARY("C0XFARY")
     340        . S FARY="C0XFARY"
     341        K ZOUTARY ; START WITH CLEAN RESULTS
     342        K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
     343        N ZI S ZI=""
     344        N ZV,ZIEN
     345        N ZCNT S ZCNT=0
     346        F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; LOOK FOR MISSING STRINGS
     347        . S ZV=$O(ZINARY(ZI,""))
     348        . I $O(@C0XSN@("B",ZV,""))="" D  ;
     349        . . S ZCNT=ZCNT+1
     350        . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
     351        I $D(C0XFDA2) D  ;
     352        . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
     353        . K C0XFDA2 ; CLEAN UP
     354        F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOT GET ALL IENS
     355        . S ZV=$O(ZINARY(ZI,""))
     356        . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
     357        . I ZIEN="" D  ;
     358        . . W !,"ERROR ADDING STRING: ",ZV
     359        . . B
     360        . S ZOUTARY("IEN",ZI,ZIEN)=""
     361        Q
     362        ;
     363UPDIE(ZFDA)     ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS
     364        ; ZFDA IS PASSED BY REFERENCE AND IS KILLED BEFORE RETURNING
     365        ;ZWR ZFDA
     366        ;B
     367        K ZERR ; CLEAR ANY ERROR ARRAY LEFT FROM AN EARLIER CALL (ZERR IS NOT NEWED)
     368        D CLEAN^DILF
     369        D UPDATE^DIE("","ZFDA","","ZERR") ; FDA AND ERROR ARRAY ARE PASSED BY NAME
     370        I $D(ZERR) S ZZERR=ZZERR ; DELIBERATE: ZZERR DOESN'T EXIST, SO THIS RAISES AN ERROR
     371        ; THAT INVOKES THE ERROR TRAP IF TASKED
     372        ;. W "ERROR",!
     373        ;. ZWR ZERR
     374        ;. B
     375        K ZFDA ; EMPTY THE CALLER'S FDA SO IT CAN BE REUSED
     376        Q
     377        ;
  • fmts/trunk/p/C0XTEST.m

    r1280 r1294  
    1 C0XTEST ; GPL - Fileman Triples bulk load tester ;11/6/11  17:05
    2  ;;0.1;C0X;nopatch;noreleasedate;Build 1
    3  ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
    4  ;General Public License See attached copy of the License.
    5  ;
    6  ;This program is free software; you can redistribute it and/or modify
    7  ;it under the terms of the GNU General Public License as published by
    8  ;the Free Software Foundation; either version 2 of the License, or
    9  ;(at your option) any later version.
    10  ;
    11  ;This program is distributed in the hope that it will be useful,
    12  ;but WITHOUT ANY WARRANTY; without even the implied warranty of
    13  ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14  ;GNU General Public License for more details.
    15  ;
    16  ;You should have received a copy of the GNU General Public License along
    17  ;with this program; if not, write to the Free Software Foundation, Inc.,
    18  ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    19  ;
    20  Q
    21  ;
    22 EN ; run the test: build 10000 random triples in an FDA and time BULKLOAD
    23  ;
    24  k C0XFDA ; clear the node variable
    25  i '$d(^C0X(101,0)) d  ; global doesn't exist, so seed the file header node
    26  . s ^C0X(101,0)="C0X TRIPLE^172.101^1^1"
    27  n zg
    28  S zg="_:G"_$$LKY9 ; all nodes are in the same graph
    29  n zi
    30  f zi=1:1:10000 d  ; try a test of 10000 nodes
    31  . s C0XFDA(172.101,zi,.01)="N"_$$LKY17 ; node name
    32  . s C0XFDA(172.101,zi,.02)=zg
    33  . s C0XFDA(172.101,zi,.03)=$R(100000) ; random number stands in for the subject
    34  . s C0XFDA(172.101,zi,.04)=$R(100000) ; random number stands in for the predicate
    35  . s C0XFDA(172.101,zi,.05)=$R(100000) ; random number stands in for the object
    36  S C0XST=$$NOW^XLFDT ; start of the insertion test
    37  W !,"INSERTION STARTS AT ",C0XST,!
    38  d BULKLOAD(.C0XFDA)
    39  s C0XEND=$$NOW^XLFDT ; end of the insertion test
    40  W !,"INSERTION ENDS AT ",C0XEND
    41  S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XST,2)
    42  W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    43  I C0XDIFF>0 W !," APPROXIMATELY ",$P(10000/C0XDIFF,".")," NODES PER SECOND" ; no divide by zero
    44  q
    45  ;
    46 LKY9() ;EXTRINSIC THAT RETURNS A STRING OF 9 RANDOM DIGITS. USED FOR GENERATING
    47  ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
    48  N ZN,ZI
    49  S ZN=""
    50  F ZI=1:1:9 D  ; APPEND ONE DIGIT PER PASS
    51  . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
    52  Q ZN
    53  ;
    54 LKY17() ;EXTRINSIC THAT RETURNS A STRING OF 17 RANDOM DIGITS. USED FOR GENERATING
    55  ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
    56  N ZN,ZI
    57  S ZN=""
    58  F ZI=1:1:17 D  ; APPEND ONE DIGIT PER PASS
    59  . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
    60  Q ZN
    61  ;
    62 BULKLOAD(ZBFDA) ; BULK LOADER FOR LOADING TRIPLES INTO FILE 172.101
    63  ; USING GLOBAL SETS INSTEAD OF UPDATE^DIE
    64  ; QUITS IF FILE IS NOT 172.101
    65  ; EXPECTS AN FDA WITH PLAIN NUMBERS (NOT IENS STRINGS) AS THE SECOND SUBSCRIPT
    66  ; THE NUMBERS ARE EXPECTED TO START AT 1; THE CODE DOES NOT CHECK THIS
    67  ; ASSUMES THAT THE LAST SUBSCRIPT IS THE COUNT OF ENTRIES
    68  ; ZBFDA IS PASSED BY REFERENCE
    69  ;
    70  ; -- reserves a block of iens from file 172.101 by locking the zero node
    71  ; -- ^C0X(101,0) and adding to pieces 3 and 4 of it
    72  ; -- then unlocking to minimize the duration of the lock
    73  ;
    74  W !,"USING BULKLOAD"
    75  I '$D(ZBFDA) Q  ; EMPTY FDA
    76  I $O(ZBFDA(""))'=172.101 Q  ; WRONG FILE
    77  N ZCNT,ZP3,ZP4
    78  ; -- find the number of nodes to insert
    79  S ZCNT=$O(ZBFDA(172.101,""),-1) ; highest subscript is taken as the count
    80  I ZCNT="" D  Q  ;
    81  . W !,"ERROR IN BULK LOAD - INVALID NODE COUNT"
    82  . B
    83  ; -- lock the zero node and reserve a block of iens to insert
    84  W !,"LOCKING ZERO NODE"
    85  LOCK +^C0X(101,0) ; no timeout is given, so this waits until the lock is granted
    86  S ZP3=$P(^C0X(101,0),U,3) ; U is not set in this routine; presumably the "^" delimiter - confirm
    87  S ZP4=$P(^C0X(101,0),U,4)
    88  S $P(^C0X(101,0),U,3)=ZP3+ZCNT+1 ; NOTE(review): adds ZCNT+1 although ZCNT iens are used - confirm
    89  S $P(^C0X(101,0),U,4)=ZP4+ZCNT+1 ; NOTE(review): same +1 here - confirm it is intended
    90  LOCK -^C0X(101,0)
    91  N ZI,ZN,ZG,ZS,ZP,ZO,ZIEN,ZBASE
    92  S ZBASE=ZP3 ; the last ien in the file
    93  W !,"ZERO NODE UNLOCKED, IENS RESERVED=",ZCNT
    94  W !,$$NOW^XLFDT
    95  S ZI=""
    96  F  S ZI=$O(ZBFDA(172.101,ZI)) Q:ZI=""  D  ; one pass per FDA entry
    97  . S ZN=$G(ZBFDA(172.101,ZI,.01)) ; node name
    98  . I ZN="" D BLKERR Q  ; an entry with any field missing is reported and skipped
    99  . S ZG=$G(ZBFDA(172.101,ZI,.02)) ; graph pointer
    100  . I ZG="" D BLKERR Q  ;
    101  . S ZS=$G(ZBFDA(172.101,ZI,.03)) ; subject pointer
    102  . I ZS="" D BLKERR Q  ;
    103  . S ZP=$G(ZBFDA(172.101,ZI,.04)) ; predicate pointer
    104  . I ZP="" D BLKERR Q  ;
    105  . S ZO=$G(ZBFDA(172.101,ZI,.05)) ; object pointer
    106  . I ZO="" D BLKERR Q  ;
    107  . S ZIEN=ZI+ZBASE ; the new ien
    108  . S ^C0X(101,ZIEN,0)=ZN_U_ZG_U_ZS_U_ZP_U_ZO ; set the zero node
    109  . S ^C0X(101,"B",ZN,ZIEN)="" ; the B index
    110  . S ^C0X(101,"G",ZG,ZIEN)="" ; the G for Graph index
    111  . S ^C0X(101,"SPO",ZS,ZP,ZO)="" ; the remaining indexes hold orderings of S, P, O and G without the ien
    112  . S ^C0X(101,"SOP",ZS,ZO,ZP)=""
    113  . S ^C0X(101,"OPS",ZO,ZP,ZS)=""
    114  . S ^C0X(101,"OSP",ZO,ZS,ZP)=""
    115  . S ^C0X(101,"GOPS",ZG,ZO,ZP,ZS)=""
    116  . S ^C0X(101,"GOSP",ZG,ZO,ZS,ZP)=""
    117  . S ^C0X(101,"GPSO",ZG,ZP,ZS,ZO)=""
    118  . S ^C0X(101,"GSPO",ZG,ZS,ZP,ZO)=""
    119  Q
    120  ;
    121 BLKERR ; REPORT A BAD FDA ENTRY; ZBFDA AND ZI COME FROM THE BULKLOAD LOOP
    122  W !,"ERROR IN BULK LOAD",! ZWR ZBFDA(172.101,ZI,*) ; ENTRIES ARE STORED UNDER THE FILE NUMBER
    123  B
    124  Q
    125  ;
     1C0XTEST ; GPL - Fileman Triples bulk load tester ;11/6/11  17:05
     2        ;;0.1;C0X;nopatch;noreleasedate;Build 5
     3        ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
     4        ;General Public License See attached copy of the License.
     5        ;
     6        ;This program is free software; you can redistribute it and/or modify
     7        ;it under the terms of the GNU General Public License as published by
     8        ;the Free Software Foundation; either version 2 of the License, or
     9        ;(at your option) any later version.
     10        ;
     11        ;This program is distributed in the hope that it will be useful,
     12        ;but WITHOUT ANY WARRANTY; without even the implied warranty of
     13        ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14        ;GNU General Public License for more details.
     15        ;
     16        ;You should have received a copy of the GNU General Public License along
     17        ;with this program; if not, write to the Free Software Foundation, Inc.,
     18        ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
     19        ;
     20        Q
     21        ;
     22EN      ; run the test: build 100000 random triples in an FDA and time BULKLOAD
     23        ;
     24        k C0XFDA ; clear the node variable
     25        i '$d(^C0X(101,0)) d  ; global doesn't exist, so seed the file header node
     26        . s ^C0X(101,0)="C0X TRIPLE^172.101^1^1"
     27        n zg
     28        S zg="_:G"_$$LKY9 ; all nodes are in the same graph
     29        n zi
     30        f zi=1:1:100000 d  ; try a test of 100000 nodes
     31        . s C0XFDA(172.101,zi,.01)="N"_$$LKY17 ; node name
     32        . s C0XFDA(172.101,zi,.02)=zg
     33        . s C0XFDA(172.101,zi,.03)=$R(100000) ; random number stands in for the subject
     34        . s C0XFDA(172.101,zi,.04)=$R(100000) ; random number stands in for the predicate
     35        . s C0XFDA(172.101,zi,.05)=$R(100000) ; random number stands in for the object
     36        S C0XST=$$NOW^XLFDT ; start of the insertion test
     37        W !,"INSERTION STARTS AT ",C0XST,!
     38        d BULKLOAD(.C0XFDA)
     39        s C0XEND=$$NOW^XLFDT ; end of the insertion test
     40        W !,"INSERTION ENDS AT ",C0XEND
     41        S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XST,2)
     42        W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     43        I C0XDIFF>0 W !," APPROXIMATELY ",$P(100000/C0XDIFF,".")," NODES PER SECOND" ; guard avoids divide by zero
     44        q
     45        ;
     46LKY9()  ;EXTRINSIC THAT RETURNS A STRING OF 9 RANDOM DIGITS. USED FOR GENERATING
     47        ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
     48        N ZN,ZI
     49        S ZN=""
     50        F ZI=1:1:9 D  ; APPEND ONE DIGIT PER PASS
     51        . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
     52        Q ZN
     53        ;
     54LKY17() ;EXTRINSIC THAT RETURNS A STRING OF 17 RANDOM DIGITS. USED FOR GENERATING
     55        ; NODE AND GRAPH NAMES. NOTHING HERE CHECKS THAT THE RESULT IS UNIQUE
     56        N ZN,ZI
     57        S ZN=""
     58        F ZI=1:1:17 D  ; APPEND ONE DIGIT PER PASS
     59        . S ZN=ZN_$R(10) ; $R(10) GIVES A DIGIT FROM 0 TO 9
     60        Q ZN
     61        ;
     62BULKLOAD(ZBFDA) ; BULK LOADER FOR LOADING TRIPLES INTO FILE 172.101
     63        ; USING GLOBAL SETS INSTEAD OF UPDATE^DIE
     64        ; QUITS IF FILE IS NOT 172.101
     65        ; EXPECTS AN FDA WITH PLAIN NUMBERS (NOT IENS STRINGS) AS THE SECOND SUBSCRIPT
     66        ; THE NUMBERS ARE EXPECTED TO START AT 1; THE CODE DOES NOT CHECK THIS
     67        ; ASSUMES THAT THE LAST SUBSCRIPT IS THE COUNT OF ENTRIES
     68        ; ZBFDA IS PASSED BY REFERENCE
     69        ;
     70        ; -- reserves a block of iens from file 172.101 by locking the zero node
     71        ; -- ^C0X(101,0) and adding to pieces 3 and 4 of it
     72        ; -- then unlocking to minimize the duration of the lock
     73        ;
     74        W !,"USING BULKLOAD"
     75        I '$D(ZBFDA) Q  ; EMPTY FDA
     76        I $O(ZBFDA(""))'=172.101 Q  ; WRONG FILE
     77        N ZCNT,ZP3,ZP4
     78        ; -- find the number of nodes to insert
     79        S ZCNT=$O(ZBFDA(172.101,""),-1) ; highest subscript is taken as the count
     80        I ZCNT="" D  Q  ;
     81        . W !,"ERROR IN BULK LOAD - INVALID NODE COUNT"
     82        . B
     83        ; -- lock the zero node and reserve a block of iens to insert
     84        W !,"LOCKING ZERO NODE"
     85        LOCK +^C0X(101,0) ; no timeout is given, so this waits until the lock is granted
     86        S ZP3=$P(^C0X(101,0),U,3) ; U is not set in this routine; presumably the "^" delimiter - confirm
     87        S ZP4=$P(^C0X(101,0),U,4)
     88        S $P(^C0X(101,0),U,3)=ZP3+ZCNT+1 ; NOTE(review): adds ZCNT+1 although ZCNT iens are used - confirm
     89        S $P(^C0X(101,0),U,4)=ZP4+ZCNT+1 ; NOTE(review): same +1 here - confirm it is intended
     90        LOCK -^C0X(101,0)
     91        N ZI,ZN,ZG,ZS,ZP,ZO,ZIEN,ZBASE
     92        S ZBASE=ZP3 ; the last ien in the file
     93        W !,"ZERO NODE UNLOCKED, IENS RESERVED=",ZCNT
     94        W !,$$NOW^XLFDT
     95        S ZI=""
     96        F  S ZI=$O(ZBFDA(172.101,ZI)) Q:ZI=""  D  ; one pass per FDA entry
     97        . S ZN=$G(ZBFDA(172.101,ZI,.01)) ; node name
     98        . I ZN="" D BLKERR Q  ; an entry with any field missing is reported and skipped
     99        . S ZG=$G(ZBFDA(172.101,ZI,.02)) ; graph pointer
     100        . I ZG="" D BLKERR Q  ;
     101        . S ZS=$G(ZBFDA(172.101,ZI,.03)) ; subject pointer
     102        . I ZS="" D BLKERR Q  ;
     103        . S ZP=$G(ZBFDA(172.101,ZI,.04)) ; predicate pointer
     104        . I ZP="" D BLKERR Q  ;
     105        . S ZO=$G(ZBFDA(172.101,ZI,.05)) ; object pointer
     106        . I ZO="" D BLKERR Q  ;
     107        . S ZIEN=ZI+ZBASE ; the new ien
     108        . S ^C0X(101,ZIEN,0)=ZN_U_ZG_U_ZS_U_ZP_U_ZO ; set the zero node
     109        . S ^C0X(101,"B",ZN,ZIEN)="" ; the B index
     110        . S ^C0X(101,"G",ZG,ZIEN)="" ; the G for Graph index
     111        . S ^C0X(101,"SPO",ZS,ZP,ZO)="" ; the remaining indexes hold orderings of S, P, O and G without the ien
     112        . S ^C0X(101,"SOP",ZS,ZO,ZP)=""
     113        . S ^C0X(101,"OPS",ZO,ZP,ZS)=""
     114        . S ^C0X(101,"OSP",ZO,ZS,ZP)=""
     115        . S ^C0X(101,"GOPS",ZG,ZO,ZP,ZS)=""
     116        . S ^C0X(101,"GOSP",ZG,ZO,ZS,ZP)=""
     117        . S ^C0X(101,"GPSO",ZG,ZP,ZS,ZO)=""
     118        . S ^C0X(101,"GSPO",ZG,ZS,ZP,ZO)=""
     119        Q
     120        ;
     121BLKERR  ;
     122        W !,"ERROR IN BULK LOAD",! ZWR ZBFDA(ZI)
     123        B
     124        Q
     125        ;
Note: See TracChangeset for help on using the changeset viewer.