Changeset 1628 for fmts/trunk


Ignore:
Timestamp:
Sep 22, 2013, 6:55:12 PM (11 years ago)
Author:
George Lilly
Message:

fixed a bug or two for importing ca drugs

Location:
fmts/trunk/p
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • fmts/trunk/p/C0XF2N.m

    r1539 r1628  
    88        ;the Free Software Foundation; either version 2 of the License, or
    99        ;(at your option) any later version.
    10         ;
    11         ;This program is distributed in the hope that it will be useful,
    12         ;but WITHOUT ANY WARRANTY; without even the implied warranty of
    13         ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14         ;GNU General Public License for more details.
    15         ;
    16         ;You should have received a copy of the GNU General Public License along
    17         ;with this program; if not, write to the Free Software Foundation, Inc.,
    18         ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    19         ;
    20         Q
    21         ;
    22         ; This is based on C0XMAIN but experiments with a fast load for triples
    23         ; that will write directly to the fileman global
    24         ; The file 172.101 is a F2N design style for triples, which means
    25         ; that it is a Flat file with no subfiles, all fields at the root
    26         ; ... it is a "2" file solution which means all strings are stored in
    27         ; ...    strings file and pointed to by the triples file
    28         ; ... it is an N file because it has generated Node IDs instead of
    29         ; ...   DINUM which would use the IEN for the Node ID.
    30         ; gpl 11/04/2011
    31         ;
    32 INITFARY(ZFARY) ; INITIALIZE FILE NUMBERS AND OTHER USEFUL THINGS IN THE ARRAY NAMED BY ZFARY
    33         ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
    34         ; TRIPLE STORES. ZFARY IS PASSED BY NAME, E.G. "C0XFARY"
    35         I $D(@ZFARY) Q  ; ALREADY INITIALIZED (USEFARY IS NOT CALLED ON THIS PATH)
    36         S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
    37         S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
    38         S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
    39         S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
    40         S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/smart-new/" ; DEFAULT INPUT DIRECTORY
    41         S @ZFARY@("BLKLOAD")=1 ; this file supports block load
    42         S @ZFARY@("FMTSSTYLE")="F2N" ; fileman style
    43         S @ZFARY@("REPLYFMT")="JSON" ; presumably the reply format for queries -- TODO confirm
    44         D USEFARY(ZFARY) ; COPY THE ENTRIES INTO LOCAL VARIABLES OF THE SAME NAMES
    45         Q
    46         ;
    47 USEFARY(ZFARY)  ; COPIES EACH ENTRY OF THE ARRAY NAMED BY ZFARY INTO A LOCAL VARIABLE OF THE SAME NAME
    48         N ZI S ZI="" ; ZFARY IS PASSED BY NAME; E.G. @ZFARY@("C0XTFN")=172.101 GIVES C0XTFN=172.101
    49         F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  D 
    50         . ; name indirection replaces the XECUTE of a built "S var=""value""" string,
    51         . ; which failed on values containing a quote and leaked the scratch variable ZX
    52         . ; the target variables are deliberately not NEWed: callers read them after return
    53         . S @ZI=@ZFARY@(ZI)
    54         Q
    55         ;
    56 FILEIN  ; INTERACTIVE ENTRY POINT FOR OPTION TO READ IN A FILE: PROMPTS FOR DIRECTORY AND FILE NAME, THEN IMPORTS
    57         I '$D(C0XFARY) D INITFARY("C0XFARY")
    58         D USEFARY("C0XFARY") ; SETS C0XDIR AND THE OTHER SETTINGS AS LOCAL VARIABLES
    59         S DIR(0)="F^3:240"
    60         S DIR("A")="File Directory"
    61         S DIR("B")=C0XDIR
    62         D ^DIR ; PROMPT FOR THE DIRECTORY; THE ANSWER IS READ FROM Y BELOW
    63         I Y="^" Q  ;
    64         S C0XDIR=Y
    65         S C0XFARY("C0XDIR")=Y ; KEEP THE CHOICE IN THE FILE ARRAY SO USEFARY RESTORES IT LATER
    66         S DIR(0)="F^3:240"
    67         S DIR("A")="File Name"
    68         I '$D(C0XFN) S DIR("B")="qds.rdf"
    69         E  S DIR("B")=C0XFN ; DEFAULT TO THE FILE NAME LEFT IN C0XFN BY AN EARLIER RUN
    70         D ^DIR
    71         I Y="" Q  ;
    72         I Y="^" Q  ;
    73         S C0XFN=Y
    74         D IMPORT(C0XFN,C0XDIR,,"C0XFARY") ; THE THIRD ARGUMENT (INURL) IS OMITTED
    75         K C0XFDA
    76         Q
    77         ;
    78 IMPORT(FNAME,INDIR,INURL,FARY)  ; READS FILE FNAME FROM THE STANDARD
    79         ; DIRECTORY, LOADS IT INTO THE TRIPLESTORE AS TEXT, AND PROCESSES IT AS RDF.
    80         ; NOTE: QUITS WITHOUT A VALUE, SO IT MUST BE CALLED WITH DO (AS FILEIN DOES)
    81         ; INDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO STANDARD DIR)
    82         ; INURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
    83         ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
    84         I '$D(FARY) D  ;
    85         . D INITFARY("C0XFARY")
    86         . S FARY="C0XFARY"
    87         D USEFARY(FARY)
    88         N ZD,ZTMP
    89         I '$D(INDIR) S INDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
    90         I $G(INURL)="" D  ; NO URI GIVEN, SO USE DIRECTORY FOLLOWED BY FILE NAME
    91         . ;N ZN2 S ZN2=$P(FNAME,".",1)_"_"_$P(FNAME,".",2) ; REMOVE THE DOT
    92         . ;S INURL=FDIR_ZN2
    93         . S INURL=INDIR_FNAME
    94         N ZTMP
    95         S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
    96         K @ZTMP ; MAKE SURE IT'S CLEAR
    97         S C0XSTART=$$NOW^XLFDT ; START TIME; PROCESS READS IT FOR THE TOTAL ELAPSED TIME
    98         W !,"STARTED: ",C0XSTART
    99         W !,"READING IN: ",FNAME
    100         I '$$FILEREAD(ZTMP,INDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
    101         . W !,"ERROR READING FILE: ",INDIR,FNAME
    102         S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
    103         W !,$O(@ZRDF@(""),-1)," LINES READ"
    104         D INSRDF(ZRDF,INURL,FARY) ; IMPORT AND PROCESS THE RDF
    105         K INURL
    106         K C0XFDA
    107         ;K ^TMP("MXMLDOM",$J)
    108         Q
    109         ;
    110 WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
    111         ; ZURL IS THE ADDRESS TO DOWNLOAD; FARY IS THE OPTIONAL NAME OF THE FILE ARRAY TO USE
    112         I '$D(FARY) D  ;
    113         . D INITFARY("C0XFARY")
    114         . S FARY="C0XFARY"
    115         D USEFARY(FARY)
    116         ;N ZLOC,ZTMP
    117         K ZTMP
    118         S ZLOC=$NA(^TMP("C0X","WGET",$J)) ; WHERE THE DOWNLOADED LINES ARE KEPT
    119         K @ZLOC
    120         S C0XSTART=$$NOW^XLFDT ; START TIME; PROCESS READS IT FOR THE TOTAL ELAPSED TIME
    121         W !,"STARTED: ",C0XSTART
    122         W !,"DOWNLOADING: ",ZURL
    123         S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP) ; NOTE(review): OK IS NEVER CHECKED -- confirm what a failure returns
    124         M @ZLOC=ZTMP
    125         S C0XLINES=$O(@ZLOC@(""),-1) ; HIGHEST SUBSCRIPT, REPORTED AS THE LINE COUNT
    126         W !,C0XLINES," LINES READ"
    127         S C0XDLC=$$NOW^XLFDT ; DOWNLOAD COMPLETE
    128         W !,"DOWNLOAD COMPLETE AT ",C0XDLC
    129         S C0XDIFF=$$FMDIFF^XLFDT(C0XDLC,C0XSTART,2)
    130         W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    131         I C0XDIFF'=0  W !," APPROXIMATELY ",$P(C0XLINES/C0XDIFF,".")," LINES PER SEC"
    132         D INSRDF(ZLOC,ZURL,FARY) ; THE URL IS USED AS THE NAME OF THE INSERTED FILE
    133         Q
    134         ;
    135 INSRDF(ZRDF,ZNAME,FARY) ; INSERT AN RDF FILE INTO THE STORE AND PROCESS
    136         ; ZRDF IS PASSED BY NAME; ZNAME IS THE FILE NAME OR URL; FARY IS THE OPTIONAL FILE ARRAY NAME
    137         I '$D(FARY) D  ;
    138         . D INITFARY("C0XFARY")
    139         . S FARY="C0XFARY"
    140         D USEFARY(FARY)
    141         S BATCNT=0 ; BATCH COUNTER
    142         S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
    143         N ZGRAPH,ZSUBJECT
    144         S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
    145         S ZSUBJECT=$$ANONS() ; RANDOM ANOYMOUS SUBJECT
    146         D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
    147         N ZTXTNM
    148         S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
    149         D ADD(ZGRAPH,ZSUBJECT,"fmts:fileSource",ZTXTNM,FARY)
    150         D ADD(ZGRAPH,ZSUBJECT,"fmts:fileTag",$$name2tag(ZNAME),FARY)
    151         D SWUPDIE(.C0XFDA) ; TRY IT OUT
    152         K C0XCNT ;RESET FOR NEXT TIME
    153         D STORETXT(ZRDF,ZTXTNM,FARY) ; KEEP THE SOURCE TEXT UNDER THE TEXT NODE NAME
    154         W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:fileSource ",ZTXTNM
    155         D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF (ZNAME IS PASSED AS THE GRAPH, ZGRAPH AS THE METAGRAPH)
    156         Q
    157         ;
    158 name2tag(zname) ; extrinsic: tag is the last "/" piece of zname, cut at its first "."
    159         ; e.g. /home/vista/project.xml ==> project
    160         q $p($p(zname,"/",$l(zname,"/")),".")
    161         ;
    162 STORETXT(ZTXT,ZNAME,FARY)       ; STORE TEXT IN THE TRIPLESTORE AT ZNAME
    163         ; ZTXT IS THE NAME OF THE ARRAY HOLDING THE TEXT; FARY IS THE OPTIONAL FILE ARRAY NAME
    164         I '$D(FARY) D  ;
    165         . D INITFARY("C0XFARY")
    166         . S FARY="C0XFARY"
    167         D USEFARY(FARY)
    168         N ZIEN
    169         S ZIEN=$$IENOF(ZNAME,FARY) ; GET THE IEN
    170         D CLEAN^DILF
    171         K ZERR
    172         D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR") ; FILE THE TEXT IN FIELD 1 OF THE STRINGS FILE ENTRY
    173         I $D(ZERR) D  Q  ;
    174         . W !,"ERROR CREATING WORD PROCESSING FIELD"
    175         . S C0XERR="ERROR CREATING WORD PROCESSING FIELD"
    176         . D ^%ZTER ; error trap
    177         Q
    178         ;
    179 GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT STORED UNDER THE STRING ZNAME
    180         ; ZRTN IS PASSED BY REFERENCE; FARY IS THE OPTIONAL FILE ARRAY NAME
    181         I '$D(FARY) D  ;
    182         . D INITFARY("C0XFARY")
    183         . S FARY="C0XFARY"
    184         D USEFARY(FARY)
    185         N ZIEN
    186         S ZIEN=$$IENOF(ZNAME) ; NOTE: IENOF ADDS ZNAME TO THE STRINGS FILE IF IT IS NOT FOUND
    187         S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN") ; FIELD 1 IS THE TEXT; IT IS PLACED IN ZRTN
    188         Q
    189         ;
    190 WHERETXT(ZNAME,FARY)    ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
    191         ; WHERE THE TEXT IS LOCATED. ZNAME IS THE NAME OF THE STRING; FARY IS THE OPTIONAL FILE ARRAY NAME
    192         I '$D(FARY) D  ;
    193         . D INITFARY("C0XFARY")
    194         . S FARY="C0XFARY"
    195         D USEFARY(FARY)
    196         N ZIEN
    197         S ZIEN=$$IENOF(ZNAME) ; NOTE: IENOF ADDS ZNAME TO THE STRINGS FILE IF IT IS NOT FOUND
    198         Q $NA(@C0XSN@(ZIEN,1)) ; NODE 1 UNDER THE ENTRY IN THE STRINGS FILE GLOBAL
    199         ;
    200 FILEREAD(ZINTMP,ZDIR,ZFNAME,ZLVL)       ; EXTRINSIC: READS FILE ZFNAME IN DIRECTORY ZDIR INTO ZINTMP USING FTG^%ZISH
    201         ; ZINTMP IS PASSED BY NAME AND INCLUDES THE SUBSCRIPT TO INCREMENT
    202         ; IE ^TMP("C0X",$J,"FILEIN",1)
    203         ; ZLVL IS THE POSITION OF THAT SUBSCRIPT; IN THIS CASE 4, INCREMENTING THE 1
    204         ; RETURNS THE RESULT OF FTG^%ZISH (IMPORT TREATS A FALSE RESULT AS FAILURE)
    205         N OK S OK=$$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL) ; USE THE ZFNAME PARAMETER, NOT THE CALLER'S FNAME VARIABLE
    206         Q OK
    207         ;
    208 TESTPROC        ; TEST PROCESS WITH EXISTING SMALL RDF FILE (EXPECTS IT ALREADY LOADED UNDER JOB NUMBER 12226)
    209         S ZIN=$NA(^TMP("C0X",12226,"FILEIN"))
    210         S ZGRAPH="/test/rdfFile"
    211         S ZM="/test/rdfFile/meta"
    212         D PROCESS(.G,ZIN,ZGRAPH,ZM) ; NO FILE ARRAY IS PASSED, SO PROCESS USES THE DEFAULT
    213         Q
    214         ;
    215 VISTAOWL        ; PROCESS THE VISTA OWL FILE FROM A HARD-CODED INPUT BUFFER, GRAPH NAME AND FILE NAME
    216         S ZRDF=$NA(^TMP("C0X",542,"FILEIN")) ; EXPECTS THE FILE ALREADY LOADED UNDER JOB NUMBER 542
    217         S ZNAME="/home/glilly/vistaowl/VistAOWL.owl"
    218         S ZGRAPH="_:G431590209"
    219         S FARY="C0XFARY"
    220         D INITFARY(FARY)
    221         S C0XDOCID=1
    222         S BATCNT=0
    223         S BATMAX=10000
    224         D PROCESS(.G,ZRDF,ZGRAPH,ZNAME,FARY) ; HERE ZGRAPH IS THE GRAPH AND ZNAME THE METAGRAPH (INSRDF PASSES THEM THE OTHER WAY ROUND)
    225         Q
    226         ;
    227 PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY)      ; PARSE AN INCOMING RDF/XML FILE AND INSERT ITS TRIPLES INTO GRAPH ZGRF
    228         ; ZRTN IS PASS BY REFERENCE AND IS MEANT FOR MESSAGES ABOUT THE PROCESSING (THE CODE BELOW NEVER SETS IT)
    229         ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE; IT IS KILLED HERE
    230         ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
    231         ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
    232         ; FARY IS OPTIONAL AND IS THE NAME OF THE FILE ARRAY OF THE TRIPLE STORE TO USE
    233         I '$D(FARY) D  ;
    234         . D INITFARY("C0XFARY")
    235         . S FARY="C0XFARY"
    236         D USEFARY(FARY)
    237         ;N BATCNT
    238         ;N BATMAX
    239         ; -- first parse the rdf file with the MXML parser
    240         ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
    241         S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
    242         I $G(@ZRDF@(1))'["<?xml" D  Q  ; $G so an empty input buffer is reported instead of raising an error
    243         . K @ZRDF ; don't need the input buffer
    244         . W !,"Not an XML file"
    245         S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W") ;
    246         ;B
    247         K @ZRDF ; DON'T NEED INPUT BUFFER ANYMORE
    248         ; -- assign the MXLM dom global name to ZDOM
    249         S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    250         ;S ZDOM=$NA(^TMP("MXMLDOM",16850,C0XDOCID)) ;VISTAOWL DOM
    251         S C0XNODE=$O(@ZDOM@(""),-1)
    252         W !,C0XNODE," XML NODES PARSED"
    253         S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
    254         W !,"PARSE COMPLETE AT ",C0XPRS
    255         S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
    256         W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    257         I C0XDIFF'=0 D  ;
    258         . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
    259         ; -- populate the metagraph to point to the graph with status unfinished
    260         S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
    261         I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
    262         D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    263         D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
    264         W !,"INSERTING GRAPH: ",ZGRF
    265         ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    266         S C0XDATE=$$NOW^XLFDT
    267         D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    268         D SWUPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    269         ; --
    270         ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
    271         ; -- put them in a local variable for quick reference
    272         ; -- TODO: create a graph for vocabularies and validate incoming against it
    273         ;
    274         S C0XVOC=""
    275         N ZI,ZJ,ZK S ZI=""
    276         F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
    277         . S ZVOC=$P(ZI,"xmlns:",2)
    278         . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
    279         I $D(DEBUG) D  ;
    280         . W !,"VOCABS:"
    281         . N ZZ S ZZ=""
    282         . F  S ZZ=$O(C0XVOC(ZZ)) Q:ZZ=""  W !,ZZ,":",C0XVOC(ZZ)
    283         ;
    284         ; -- look for children called rdf:Description. quit if none. not an rdf file
    285         ;
    286         S C0XTYPE("rdf:Description")=1
    287         S C0XTYPE("owl:ObjectProperty")=1
    288         S C0XTYPE("owl:Ontology")=1
    289         S C0XTYPE("owl:Class")=1
    290         S C0XTYPE("rdfs:subClassOf")=1
    291         S C0XTYPE("rdf:RDF")=1
    292         S ZI=$O(@ZDOM@(1,"C",""))
    293         I '$G(C0XTYPE(@ZDOM@(1,"C",ZI))) D  ;Q  ; not an rdf file
    294         . W !,"Unusual RDF file ",@ZDOM@(1,"C",ZI)
    295         . ;W !,"Error. Not an RDF file. Cannot process."
    296         . D SHOW(1)
    297         ;
    298         ; -- now process the rdf description children
    299         ;
    300         S ZI=""
    301         S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
    302         F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
    303         . ; -- we are skipping any child whose type is not listed in C0XTYPE
    304         . ; -- TODO: check to see if this is right in general
    305         . ;
    306         . IF '$G(C0XTYPE(@ZDOM@(1,"C",ZI))) D  Q  ;
    307         . . W !,"SKIPPING NODE: ",ZI
    308         . S C0XSUB="" ; -- forget the previous node's subject, then look for this node's subject
    309         . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
    310         . I ZX'="" D  ; we have the subject
    311         . . ;W " about: ",ZX
    312         . . S C0XSUB=ZX
    313         . E  D  ;
    314         . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
    315         . . I ZX'="" D  ;
    316         . . . S C0XSUB=ZX
    317         . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT
    318         . ;
    319         . ; -- we now have the subject. the children of this node have the rest
    320         . ;
    321         . S ZJ="" ; for the children of the rdf:Description nodes
    322         . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
    323         . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
    324         . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
    325         . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
    326         . . I C0XPRE[":" D  ; expand using vocabulary
    327         . . . N ZB,ZA
    328         . . . S ZB=$P(C0XPRE,":",1)
    329         . . . S ZA=$P(C0XPRE,":",2)
    330         . . . I $G(C0XVOC(ZB))'="" D  ;
    331         . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
    332         . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
    333         . . I ZY'="" D  Q ;
    334         . . . S C0XOBJ=$$EXT^C0XUTIL(ZY) ; object
    335         . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; finally. our first real triple (FARY passed so a non-default store is honored)
    336         . . ; -- this is an else because of the quit above
    337         . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
    338         . . I ZX'="" D  Q  ; got one
    339         . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
    340         . . . ; without change... this could be foolish .. look at it again later
    341         . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
    342         . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
    343         . . I C0XOBJ="" D  Q  ; not a happy situation
    344         . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    345         . . S C0XOBJ=$$EXT^C0XUTIL(C0XOBJ) ; might be namespaced
    346         . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
    347         S C0XTRP=$$NOW^XLFDT S C0XCNT=+$G(C0XCNT) ; TRIPLES COMPLETE; the count is 0 when no triple was added
    348         W !,"TRIPLES COMPLETE AT ",C0XTRP
    349         S C0XDIFF=$$FMDIFF^XLFDT(C0XTRP,C0XPRS,2)
    350         W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    351         I C0XDIFF'=0 D  ;
    352         . W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
    353         W !,"INSERTING ",C0XCNT," TRIPLES"
    354         I $D(C0XFDA) D  ;
    355         . I $G(BLKLOAD) D  ;
    356         . . D BULKLOAD(.C0XFDA)
    357         . E  D  ;
    358         . . D UPDIE(.C0XFDA) ; commit the updates to the file
    359         K C0XFDA S BATCNT=0 ; the last batch is filed: clear it so a later flush cannot file it again. TODO: mark the graph as finished
    360         S C0XINS=$$NOW^XLFDT ; INSERTION COMPLETE
    361         W !,"INSERTION COMPLETE AT ",C0XINS
    362         S C0XDIFF=$$FMDIFF^XLFDT(C0XINS,C0XTRP,2)
    363         W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    364         I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"
    365         S C0XEND=$$NOW^XLFDT
    366         W !," ENDED AT: ",C0XEND
    367         S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,$G(C0XSTART,C0XDLC2),2) ; C0XSTART is set by IMPORT/WGET; fall back to the parse start on a direct call
    368         W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    369         I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
    370         Q
    371         ;
    372 SHOW(ZN)        ; WRITE NODE ZN OF THE PARSED XML DOCUMENT 1 AND EVERYTHING STORED BELOW IT
    373         I '$D(C0XJOB) S C0XJOB=$J ; C0XJOB MAY BE PRESET TO LOOK AT ANOTHER JOB'S DOCUMENT
    374         N ZD
    375         S ZD=$NA(^TMP("MXMLDOM",C0XJOB,1,ZN))
    376         W ZD,"=",@ZD
    377         F  S ZD=$Q(@ZD) Q:ZD=""  Q:$QS(ZD,4)'=ZN  W !,ZD,"=",@ZD ; $Q returns "" at the end of the global; stop there before $QS is applied
    378         ;ZWR ^TMP("MXMLDOM",C0XJOB,1,ZN,*)
    379         Q
    380         ;
    381 ANONS() ; EXTRINSIC WHICH RETURNS AN ANONYMOUS SUBJECT NAME
    382         Q "iDPsDPss"_$$LKY9 ; FIXED PREFIX FOLLOWED BY 9 RANDOM DIGITS
    383         ;
    384 NEWG(NGRAPH,NMETA)      ; GENERATES A NEW GRAPH NAME AND A MATCHING METAGRAPH NAME
    385         ; NOTE: ONLY THE NAMES ARE GENERATED HERE; NOTHING IS ADDED TO THE TRIPLE STORE
    386         ; NGRAPH AND NMETA ARE PASSED BY REFERENCE AND ARE THE RETURN
    387         S NGRAPH="G"_$$LKY9 ; "G" FOLLOWED BY 9 RANDOM DIGITS
    388         S NMETA=NGRAPH_"A" ; THE METAGRAPH NAME IS THE GRAPH NAME WITH "A" APPENDED
    389         Q
    390         ;
    391 STARTADD        ; RESET THE PENDING BATCH BY KILLING C0XFDA AND BATCNT
    392         K C0XFDA
    393         K BATCNT ; ADD SETS IT BACK TO 0 WHEN IT FINDS IT UNDEFINED
    394         Q
    395         ;
    396 ADD(ZG,ZS,ZP,ZO,FARY)   ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT (ZG=GRAPH ZS=SUBJECT ZP=PREDICATE ZO=OBJECT)
    397         ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED UNTIL A BATCH OF BATMAX IS FULL. CALL UPDIE TO COMPLETE
    398         I '$D(FARY) D  ;
    399         . D INITFARY("C0XFARY")
    400         . S FARY="C0XFARY"
    401         D USEFARY(FARY)
    402         I '$D(C0XCNT) S C0XCNT=0
    403         N ZNODE
    404         S ZNODE="N"_$$LKY17 ; GENERATED NODE ID: "N" FOLLOWED BY 17 RANDOM DIGITS
    405         N ZNARY ; GET READY TO CALL IENOFA
    406         I (ZG="")!(ZS="")!(ZP="")!(ZO="") D  Q  ; AN INCOMPLETE TRIPLE IS DROPPED
    407         . I $G(DEBUG) W !,"Error Empty String ZG:"_ZG_" ZS:"_ZS_" ZP:"_ZP_" ZO"_ZO
    408         S ZNARY("ZG",ZG)=""
    409         S ZNARY("ZS",ZS)=""
    410         S ZNARY("ZP",ZP)=""
    411         S ZNARY("ZO",ZO)=""
    412         D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
    413         ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
    414         ;S ZSIEN=$$IENOF(ZS)
    415         ;S ZPIEN=$$IENOF(ZP)
    416         ;S ZOIEN=$$IENOF(ZO)
    417         ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
    418         I '$D(BATCNT) S BATCNT=0
    419         S BATCNT=BATCNT+1 ; POSITION OF THIS TRIPLE IN THE CURRENT BATCH
    420         S C0XCNT=C0XCNT+1 ; RUNNING COUNT OF TRIPLES ADDED
    421         I $G(BLKLOAD)=1 D  ; we are using bulk load
    422         . S C0XFDA(C0XTFN,BATCNT,.01)=ZNODE
    423         . S C0XFDA(C0XTFN,BATCNT,.02)=$O(ZIENS("IEN","ZG",""))
    424         . S C0XFDA(C0XTFN,BATCNT,.03)=$O(ZIENS("IEN","ZS",""))
    425         . S C0XFDA(C0XTFN,BATCNT,.04)=$O(ZIENS("IEN","ZP",""))
    426         . S C0XFDA(C0XTFN,BATCNT,.05)=$O(ZIENS("IEN","ZO",""))
    427         E  D  ; not bulk load: build the entries with "?+n," IENS strings for UPDATE^DIE
    428         . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.01)=ZNODE
    429         . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.02)=$O(ZIENS("IEN","ZG",""))
    430         . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.03)=$O(ZIENS("IEN","ZS",""))
    431         . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.04)=$O(ZIENS("IEN","ZP",""))
    432         . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.05)=$O(ZIENS("IEN","ZO",""))
    433         I '$D(BATMAX) S BATMAX=10000
    434         I BATCNT=BATMAX D  ; BATCH IS DONE
    435         . I $G(BLKLOAD) D  ; bulk load
    436         . . D BULKLOAD(.C0XFDA) ; bulk load the batch
    437         . E  D  ; no bulk load
    438         . . D UPDIE(.C0XFDA)
    439         . K C0XFDA
    440         . S BATCNT=0 ; RESET COUNTER
    441         ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
    442         Q
    443         ;
    444 LKY5()  ;EXTRINSIC THAT RETURNS A STRING OF 5 RANDOM DIGITS. USED FOR GENERATING
    445         ; UNIQUE NODE AND GRAPH NAMES
    446         N ZOUT,ZK
    447         S ZOUT="" ; the digits accumulate here
    448         ; append one random digit, 0 through 9, on each of the 5 passes
    449         F ZK=1:1:5 S ZOUT=ZOUT_$R(10)
    450         Q ZOUT
    451         ;
    452 LKY9()  ;EXTRINSIC THAT RETURNS A STRING OF 9 RANDOM DIGITS. USED FOR GENERATING
    453         ; UNIQUE NODE AND GRAPH NAMES
    454         N ZOUT,ZK
    455         S ZOUT="" ; the digits accumulate here
    456         ; append one random digit, 0 through 9, on each of the 9 passes
    457         F ZK=1:1:9 S ZOUT=ZOUT_$R(10)
    458         Q ZOUT
    459         ;
    460 LKY17() ;EXTRINSIC THAT RETURNS A STRING OF 17 RANDOM DIGITS. USED FOR GENERATING
    461         ; UNIQUE NODE AND GRAPH NAMES
    462         N ZOUT,ZK
    463         S ZOUT="" ; the digits accumulate here
    464         ; append one random digit, 0 through 9, on each of the 17 passes
    465         F ZK=1:1:17 S ZOUT=ZOUT_$R(10)
    466         Q ZOUT
    467         ;
    468         ; these routines add the string if it is not found
    469         ;
    470 IENOF(ZSTRING,FARY)     ; EXTRINSIC WHICH RETURNS THE IEN OF ZSTRING IN THE STRINGS FILE, ADDING THE STRING IF IT IS NOT FOUND
    471         I '$D(FARY) D  ; FARY IS THE OPTIONAL NAME OF THE FILE ARRAY OF THE TRIPLE STORE TO USE
    472         . D INITFARY("C0XFARY")
    473         . S FARY="C0XFARY"
    474         D USEFARY(FARY) N ZIEN ; apply the file array so C0XSN and C0XSFN below belong to the requested store
    475         I $G(ZSTRING)="" Q "" ; NO STRING
    476         S ZIEN=$O(@C0XSN@("B",ZSTRING,"")) ; LOOK THE STRING UP IN THE B INDEX
    477         I ZIEN="" D  ; NOT ON FILE YET, SO ADD IT AND LOOK AGAIN
    478         . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
    479         . D UPDIE(.C0XFDA2)
    480         . S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
    481         . K C0XFDA2
    482         Q ZIEN
    483         ;
    484 IENOFA(ZOUTARY,INARY,FARY)      ; RESOLVE STRINGS TO IEN IN STRINGS FILE
    485         ; OR ADD THEM IF
    486         ; MISSING. INARY AND ZOUTARY ARE PASSED BY REFERENCE
    487         ; INARY LOOKS LIKE INARY("VAR","VAL")=""
    488         ; RETURNS IN ZOUTARY OF THE FORM ZOUTARY("IEN","VAR",IEN)=""; ZOUTARY IS LEFT EMPTY IF ANY VALUE IS MISSING
    489         I '$D(FARY) D  ;
    490         . D INITFARY("C0XFARY")
    491         . S FARY="C0XFARY"
    492         K ZOUTARY ; START WITH CLEAN RESULTS
    493         K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
    494         I '$D(C0XVOC) D VOCINIT^C0XUTIL
    495         N ZINARY
    496         N ZI S ZI=""
    497         F  S ZI=$O(INARY(ZI)) Q:ZI=""  D  ; COPY INARY INTO ZINARY, PASSING NAMES AND VALUES THROUGH $$EXT^C0XUTIL
    498         . N ZK
    499         . S ZK=$O(INARY(ZI,""))
    500         . S ZINARY($$EXT^C0XUTIL(ZI),$$EXT^C0XUTIL(ZK))=""
    501         N ZV,ZIEN,ABORT
    502         S ABORT=0
    503         N ZCNT S ZCNT=0
    504         F  S ZI=$O(ZINARY(ZI)) Q:(ZI="")!+ABORT  D  ; LOOK FOR MISSING STRINGS
    505         . S ZV=$O(ZINARY(ZI,""))
    506         . I ZV="" S ABORT=1 Q  ; abandon quad -- missing an entry
    507         . I ZV["^" S ZV=$TR(ZV,"^","|") ; "^" IS REPLACED BY "|" BEFORE THE VALUE IS LOOKED UP OR FILED
    508         . I $O(@C0XSN@("B",ZV,""))="" D  ;
    509         . . S ZCNT=ZCNT+1
    510         . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
    511         I +ABORT Q  ;
    512         I $D(C0XFDA2) D  ;
    513         . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
    514         . K C0XFDA2 ; CLEAN UP
    515         F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOW GET ALL IENS
    516         . S ZV=$O(ZINARY(ZI,""))
    517         . I ZV["^" S ZV=$TR(ZV,"^","|")
    518         . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
    519         . I ZIEN="" D  ;
    520         . . W !,"ERROR ADDING STRING: ",ZV
    521         . . B
    522         . S ZOUTARY("IEN",ZI,ZIEN)=""
    523         Q
    524         ;
    525 ADDINN(ZG,ZS,ZARY)      ; ADD IF NOT NULL: ONE TRIPLE FOR EACH PREDICATE IN ZARY WHOSE OBJECT IS NOT EMPTY
    526         ; ZG IS THE GRAPH NAME, PASSED BY VALUE
    527         ; ZS IS THE SUBJECT, PASSED BY VALUE
    528         ; ZARY IS AN ARRAY, PASSED BY REFERENCE OF THE PREDICATE AND OBJECT
    529         ;  FORMAT IS ZARY(PRED)=OBJ
    530         N ZI S ZI=""
    531         F  S ZI=$O(ZARY(ZI)) Q:ZI=""  D  ;
    532         . ;I ZARY(ZI)="" S ZARY(ZI)="NULL"
    533         . I ZARY(ZI)'="" D  ;
    534         . . D ADD^C0XF2N(ZG,ZS,ZI,ZARY(ZI)) ; NO FILE ARRAY IS PASSED, SO ADD USES THE DEFAULT
    535         . . I $D(DEBUG) W !,"ADDING",ZI," ",ZARY(ZI)
    536         ;ZWR ZARY
    537         Q
    538         ;
    539 BULKLOAD(ZBFDA) ; BULK LOADER FOR LOADING TRIPLES INTO FILE 172.101
    540         ; USING GLOBAL SETS INSTEAD OF UPDATE^DIE
    541         ; QUITS IF FILE IS NOT 172.101
    542         ; EXPECTS AN FDA WITHOUT STRINGS FOR THE IENS, STARTING AT 1
    543         ; NOTE(review): THE FIRST ENTRY IS ASSUMED TO BE IENS 1 BUT THE CODE DOES NOT CHECK IT
    544         ; ASSUMES THAT THE LAST IENS IS THE COUNT OF ENTRIES
    545         ; ZBFDA IS PASSED BY REFERENCE AND IS NOT KILLED HERE
    546         ;
    547         ; -- reserves a block of iens from file 172.101 by locking the zero node
    548         ; -- ^C0X(101,0) and adding the count of entries plus one to piece 3 and 4
    549         ; -- then unlocking to minimize the duration of the lock
    550         ; -- U is used as the piece delimiter and must already be defined by the caller's environment
    551         I $D(DEBUG) W !,"USING BULKLOAD"
    552         I '$D(ZBFDA) Q  ; EMPTY FDA
    553         I $O(ZBFDA(""))'=172.101 Q  ; WRONG FILE
    554         N ZCNT,ZP3,ZP4
    555         ; -- find the number of nodes to insert
    556         S ZCNT=$O(ZBFDA(172.101,""),-1)
    557         I ZCNT="" D  Q  ;
    558         . W !,"ERROR IN BULK LOAD - INVALID NODE COUNT"
    559         . B
    560         ; -- lock the zero node and reserve a block of iens to insert
    561         I $D(DEBUG) W !,"LOCKING ZERO NODE"
    562         LOCK +^C0X(101,0)
    563         S ZP3=$P(^C0X(101,0),U,3)
    564         S ZP4=$P(^C0X(101,0),U,4)
    565         S $P(^C0X(101,0),U,3)=ZP3+ZCNT+1 ; NOTE(review): ONE MORE THAN THE ZCNT ENTRIES FILED BELOW -- confirm the +1 is intended
    566         S $P(^C0X(101,0),U,4)=ZP4+ZCNT+1
    567         LOCK -^C0X(101,0)
    568         N ZI,ZN,ZG,ZS,ZP,ZO,ZIEN,ZBASE
    569         S ZBASE=ZP3 ; the last ien in the file
    570         I $D(DEBUG) W !,"ZERO NODE UNLOCKED, IENS RESERVED=",ZCNT
    571         I $D(DEBUG) W !,$$NOW^XLFDT
    572         S ZI=""
    573         F  S ZI=$O(ZBFDA(172.101,ZI)) Q:ZI=""  D  ; AN ENTRY WITH ANY EMPTY FIELD IS REPORTED AND SKIPPED
    574         . S ZN=$G(ZBFDA(172.101,ZI,.01)) ; node name
    575         . I ZN="" D BLKERR Q  ;
    576         . S ZG=$G(ZBFDA(172.101,ZI,.02)) ; graph pointer
    577         . I ZG="" D BLKERR Q  ;
    578         . S ZS=$G(ZBFDA(172.101,ZI,.03)) ; subject pointer
    579         . I ZS="" D BLKERR Q  ;
    580         . S ZP=$G(ZBFDA(172.101,ZI,.04)) ; predicate pointer
    581         . I ZP="" D BLKERR Q  ;
    582         . S ZO=$G(ZBFDA(172.101,ZI,.05)) ; object pointer
    583         . I ZO="" D BLKERR Q  ;
    584         . S ZIEN=ZI+ZBASE ; the new ien
    585         . S ^C0X(101,ZIEN,0)=ZN_U_ZG_U_ZS_U_ZP_U_ZO ; set the zero node
    586         . D INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO)
    587         Q
    588         ;
    589 INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO)      ; HARD SET THE INDEX FOR ONE ENTRY (ZN=NODE ZG=GRAPH ZS=SUBJECT ZP=PREDICATE ZO=OBJECT)
    590         S ^C0X(101,"B",ZN,ZIEN)="" ; the B index
    591         S ^C0X(101,"G",ZG,ZIEN)="" ; the G for Graph index
    592         S ^C0X(101,"SPO",ZS,ZP,ZO,ZIEN)="" ; one index for each ordering of subject, predicate and object
    593         S ^C0X(101,"SOP",ZS,ZO,ZP,ZIEN)=""
    594         S ^C0X(101,"OPS",ZO,ZP,ZS,ZIEN)=""
    595         S ^C0X(101,"OSP",ZO,ZS,ZP,ZIEN)=""
    596         S ^C0X(101,"PSO",ZP,ZS,ZO,ZIEN)=""
    597         S ^C0X(101,"POS",ZP,ZO,ZS,ZIEN)=""
    598         S ^C0X(101,"GOPS",ZG,ZO,ZP,ZS,ZIEN)="" ; the same six orderings again, under the graph
    599         S ^C0X(101,"GOSP",ZG,ZO,ZS,ZP,ZIEN)=""
    600         S ^C0X(101,"GPSO",ZG,ZP,ZS,ZO,ZIEN)=""
    601         S ^C0X(101,"GPOS",ZG,ZP,ZO,ZS,ZIEN)=""
    602         S ^C0X(101,"GSPO",ZG,ZS,ZP,ZO,ZIEN)=""
    603         S ^C0X(101,"GSOP",ZG,ZS,ZO,ZP,ZIEN)=""
    604         Q
    605         ;
    606 REINDEX ; REINDEX THE ^C0X(101, TRIPLE STORE: KILL EVERY INDEX, THEN REBUILD THEM FROM THE ZERO NODES
    607         K ^C0X(101,"B")
    608         K ^C0X(101,"G")
    609         K ^C0X(101,"SPO")
    610         K ^C0X(101,"SOP")
    611         K ^C0X(101,"OPS")
    612         K ^C0X(101,"OSP")
    613         K ^C0X(101,"PSO")
    614         K ^C0X(101,"POS")
    615         K ^C0X(101,"GOPS")
    616         K ^C0X(101,"GOSP")
    617         K ^C0X(101,"GPSO")
    618         K ^C0X(101,"GPOS")
    619         K ^C0X(101,"GSPO")
    620         K ^C0X(101,"GSOP")
    621         N ZIEN,ZZ,ZN,ZG,ZS,ZP,ZO ; the field variables are local to this call and no longer leak
    622         S ZIEN=0
    623         F  S ZIEN=$O(^C0X(101,ZIEN)) Q:+ZIEN=0  D  ; FOR EACH NODE (STOPS AT THE FIRST NON-NUMERIC SUBSCRIPT)
    624         . S ZZ=$G(^C0X(101,ZIEN,0))
    625         . I ZZ="" D  Q  ; NO ZERO NODE: REPORT THE ENTRY AND SKIP IT
    626         . . W !,"ERROR REINDEXING NODE ",ZIEN
    627         . S ZN=$P(ZZ,"^",1)
    628         . S ZG=$P(ZZ,"^",2)
    629         . S ZS=$P(ZZ,"^",3)
    630         . S ZP=$P(ZZ,"^",4)
    631         . S ZO=$P(ZZ,"^",5)
    632         . D INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO)
    633         Q
    634         ;
    635 BLKERR  ; REPORT A BAD ENTRY FOUND BY BULKLOAD; READS ZBFDA AND ZI FROM BULKLOAD'S SCOPE
    636         W !,"ERROR IN BULK LOAD"
    637         S C0XERR="ERROR IN BULK LOAD"
    638         S C0XLOC=$NA(ZBFDA(172.101,ZI)) ; record which entry failed; ZBFDA(ZI) itself is never defined and raised an error here
    639         D ^%ZTER ; report the error
    640         B
    641         Q
    642         ;
    643 DELGRAPH(ZGRF,FARY)     ; delete a graph from the triplestore
    644         ; (doesn't delete strings)
    645         ; ZGRF is the graph name; FARY is the optional name of the file array to use
    646         I '$D(FARY) D  ;
    647         . D INITFARY("C0XFARY")
    648         . S FARY="C0XFARY"
    649         D USEFARY(FARY)
    650         N ZGRAPH
    651         D TING(.ZGRAPH,ZGRF,FARY) ; ZGRAPH(ien)="" for every triple in the graph
    652         I '$D(ZGRAPH) D  Q  ;
    653         . I $D(DEBUG) W !,"NO TRIPLES IN GRAPH"
    654         K C0XFDA
    655         N ZI S ZI=""
    656         F  S ZI=$O(ZGRAPH(ZI)) Q:ZI=""  D  ;
    657         . S C0XFDA(C0XTFN,ZI_",",.01)="@" ; "@" in the .01 field asks fileman to delete the entry
    658         D UPDIE(.C0XFDA) ; NOTE(review): UPDIE calls UPDATE^DIE -- confirm that it performs "@" deletions
    659         Q
    660         ;
    661 TING(ZRTN,ZGRF,FARY)    ; return the iens of the triples in graph ZGRF as ZRTN(ien)=""
    662         ; ZRTN is passed by reference; it is left empty when the graph is not found or has no triples
    663         I '$D(FARY) D  ;
    664         . D INITFARY("C0XFARY")
    665         . S FARY="C0XFARY"
    666         D USEFARY(FARY)
    667         K ZRTN
    668         N ZI,ZG S ZI=""
    669         S ZG=$$IENOF^C0XGET1(ZGRF) ; presumably a lookup that does not add the string -- TODO confirm in C0XGET1
    670         I ZG="" D  Q  ;
    671         . I $D(DEBUG) W !,"ERROR GRAPH NOT FOUND"
    672         I '$D(@C0XTN@("G",ZG)) Q  ;
    673         F  S ZI=$O(@C0XTN@("G",ZG,ZI)) Q:ZI=""  D  ; walk the G (graph) index of the triples file
    674         . S ZRTN(ZI)=""
    675         Q
    676        
    677 SWUPDIE(ZFDA)   ; SWITCH BETWEEN UPDIE AND BULKLOAD ACCORDING TO BLKLOAD; ZFDA IS PASSED BY REFERENCE
    678         I $G(BLKLOAD)=1 D  ; bulk load
    679         . D BULKLOAD(.ZFDA) ; bulk load the batch
    680         E  D  ; no bulk load
    681         . D UPDIE(.ZFDA)
    682         K ZFDA ; the batch is cleared in both cases
    683         Q
    684         ;
    685 UPDIE(ZFDA)     ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS
    686         ; ZFDA IS PASSED BY REFERENCE AND IS KILLED BEFORE RETURNING
    687         ;ZWR ZFDA
    688         ;B
    689         K ZERR
    690         D CLEAN^DILF
    691         D UPDATE^DIE("","ZFDA","","ZERR")
    692         I $D(ZERR) S ZZERR=ZZERR ; DELIBERATE: ZZERR DOESN'T EXIST, SO THIS RAISES AN UNDEFINED VARIABLE ERROR
    693         ; WHICH INVOKES THE ERROR TRAP IF TASKED
    694         ;. W "ERROR",!
    695         ;. ZWR ZERR
    696         ;. B
    697         K ZFDA
    698         Q
    699         ;
     10 ;
     11 ;This program is distributed in the hope that it will be useful,
     12 ;but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 ;GNU General Public License for more details.
     15 ;
     16 ;You should have received a copy of the GNU General Public License along
     17 ;with this program; if not, write to the Free Software Foundation, Inc.,
     18 ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
     19 ;
     20 Q
     21 ;
     22 ; This is based on C0XMAIN but experiments with a fast load for triples
     23 ; that will write directly to the fileman global
     24 ; The file 172.101 is a F2N design style for triples, which means
     25 ; that it is a Flat file with no subfiles, all fields at the root
     26 ; ... it is a "2" file solution which means all strings are stored in
     27 ; ...    strings file and pointed to by the triples file
     28 ; ... it is an N file because it has generated Node IDs instead of
     29 ; ...   DINUM which would use the IEN for the Node ID.
     30 ; gpl 11/04/2011
     31 ;
     32INITFARY(ZFARY) ; INITIALIZE FILE NUMBERS AND OTHER USEFUL THINGS
     33 ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
     34 ; TRIPLE STORES
     35 I $D(@ZFARY) Q  ; ALREADY INITIALIZED
     36 S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
     37 S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
     38 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
     39 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
     40 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/smart-new/"
     41 S @ZFARY@("BLKLOAD")=1 ; this file supports block load
     42 S @ZFARY@("FMTSSTYLE")="F2N" ; fileman style
     43 S @ZFARY@("REPLYFMT")="JSON"
     44 D USEFARY(ZFARY)
     45 Q
     46 ;
     47USEFARY(ZFARY) ; INITIALIZES VARIABLES SAVED IN ARRAY ZFARY
     48 N ZI S ZI=""
     49 F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  D 
     50 . ;N ZX
     51 . S ZX="S "_ZI_"="""_@ZFARY@(ZI)_""""
     52 . ;W !,ZX
     53 . X ZX
     54 Q
     55 ;
     56FILEIN ; INTERACTIVE ENTRY POINT FOR OPTION TO READ IN A FILE: PROMPTS FOR DIRECTORY AND NAME, THEN CALLS IMPORT
     57 I '$D(C0XFARY) D INITFARY("C0XFARY")
     58 D USEFARY("C0XFARY")
     59 S DIR(0)="F^3:240"
     60 S DIR("A")="File Directory"
     61 S DIR("B")=C0XDIR
     62 D ^DIR
     63 I Y="^" Q  ; user answered ^
     64 S C0XDIR=Y
     65 S C0XFARY("C0XDIR")=Y
     66 S DIR(0)="F^3:240"
     67 S DIR("A")="File Name"
     68 I '$D(C0XFN) S DIR("B")="qds.rdf"
     69 E  S DIR("B")=C0XFN
     70 D ^DIR
     71 I Y="" Q  ; nothing entered
     72 I Y="^" Q  ; user answered ^
     73 S C0XFN=Y
     74 D IMPORT(C0XFN,C0XDIR,,"C0XFARY")
     75 K C0XFDA
     76 Q
     77 ;
     78IMPORT(FNAME,INDIR,INURL,FARY) ; READS FILE FNAME FROM A DIRECTORY INTO ^TMP
     79 ; AND HANDS IT TO INSRDF TO BE STORED AND PROCESSED.
     80 ; NOTE(review): the code quits without a value, so call it with DO, not as $$IMPORT
     81 ; INDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO C0XDIR)
     82 ; INURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE (DEFAULTS TO INDIR_FNAME)
     83 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
     84 I '$D(FARY) D  ;
     85 . D INITFARY("C0XFARY")
     86 . S FARY="C0XFARY"
     87 D USEFARY(FARY)
     88 N ZD,ZTMP
     89 I '$D(INDIR) S INDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
     90 I $G(INURL)="" D  ;
     91 . ;N ZN2 S ZN2=$P(FNAME,".",1)_"_"_$P(FNAME,".",2) ; REMOVE THE DOT
     92 . ;S INURL=FDIR_ZN2
     93 . S INURL=INDIR_FNAME
     94 N ZTMP
     95 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
     96 K @ZTMP ; MAKE SURE IT'S CLEAR
     97 S C0XSTART=$$NOW^XLFDT
     98 W !,"STARTED: ",C0XSTART
     99 W !,"READING IN: ",FNAME
     100 I '$$FILEREAD(ZTMP,INDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
     101 . W !,"ERROR READING FILE: ",INDIR,FNAME
     102 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
     103 W !,$O(@ZRDF@(""),-1)," LINES READ"
     104 D INSRDF(ZRDF,INURL,FARY) ; IMPORT AND PROCESS THE RDF
     105 K INURL
     106 K C0XFDA
     107 ;K ^TMP("MXMLDOM",$J)
     108 Q
     109 ;
     110WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
     111 ; ZURL is the address to fetch; FARY is the optional name of the file array
     112 I '$D(FARY) D  ;
     113 . D INITFARY("C0XFARY")
     114 . S FARY="C0XFARY"
     115 D USEFARY(FARY)
     116 ;N ZLOC,ZTMP
     117 K ZTMP
     118 S ZLOC=$NA(^TMP("C0X","WGET",$J))
     119 K @ZLOC
     120 S C0XSTART=$$NOW^XLFDT
     121 W !,"STARTED: ",C0XSTART
     122 W !,"DOWNLOADING: ",ZURL
     123 S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
     124 M @ZLOC=ZTMP
     125 S C0XLINES=$O(@ZLOC@(""),-1)
     126 W !,C0XLINES," LINES READ"
     127 S C0XDLC=$$NOW^XLFDT ; DOWNLOAD COMPLETE
     128 W !,"DOWNLOAD COMPLETE AT ",C0XDLC
     129 S C0XDIFF=$$FMDIFF^XLFDT(C0XDLC,C0XSTART,2)
     130 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     131 I C0XDIFF'=0  W !," APPROXIMATELY ",$P(C0XLINES/C0XDIFF,".")," LINES PER SEC"
     132 D INSRDF(ZLOC,ZURL,FARY)
     133 Q
     134 ;
     135INSRDF(ZRDF,ZNAME,FARY) ; INSERT AN RDF FILE INTO THE STORE AND PROCESS
     136 ; ZRDF IS PASSED BY NAME; ZNAME IS THE FILE'S NAME/URL; FARY IS THE OPTIONAL FILE ARRAY NAME
     137 I '$D(FARY) D  ;
     138 . D INITFARY("C0XFARY")
     139 . S FARY="C0XFARY"
     140 D USEFARY(FARY)
     141 S BATCNT=0 ; BATCH COUNTER
     142 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
     143 N ZGRAPH,ZSUBJECT
     144 S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
     145 S ZSUBJECT=$$ANONS() ; RANDOM ANONYMOUS SUBJECT
     146 D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
     147 N ZTXTNM
     148 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
     149 D ADD(ZGRAPH,ZSUBJECT,"fmts:fileSource",ZTXTNM,FARY)
     150 D ADD(ZGRAPH,ZSUBJECT,"fmts:fileTag",$$name2tag(ZNAME),FARY)
     151 D SWUPDIE(.C0XFDA) ; FILE THE THREE TRIPLES ABOVE. NOTE(review): BATCNT is not reset here - confirm that is intended
     152 K C0XCNT ;RESET FOR NEXT TIME
     153 D STORETXT(ZRDF,ZTXTNM,FARY)
     154 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:fileSource ",ZTXTNM
     155 D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF (ZNAME IS THE GRAPH, ZGRAPH THE METAGRAPH)
     156 Q
     157 ;
     158name2tag(zname) ; extrinsic which returns a tag derived from a name
     159 ; /home/vista/project.xml ==> project
     160 q $p($re($p($re(zname),"/")),".")
     161 ;
     162STORETXT(ZTXT,ZNAME,FARY) ; STORE TEXT IN THE TRIPLESTORE AT ZNAME
     163 ; ZTXT is handed to WP^DIE as the text source; FARY is the optional file array name
     164 I '$D(FARY) D  ;
     165 . D INITFARY("C0XFARY")
     166 . S FARY="C0XFARY"
     167 D USEFARY(FARY)
     168 N ZIEN
     169 S ZIEN=$$IENOF(ZNAME,FARY) ; GET THE IEN (IENOF ADDS THE STRING IF IT IS MISSING)
     170 D CLEAN^DILF
     171 K ZERR
     172 D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR")
     173 I $D(ZERR) D  Q  ; the filer reported a problem
     174 . W !,"ERROR CREATING WORD PROCESSING FIELD"
     175 . S C0XERR="ERROR CREATING WORD PROCESSING FIELD"
     176 . D ^%ZTER ; error trap
     177 Q
     178 ;
     179GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT
     180 ; ZRTN IS PASSED BY REFERENCE
     181 I '$D(FARY) D  ;
     182 . D INITFARY("C0XFARY")
     183 . S FARY="C0XFARY"
     184 D USEFARY(FARY)
     185 N ZIEN
     186 S ZIEN=$$IENOF(ZNAME)
     187 S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN")
     188 Q
     189 ;
     190WHERETXT(ZNAME,FARY) ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
     191 ; WHERE THE TEXT IS LOCATED. NAME IS THE NAME OF THE STRING
     192 I '$D(FARY) D  ;
     193 . D INITFARY("C0XFARY")
     194 . S FARY="C0XFARY"
     195 D USEFARY(FARY)
     196 N ZIEN
     197 S ZIEN=$$IENOF(ZNAME)
     198 Q $NA(@C0XSN@(ZIEN,1))
     199 ;
     200FILEREAD(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
     201 ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
     202 ; IE ^TMP("C0X","FILEIN",1)
     203 ; ZLVL IN THIS CASE WOULD BE 3 INCREMENTING THE 1
     204 ; EXTRINSIC WHICH RETURNS THE RESULT OF FTG^%ZISH
     205 S OK=$$FTG^%ZISH(ZDIR,FNAME,ZINTMP,ZLVL)
     206 Q OK
     207 ;
     208TESTPROC ; TEST PROCESS WITH EXISTING SMALL RDF FILE
     209 S ZIN=$NA(^TMP("C0X",12226,"FILEIN"))
     210 S ZGRAPH="/test/rdfFile"
     211 S ZM="/test/rdfFile/meta"
     212 D PROCESS(.G,ZIN,ZGRAPH,ZM)
     213 Q
     214 ;
     215VISTAOWL ;
     216 S ZRDF=$NA(^TMP("C0X",542,"FILEIN"))
     217 S ZNAME="/home/glilly/vistaowl/VistAOWL.owl"
     218 S ZGRAPH="_:G431590209"
     219 S FARY="C0XFARY"
     220 D INITFARY(FARY)
     221 S C0XDOCID=1
     222 S BATCNT=0
     223 S BATMAX=10000
     224 D PROCESS(.G,ZRDF,ZGRAPH,ZNAME,FARY)
     225 Q
     226 ;
     227PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
     228 ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING
     229 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
     230 ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
     231 ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
     232 ;
     233 I '$D(FARY) D  ;
     234 . D INITFARY("C0XFARY")
     235 . S FARY="C0XFARY"
     236 D USEFARY(FARY)
     237 ;N BATCNT
     238 ;N BATMAX
     239 ; -- first parse the rdf file with the MXML parser
     240 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
     241 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
     242 I @ZRDF@(1)'["<?xml" D  Q  ;
     243 . K @ZRDF ; don't need the input buffer
     244 . W !,"Not an XML file"
     245 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W") ;
     246 ;B
     247 K @ZRDF ; DON'T NEED INPUT BUFFER ANYMORE
     248 ; -- assign the MXLM dom global name to ZDOM
     249 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
     250 ;S ZDOM=$NA(^TMP("MXMLDOM",16850,C0XDOCID)) ;VISTAOWL DOM
     251 S C0XNODE=$O(@ZDOM@(""),-1)
     252 W !,C0XNODE," XML NODES PARSED"
     253 S C0XPRS=$$NOW^XLFDT ; PARSE COMPLETE
     254 W !,"PARSE COMPLETE AT ",C0XPRS
     255 S C0XDIFF=$$FMDIFF^XLFDT(C0XPRS,C0XDLC2,2)
     256 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     257 I C0XDIFF'=0 D  ;
     258 . W !," APPROXIMATELY ",$P(C0XNODE/C0XDIFF,".")," NODES PER SECOND"
     259 ; -- populate the metagraph to point to the graph with status unfinished
     260 S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
     261 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
     262 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
     263 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
     264 W !,"INSERTING GRAPH: ",ZGRF
     265 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
     266 S C0XDATE=$$NOW^XLFDT
     267 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
     268 D SWUPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
     269 ; --
     270 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
     271 ; -- put them in a local variable for quick reference
     272 ; -- TODO: create a graph for vocabularies and validate incoming against it
     273 ;
     274 S C0XVOC=""
     275 N ZI,ZJ,ZK S ZI=""
     276 F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
     277 . S ZVOC=$P(ZI,"xmlns:",2)
     278 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
     279 I $D(DEBUG) D  ;
     280 . W !,"VOCABS:"
     281 . N ZZ S ZZ=""
     282 . F  S ZZ=$O(C0XVOC(ZZ)) Q:ZZ=""  W !,ZZ,":",C0XVOC(ZZ)
     283 ;
     284 ; -- look for children called rdf:Description. quit if none. not an rdf file
     285 ;
     286 S C0XTYPE("rdf:Description")=1
     287 S C0XTYPE("owl:ObjectProperty")=1
     288 S C0XTYPE("owl:Ontology")=1
     289 S C0XTYPE("owl:Class")=1
     290 S C0XTYPE("rdfs:subClassOf")=1
     291 S C0XTYPE("rdf:RDF")=1
     292 S ZI=$O(@ZDOM@(1,"C",""))
     293 I '$G(C0XTYPE(@ZDOM@(1,"C",ZI))) D  ;Q  ; not an rdf file
     294 . W !,"Unusual RDF file ",@ZDOM@(1,"C",ZI)
     295 . ;W !,"Error. Not an RDF file. Cannot process."
     296 . D SHOW(1)
     297 ;
     298 ; -- now process the rdf description children
     299 ;
     300 S ZI=""
     301 S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
     302 F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
     303 . ; -- we are skipping any child that is not rdf:Description
     304 . ; -- TODO: check to see if this is right in general
     305 . ;
     306 . IF '$G(C0XTYPE(@ZDOM@(1,"C",ZI))) D  Q  ;
     307 . . W !,"SKIPPING NODE: ",ZI
     308 . ; -- now looking for the subject for the triples
     309 . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
     310 . I ZX'="" D  ; we have the subject
     311 . . ;W " about: ",ZX
     312 . . S C0XSUB=ZX
     313 . E  D  ;
     314 . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
     315 . . I ZX'="" D  ;
     316 . . . S C0XSUB=ZX
     317 . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT
     318 . ;
     319 . ; -- we now have the subject. the children of this node have the rest
     320 . ;
     321 . S ZJ="" ; for the children of the rdf:Description nodes
     322 . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
     323 . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
     324 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
     325 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
     326 . . I C0XPRE[":" D  ; expand using vocabulary
     327 . . . N ZB,ZA
     328 . . . S ZB=$P(C0XPRE,":",1)
     329 . . . S ZA=$P(C0XPRE,":",2)
     330 . . . I $G(C0XVOC(ZB))'="" D  ;
     331 . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
     332 . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
     333 . . I ZY'="" D  Q ;
     334 . . . S C0XOBJ=$$EXT^C0XUTIL(ZY) ; object
     335 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; finally. our first real triple
     336 . . ; -- this is an else because of the quit above
     337 . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
     338 . . I ZX'="" D  Q  ; got one
     339 . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
     340 . . . ; without change... this could be foolish .. look at it again later
     341 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
     342 . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
     343 . . I C0XOBJ="" D  Q  ; not a happy situation
     344 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
     345 . . S C0XOBJ=$$EXT^C0XUTIL(C0XOBJ) ; might be namespaced
     346 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
     347 S C0XTRP=$$NOW^XLFDT ; PARSE COMPLETE
     348 W !,"TRIPLES COMPLETE AT ",C0XTRP
     349 S C0XDIFF=$$FMDIFF^XLFDT(C0XTRP,C0XPRS,2)
     350 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     351 I C0XDIFF'=0 D  ;
     352 . W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
     353 W !,"INSERTING ",C0XCNT," TRIPLES"
     354 I $D(C0XFDA) D  ;
     355 . I $G(BLKLOAD) D  ;
     356 . . D BULKLOAD(.C0XFDA)
     357 . E  D  ;
     358 . . D UPDIE(.C0XFDA) ; commit the updates to the file
     359 ; next, mark the graph as finished
     360 S C0XINS=$$NOW^XLFDT ; PARSE COMPLETE
     361 W !,"INSERTION COMPLETE AT ",C0XPRS
     362 S C0XDIFF=$$FMDIFF^XLFDT(C0XINS,C0XTRP,2)
     363 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     364 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," NODES PER SECOND"
     365 S C0XEND=$$NOW^XLFDT
     366 W !," ENDED AT: ",C0XEND
     367 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
     368 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     369 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
     370 Q
     371 ;
     372SHOW(ZN) ;
     373 I '$D(C0XJOB) S C0XJOB=$J
     374 N ZD
     375 S ZD=$NA(^TMP("MXMLDOM",C0XJOB,1,ZN))
     376 W ZD,"=",@ZD
     377 F  S ZD=$Q(@ZD) Q:$QS(ZD,4)'=ZN  W !,ZD,"=",@ZD
     378 ;ZWR ^TMP("MXMLDOM",C0XJOB,1,ZN,*)
     379 Q
     380 ;
     381ANONS() ; RETURNS AN ANONOMOUS SUBJECT
     382 Q "iDPsDPss"_$$LKY9
     383 ;
     384NEWG(NGRAPH,NMETA) ; CREATES A NEW META GRAPH, MARKS IT AS UNFINISHED
     385 ; THEN CREATES A NEW GRAPH AND POINTS THE METAGRAPH TO IT
     386 ; NGRAPH AND NMETA ARE PASSED BY REFERENCE AND ARE THE RETURN
     387 S NGRAPH="G"_$$LKY9
     388 S NMETA=NGRAPH_"A"
     389 Q
     390 ;
     391STARTADD ; INITIALIZE C0XFDA AND BATCNT
     392 K C0XFDA
     393 K BATCNT
     394 Q
     395 ;
     396ADD(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
     397 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
     398 I '$D(FARY) D  ;
     399 . D INITFARY("C0XFARY")
     400 . S FARY="C0XFARY"
     401 D USEFARY(FARY)
     402 I '$D(C0XCNT) S C0XCNT=0
     403 N ZNODE
     404 S ZNODE="N"_$$LKY17
     405 N ZNARY ; GET READY TO CALL IENOFA
     406 I (ZG="")!(ZS="")!(ZP="")!(ZO="") D  Q  ; all four parts are required
     407 . I $G(DEBUG) W !,"Error Empty String ZG:"_ZG_" ZS:"_ZS_" ZP:"_ZP_" ZO"_ZO
     408 S ZNARY("ZG",ZG)=""
     409 S ZNARY("ZS",ZS)=""
     410 S ZNARY("ZP",ZP)=""
     411 S ZNARY("ZO",ZO)=""
     412 D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
     413 ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
     414 ;S ZSIEN=$$IENOF(ZS)
     415 ;S ZPIEN=$$IENOF(ZP)
     416 ;S ZOIEN=$$IENOF(ZO)
     417 ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
     418 I '$D(BATCNT) S BATCNT=0
     419 S BATCNT=BATCNT+1
     420 S C0XCNT=C0XCNT+1
     421 I $G(BLKLOAD)=1 D  ; we are using bulk load: plain numeric subscripts, as BULKLOAD reads them
     422 . S C0XFDA(C0XTFN,BATCNT,.01)=ZNODE
     423 . S C0XFDA(C0XTFN,BATCNT,.02)=$O(ZIENS("IEN","ZG",""))
     424 . S C0XFDA(C0XTFN,BATCNT,.03)=$O(ZIENS("IEN","ZS",""))
     425 . S C0XFDA(C0XTFN,BATCNT,.04)=$O(ZIENS("IEN","ZP",""))
     426 . S C0XFDA(C0XTFN,BATCNT,.05)=$O(ZIENS("IEN","ZO",""))
     427 E  D  ; not bulk load: "?+n," IENS strings for UPDATE^DIE
     428 . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.01)=ZNODE
     429 . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.02)=$O(ZIENS("IEN","ZG",""))
     430 . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.03)=$O(ZIENS("IEN","ZS",""))
     431 . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.04)=$O(ZIENS("IEN","ZP",""))
     432 . S C0XFDA(C0XTFN,"?+"_BATCNT_",",.05)=$O(ZIENS("IEN","ZO",""))
     433 I '$D(BATMAX) S BATMAX=10000
     434 I BATCNT=BATMAX D  ; BATCH IS DONE
     435 . I $G(BLKLOAD) D  ; bulk load
     436 . . D BULKLOAD(.C0XFDA) ; bulk load the batch
     437 . E  D  ; no bulk load
     438 . . D UPDIE(.C0XFDA)
     439 . K C0XFDA
     440 . S BATCNT=0 ; RESET COUNTER
     441 ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
     442 Q
     443 ;
     444LKY5() ;EXTRINIC THAT RETURNS A RANDOM 5 DIGIT NUMBER. USED FOR GENERATING
     445 ; UNIQUE NODE AND GRAPH NAMES
     446 N ZN,ZI
     447 S ZN=""
     448 F ZI=1:1:5 D  ;
     449 . S ZN=ZN_$R(10)
     450 Q ZN
     451 ;
     452LKY9() ;EXTRINIC THAT RETURNS A RANDOM 9 DIGIT NUMBER. USED FOR GENERATING
     453 ; UNIQUE NODE AND GRAPH NAMES
     454 N ZN,ZI
     455 S ZN=""
     456 F ZI=1:1:9 D  ;
     457 . S ZN=ZN_$R(10)
     458 Q ZN
     459 ;
     460LKY17() ;EXTRINIC THAT RETURNS A RANDOM 9 DIGIT NUMBER. USED FOR GENERATING
     461 ; UNIQUE NODE AND GRAPH NAMES
     462 N ZN,ZI
     463 S ZN=""
     464 F ZI=1:1:17 D  ;
     465 . S ZN=ZN_$R(10)
     466 Q ZN
     467 ;
     468 ; these routines add the string if it is not found
     469 ;
     470IENOF(ZSTRING,FARY) ; EXTRINSIC WHICH RETURNS THE IEN OF ZSTRING IN THE STRINGS FILE, ADDING IT IF MISSING
     471 I '$D(FARY) D  ; NOTE(review): unlike its siblings this does not call USEFARY(FARY); it relies on C0XSN/C0XSFN already being set
     472 . D INITFARY("C0XFARY")
     473 . S FARY="C0XFARY"
     474 N ZIEN
     475 I $G(ZSTRING)="" Q "" ; NO STRING
     476 S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
     477 I ZIEN="" D  ; not on file yet: add it, then look it up again
     478 . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
     479 . D UPDIE(.C0XFDA2)
     480 . S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
     481 . K C0XFDA2
     482 Q ZIEN
     483 ;
     484IENOFA(ZOUTARY,INARY,FARY) ; RESOLVE STRINGS TO IEN IN STRINGS FILE
     485 ; OR ADD THEM IF
     486 ; MISSING. INARY AND ZOUTARY ARE PASSED BY REFERENCE
     487 ; INARY LOOKS LIKE INARY("VAR","VAL")=""
     488 ; RETURNS IN ZOUTARY OF THE FORM ZOUTARY("IEN","VAR",IEN)=""
     489 I '$D(FARY) D  ;
     490 . D INITFARY("C0XFARY")
     491 . S FARY="C0XFARY"
     492 K ZOUTARY ; START WITH CLEAN RESULTS
     493 K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
     494 I '$D(C0XVOC) D VOCINIT^C0XUTIL
     495 N ZINARY
     496 N ZI S ZI=""
     497 F  S ZI=$O(INARY(ZI)) Q:ZI=""  D  ; copy INARY to ZINARY, passing key and value through $$EXT^C0XUTIL
     498 . N ZK
     499 . S ZK=$O(INARY(ZI,""))
     500 . S ZINARY($$EXT^C0XUTIL(ZI),$$EXT^C0XUTIL(ZK))=""
     501 N ZV,ZIEN,ABORT
     502 S ABORT=0
     503 N ZCNT S ZCNT=0
     504 F  S ZI=$O(ZINARY(ZI)) Q:(ZI="")!+ABORT  D  ; LOOK FOR MISSING STRINGS
     505 . S ZV=$O(ZINARY(ZI,""))
     506 . I ZV="" S ABORT=1 Q  ; abandon quad -- missing an entry
     507 . I ZV["^" S ZV=$TR(ZV,"^","|") ; every ^ in the value is replaced by |
     508 . I $O(@C0XSN@("B",ZV,""))="" D  ;
     509 . . S ZCNT=ZCNT+1
     510 . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
     511 I +ABORT Q  ; ZOUTARY is left empty in this case
     512 I $D(C0XFDA2) D  ;
     513 . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
     514 . K C0XFDA2 ; CLEAN UP
     515 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOW GET ALL IENS
     516 . S ZV=$O(ZINARY(ZI,""))
     517 . I ZV["^" S ZV=$TR(ZV,"^","|")
     518 . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
     519 . I ZIEN="" D  ;
     520 . . W !,"ERROR ADDING STRING: ",ZV
     521 . . B
     522 . S ZOUTARY("IEN",ZI,ZIEN)=""
     523 Q
     524 ;
     525ADDINN(ZG,ZS,ZARY) ; ADD IF NOT NULL: ONE TRIPLE PER NON-EMPTY ENTRY OF ZARY
     526 ; ZG IS THE GRAPH NAME, PASSED BY VALUE
     527 ; ZS IS THE SUBJECT, PASSED BY VALUE
     528 ; ZARY IS AN ARRAY, PASSED BY REFERENCE OF THE PREDICATE AND OBJECT
     529 ;  FORMAT IS ZARY(PRED)=OBJ
     530 N ZI S ZI=""
     531 F  S ZI=$O(ZARY(ZI)) Q:ZI=""  D  ;
     532 . ;I ZARY(ZI)="" S ZARY(ZI)="NULL"
     533 . I ZARY(ZI)'="" D  ; entries with an empty object are skipped
     534 . . D ADD^C0XF2N(ZG,ZS,ZI,ZARY(ZI)) ; no FARY is passed, so ADD falls back to C0XFARY
     535 . . I $D(DEBUG) W !,"ADDING",ZI," ",ZARY(ZI)
     536 ;ZWR ZARY
     537 Q
     538 ;
     539BULKLOAD(ZBFDA) ; BULK LOADER FOR LOADING TRIPLES INTO FILE 172.101
     540 ; USING GLOBAL SETS INSTEAD OF UPDATE^DIE
     541 ; QUITS IF FILE IS NOT 172.101
     542 ; EXPECTS AN FDA WITHOUT STRINGS FOR THE IENS, STARTING AT 1
     543 ; NOTE(review): the header says it quits if the first entry is not IENS 1, but no such check is visible below
     544 ; ASSUMES THAT THE LAST IENS IS THE COUNT OF ENTRIES
     545 ; ZBFDA IS PASSED BY REFERENCE
     546 ;
     547 ; -- reserves a block of iens from file 172.101 by locking the zero node
     548 ; -- ^C0X(101,0) and adding the count of entries to piece 3 and 4
     549 ; -- then unlocking to minimize the duration of the lock
     550 ;
     551 I $D(DEBUG) W !,"USING BULKLOAD"
     552 I '$D(ZBFDA) Q  ; EMPTY FDA
     553 I $O(ZBFDA(""))'=172.101 Q  ; WRONG FILE
     554 N ZCNT,ZP3,ZP4
     555 ; -- find the number of nodes to insert
     556 S ZCNT=$O(ZBFDA(172.101,""),-1)
     557 I ZCNT="" D  Q  ;
     558 . W !,"ERROR IN BULK LOAD - INVALID NODE COUNT"
     559 . B
     560 ; -- lock the zero node and reserve a block of iens to insert
     561 I $D(DEBUG) W !,"LOCKING ZERO NODE"
     562 LOCK +^C0X(101,0)
     563 S ZP3=$P(^C0X(101,0),U,3)
     564 S ZP4=$P(^C0X(101,0),U,4)
     565 S $P(^C0X(101,0),U,3)=ZP3+ZCNT+1 ; NOTE(review): advances by ZCNT+1 although ZCNT iens are used - confirm the spare ien is intended
     566 S $P(^C0X(101,0),U,4)=ZP4+ZCNT+1
     567 LOCK -^C0X(101,0)
     568 N ZI,ZN,ZG,ZS,ZP,ZO,ZIEN,ZBASE
     569 S ZBASE=ZP3 ; the last ien in the file
     570 I $D(DEBUG) W !,"ZERO NODE UNLOCKED, IENS RESERVED=",ZCNT
     571 I $D(DEBUG) W !,$$NOW^XLFDT
     572 S ZI=""
     573 F  S ZI=$O(ZBFDA(172.101,ZI)) Q:ZI=""  D  ; an entry missing any of the five fields is reported by BLKERR and skipped
     574 . S ZN=$G(ZBFDA(172.101,ZI,.01)) ; node name
     575 . I ZN="" D BLKERR Q  ;
     576 . S ZG=$G(ZBFDA(172.101,ZI,.02)) ; graph pointer
     577 . I ZG="" D BLKERR Q  ;
     578 . S ZS=$G(ZBFDA(172.101,ZI,.03)) ; subject pointer
     579 . I ZS="" D BLKERR Q  ;
     580 . S ZP=$G(ZBFDA(172.101,ZI,.04)) ; predicate pointer
     581 . I ZP="" D BLKERR Q  ;
     582 . S ZO=$G(ZBFDA(172.101,ZI,.05)) ; object pointer
     583 . I ZO="" D BLKERR Q  ;
     584 . S ZIEN=ZI+ZBASE ; the new ien
     585 . S ^C0X(101,ZIEN,0)=ZN_U_ZG_U_ZS_U_ZP_U_ZO ; set the zero node
     586 . D INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO)
     587 Q
     588 ;
     589INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO) ; HARD SET THE INDEX FOR ONE ENTRY
     590 S ^C0X(101,"B",ZN,ZIEN)="" ; the B index
     591 S ^C0X(101,"G",ZG,ZIEN)="" ; the G for Graph index
     592 S ^C0X(101,"SPO",ZS,ZP,ZO,ZIEN)=""
     593 S ^C0X(101,"SOP",ZS,ZO,ZP,ZIEN)=""
     594 S ^C0X(101,"OPS",ZO,ZP,ZS,ZIEN)=""
     595 S ^C0X(101,"OSP",ZO,ZS,ZP,ZIEN)=""
     596 S ^C0X(101,"PSO",ZP,ZS,ZO,ZIEN)=""
     597 S ^C0X(101,"POS",ZP,ZO,ZS,ZIEN)=""
     598 S ^C0X(101,"GOPS",ZG,ZO,ZP,ZS,ZIEN)=""
     599 S ^C0X(101,"GOSP",ZG,ZO,ZS,ZP,ZIEN)=""
     600 S ^C0X(101,"GPSO",ZG,ZP,ZS,ZO,ZIEN)=""
     601 S ^C0X(101,"GPOS",ZG,ZP,ZO,ZS,ZIEN)=""
     602 S ^C0X(101,"GSPO",ZG,ZS,ZP,ZO,ZIEN)=""
     603 S ^C0X(101,"GSOP",ZG,ZS,ZO,ZP,ZIEN)=""
     604 Q
     605 ;
     606REINDEX ; REINDEX THE ^C0X(101, TRIPLE STORE
     607 K ^C0X(101,"B")
     608 K ^C0X(101,"G")
     609 K ^C0X(101,"SPO")
     610 K ^C0X(101,"SOP")
     611 K ^C0X(101,"OPS")
     612 K ^C0X(101,"OSP")
     613 K ^C0X(101,"PSO")
     614 K ^C0X(101,"POS")
     615 K ^C0X(101,"GOPS")
     616 K ^C0X(101,"GOSP")
     617 K ^C0X(101,"GPSO")
     618 K ^C0X(101,"GPOS")
     619 K ^C0X(101,"GSPO")
     620 K ^C0X(101,"GSOP")
     621 N ZIEN,ZZ
     622 S ZIEN=0
     623 F  S ZIEN=$O(^C0X(101,ZIEN)) Q:+ZIEN=0  D  ; FOR EACH NODE
     624 . S ZZ=$G(^C0X(101,ZIEN,0))
     625 . I ZZ="" D  Q  ;
     626 . . W !,"ERROR REINDEXING NODE ",ZI
     627 . S ZN=$P(ZZ,"^",1)
     628 . S ZG=$P(ZZ,"^",2)
     629 . S ZS=$P(ZZ,"^",3)
     630 . S ZP=$P(ZZ,"^",4)
     631 . S ZO=$P(ZZ,"^",5)
     632 . D INDEX(ZIEN,ZN,ZG,ZS,ZP,ZO)
     633 Q
     634 ;
     635BLKERR ;
     636 W !,"ERROR IN BULK LOAD"
     637 S C0XERR="ERROR IN BULK LOAD"
     638 S C0XLOC=ZBFDA(ZI)
     639 D ^%ZTER ; report the error
     640 B
     641 Q
     642 ;
     643DELGRAPH(ZGRF,FARY) ; delete a graph from the triplestore
     644 ; (doesn't delete strings)
     645 ;
     646 I '$D(FARY) D  ;
     647 . D INITFARY("C0XFARY")
     648 . S FARY="C0XFARY"
     649 D USEFARY(FARY)
     650 N ZGRAPH
     651 D TING(.ZGRAPH,ZGRF,FARY)
     652 I '$D(ZGRAPH) D  Q  ;
     653 . I $D(DEBUG) W !,"NO TRIPLES IN GRAPH"
     654 K C0XFDA
     655 N ZI S ZI=""
     656 F  S ZI=$O(ZGRAPH(ZI)) Q:ZI=""  D  ;
     657 . S C0XFDA(C0XTFN,ZI_",",.01)="@"
     658 D UPDIE(.C0XFDA)
     659 Q
     660 ;
     661TING(ZRTN,ZGRF,FARY) ; return the iens for graph ZGRF
     662 ; ZRTN is passed by reference
     663 I '$D(FARY) D  ;
     664 . D INITFARY("C0XFARY")
     665 . S FARY="C0XFARY"
     666 D USEFARY(FARY)
     667 K ZRTN
     668 N ZI,ZG S ZI=""
     669 S ZG=$$IENOF^C0XGET1(ZGRF)
     670 I ZG="" D  Q  ;
     671 . I $D(DEBUG) W !,"ERROR GRAPH NOT FOUND"
     672 I '$D(@C0XTN@("G",ZG)) Q  ;
     673 F  S ZI=$O(@C0XTN@("G",ZG,ZI)) Q:ZI=""  D  ;
     674 . S ZRTN(ZI)=""
     675 Q
     676 
     677SWUPDIE(ZFDA) ; SWITCH BETWEEN UPDIE AND BULKLOAD
     678 I $G(BLKLOAD)=1 D  ; bulk load
     679 . D BULKLOAD(.ZFDA) ; bulk load the batch
     680 E  D  ; no bulk load
     681 . D UPDIE(.ZFDA)
     682 K ZFDA
     683 Q
     684 ;
     685UPDIE(ZFDA) ; FILE THE FDA IN ZFDA WITH UPDATE^DIE AND FORCE AN ERROR IF FILEMAN REPORTS ONE
     686 ; ZFDA IS PASSED BY REFERENCE AND IS KILLED BEFORE RETURNING
     687 ;ZWR ZFDA
     688 ;B
     689 K ZERR
     690 D CLEAN^DILF
     691 D UPDATE^DIE("","ZFDA","","ZERR")
     692 I $D(ZERR) S ZZERR=ZZERR ; ZZERR DOESN'T EXIST, SO THIS SET RAISES AN UNDEFINED-VARIABLE ERROR ON PURPOSE
     693 ; THAT INVOKES THE ERROR TRAP IF TASKED
     694 ;. W "ERROR",!
     695 ;. ZWR ZERR
     696 ;. B
     697 K ZFDA
     698 Q
     699 ;
  • fmts/trunk/p/C0XUTIL.m

    r1539 r1628  
    6666        S C0XVOC("sage")="http://oro.com/sage/schema#"
    6767        S C0XVOC("v")="http://www.w3.org/2006/vcard/ns#"
     68        S C0XVOC("cadrug")="http://drugbank.ca"
     69        S C0XVOC("xmlns")="http://www.w3.org/"
     70        S C0XVOC("xs")="http://www.w3.org/xs#"
    6871        Q
    6972        ;
Note: See TracChangeset for help on using the changeset viewer.