Ignore:
Timestamp:
Jan 20, 2012, 3:18:05 PM (12 years ago)
Author:
George Lilly
Message:

beginning some retrieval code

File:
1 edited

Legend:

Unmodified
Added
Removed
  • fmts/trunk/p/C0XMAIN.m

    r1294 r1343  
    1 C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
    2         ;;0.1;C0X;nopatch;noreleasedate;Build 5
    3         ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
    4         ;General Public License See attached copy of the License.
    5         ;
    6         ;This program is free software; you can redistribute it and/or modify
    7         ;it under the terms of the GNU General Public License as published by
    8         ;the Free Software Foundation; either version 2 of the License, or
    9         ;(at your option) any later version.
    10         ;
    11         ;This program is distributed in the hope that it will be useful,
    12         ;but WITHOUT ANY WARRANTY; without even the implied warranty of
    13         ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    14         ;GNU General Public License for more details.
    15         ;
    16         ;You should have received a copy of the GNU General Public License along
    17         ;with this program; if not, write to the Free Software Foundation, Inc.,
    18         ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
    19         ;
    20         Q
    21         ;
    22 INITFARY(ZFARY) ; LOAD THE DEFAULT TRIPLE STORE SETTINGS INTO THE ARRAY NAMED BY ZFARY
    23         ; ZFARY IS PASSED BY NAME. EACH NODE IS KEYED BY THE VARIABLE NAME THAT
    24         ; USEFARY WILL SET. USE OTHER VALUES FOR SUPPORTING ADDITIONAL TRIPLE STORES
    25         S @ZFARY@("C0XDIR")="/home/george/fmts/trunk/samples/" ; DEFAULT IMPORT DIRECTORY
    26         S @ZFARY@("C0XSN")=$NA(^C0X(201)),@ZFARY@("C0XSFN")=172.201 ; STRINGS FILE GLOBAL NAME AND NUMBER
    27         S @ZFARY@("C0XTN")=$NA(^C0X(101)),@ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE GLOBAL NAME AND NUMBER
    28         ; AN EARLIER DEFAULT DIRECTORY, KEPT FOR REFERENCE:
    29         ;S @ZFARY@("C0XDIR")="/home/glilly/all_smart_patient_data/smart-rdf/"
    30         ; COPY THE SETTINGS JUST STORED INTO LOCAL VARIABLES
    31         D USEFARY(ZFARY)
    32         Q
    33         ;
    34 USEFARY(ZFARY)  ; SETS ONE LOCAL VARIABLE FOR EACH NODE OF THE ARRAY NAMED BY ZFARY
    35         N ZI S ZI="" ; ZFARY IS PASSED BY NAME; ZI WALKS ITS FIRST-LEVEL SUBSCRIPTS
    36         F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  D  ;
    37         . ; NODE @ZFARY@("C0XTFN")=172.101 BECOMES THE COMMAND STRING S C0XTFN="172.101"
    38         . S ZX="S "_ZI_"="""_@ZFARY@(ZI)_""""
    39         . ; THE VARIABLES SET THIS WAY, AND ZX ITSELF, ARE NOT NEWED AND STAY DEFINED
    40         . X ZX
    41         Q
    42         ;
    43 IMPORT(FNAME,FDIR,FURL,FARY)    ; READS THE FILE FNAME FROM DIRECTORY FDIR INTO ^TMP
    44         ; AND HANDS IT TO INSRDF, WHICH STORES THE TEXT AND PROCESSES THE RDF.
    45         ; CALL WITH DO: NO VALUE IS RETURNED (THE ROUTINE ENDS WITH AN ARGUMENTLESS QUIT)
    46         ; FDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO C0XDIR FROM THE FILE ARRAY)
    47         ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
    48         ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
    49         I '$D(FARY) D  ;
    50         . D INITFARY("C0XFARY")
    51         . S FARY="C0XFARY"
    52         D USEFARY(FARY)
    53         N ZD,ZTMP
    54         I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
    55         I '$D(FURL) D  ;
    56         . N ZN2 S ZN2=$TR(FNAME,".","_") ; REPLACE EACH DOT IN THE NAME WITH AN UNDERSCORE
    57         . S FURL=FDIR_ZN2
    58         ; ZTMP NAMES LINE 1 OF THE INCOMING FILE; THE 4 PASSED TO FILEIN IS THE SUBSCRIPT LEVEL
    59         S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
    60         K ^TMP("C0X",$J,"FILEIN") ; CLEAR EVERY LINE OF AN EARLIER IMPORT, NOT JUST LINE 1
    61         S C0XSTART=$$NOW^XLFDT
    62         W !,"STARTED: ",C0XSTART
    63         W !,"READING IN: ",FNAME
    64         I '$$FILEIN(ZTMP,FDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
    65         . W !,"ERROR READING FILE: ",FDIR,FNAME
    66         S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
    67         W !,$O(@ZRDF@(""),-1)," LINES READ"
    68         D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF
    69         Q
    70         ;
    71 WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
    72         ; ZURL IS THE ADDRESS TO FETCH. FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE
    73         I '$D(FARY) D  ;
    74         . D INITFARY("C0XFARY")
    75         . S FARY="C0XFARY"
    76         D USEFARY(FARY)
    77         ;N ZLOC,ZTMP
    78         S ZLOC=$NA(^TMP("C0X","WGET",$J))
    79         K ZTMP,@ZLOC ; CLEAR THE BUFFER AND ANY LINES LEFT IN ^TMP BY AN EARLIER DOWNLOAD
    80         S C0XSTART=$$NOW^XLFDT
    81         W !,"STARTED: ",C0XSTART
    82         W !,"DOWNLOADING: ",ZURL
    83         S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
    84         I $D(ZTMP)<10 W !,"ERROR DOWNLOADING: ",ZURL Q  ; NO LINES CAME BACK, NOTHING TO INSERT
    85         M @ZLOC=ZTMP W !,$O(@ZLOC@(""),-1)," LINES READ"
    86         D INSRDF(ZLOC,ZURL,FARY)
    87         Q
    88         ;
    89 INSRDF(ZRDF,ZNAME,FARY) ; INSERT AN RDF FILE INTO THE STORE AND PROCESS
    90         ; ZRDF IS PASSED BY NAME. ZNAME IS THE URL OR NAME RECORDED FOR THE FILE
    91         I '$D(FARY) D  ;
    92         . D INITFARY("C0XFARY")
    93         . S FARY="C0XFARY"
    94         D USEFARY(FARY)
    95         N ZGRAPH,ZSUBJECT
    96         S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
    97         S ZSUBJECT=$$ANONS ; RANDOM ANONYMOUS SUBJECT
    98         D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
    99         N ZTXTNM
    100         S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
    101         D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY)
    102         D UPDIE(.C0XFDA) ; FILE THE TWO TRIPLES QUEUED BY ADD
    103         K C0XCNT ;RESET FOR NEXT TIME
    104         D STORETXT(ZRDF,ZTXTNM,FARY)
    105         W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
    106         D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; ZNAME IS THE RESULT GRAPH, ZGRAPH THE METAGRAPH
    107         Q
    108         ;
    109 STORETXT(ZTXT,ZNAME,FARY)       ; STORE TEXT IN THE TRIPLESTORE AT ZNAME
    110         ; ZTXT IS HANDED TO WP^DIE AS THE SOURCE OF THE TEXT. ZNAME IS THE STRING NAME
    111         I '$D(FARY) D  ;
    112         . D INITFARY("C0XFARY")
    113         . S FARY="C0XFARY"
    114         D USEFARY(FARY)
    115         N ZIEN
    116         S ZIEN=$$IENOF(ZNAME,FARY) ; GET THE IEN (IENOF ADDS THE STRING IF IT IS MISSING)
    117         D CLEAN^DILF
    118         K ZERR
    119         D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR")
    120         I $D(ZERR) D  ; ANYTHING RETURNED IN ZERR IS DISPLAYED
    121         . ZWR ZERR
    122         Q
    123         ;
    124 GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT
    125         ; ZRTN IS PASSED BY REFERENCE. ZNAME IS THE NAME OF THE STRING HOLDING THE TEXT
    126         I '$D(FARY) D  ;
    127         . D INITFARY("C0XFARY")
    128         . S FARY="C0XFARY"
    129         D USEFARY(FARY)
    130         N ZIEN
    131         S ZIEN=$$IENOF(ZNAME,FARY) ; WITHOUT FARY, IENOF WOULD RELOAD THE DEFAULT STORE SETTINGS
    132         S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN") ; FIELD 1 OF THE ENTRY IS RETURNED IN ZRTN
    133         Q
    134         ;
    135 WHERETXT(ZNAME,FARY)    ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
    136         ; WHERE THE TEXT IS LOCATED. ZNAME IS THE NAME OF THE STRING
    137         I '$D(FARY) D  ;
    138         . D INITFARY("C0XFARY")
    139         . S FARY="C0XFARY"
    140         D USEFARY(FARY)
    141         N ZIEN
    142         S ZIEN=$$IENOF(ZNAME,FARY) ; WITHOUT FARY, IENOF WOULD RELOAD THE DEFAULT STORE SETTINGS
    143         Q $NA(@C0XSN@(ZIEN,1)) ; NODE 1 UNDER THE ENTRY IN THE STRINGS FILE GLOBAL
    144         ;
    145 FILEIN(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
    146         ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
    147         ; IE ^TMP("C0X","FILEIN",1)
    148         ; ZLVL IN THIS CASE WOULD BE 3 INCREMENTING THE 1
    149         ; ZDIR IS THE DIRECTORY, ZFNAME THE FILE NAME. RETURNS THE RESULT OF FTG^%ZISH
    150         S OK=$$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL) ; USE THE PARAMETER, NOT THE CALLER'S FNAME
    151         Q OK
    152         ;
    153 TESTPROC        ; TEST PROCESS WITH EXISTING SMALL RDF FILE
    154         S ZIN=$NA(^TMP("C0X",12226,"FILEIN")) ; FIXED SUBSCRIPT 12226, NOT $J: THE DATA MUST ALREADY BE THERE
    155         S ZGRAPH="/test/rdfFile"
    156         S ZM="/test/rdfFile/meta"
    157         D PROCESS(.G,ZIN,ZGRAPH,ZM) ; FARY IS OMITTED, SO PROCESS LOADS THE DEFAULT STORE
    158         Q
    159         ;
    160 PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY)      ; PROCESS AN INCOMING RDF FILE
    161         ; ZRTN IS PASSED BY REFERENCE FOR MESSAGES ABOUT THE PROCESSING (NOT SET BY THIS CODE YET)
    162         ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
    163         ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
    164         ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
    165         ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
    166         I '$D(FARY) D  ;
    167         . D INITFARY("C0XFARY")
    168         . S FARY="C0XFARY"
    169         D USEFARY(FARY)
    170         ; -- first parse the rdf file with the MXML parser
    171         ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
    172         S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W") I 'C0XDOCID W !,"Error. XML parse failed. Cannot process." Q  ; no document handle
    173         ; -- assign the MXML dom global name to ZDOM
    174         S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
    175         W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
    176         ; -- populate the metagraph to point to the graph with status unfinished
    177         S METAS=$$ANONS ; GET AN ANONYMOUS RANDOM SUBJECT
    178         I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
    179         D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
    180         D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
    181         ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
    182         S C0XDATE=$$NOW^XLFDT
    183         D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
    184         D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
    185         ; --
    186         ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
    187         ; -- put them in a local variable for quick reference
    188         ; -- TODO: create a graph for vocabularies and validate incoming against it
    189         ;
    190         K C0XVOC S C0XVOC="" ; drop prefixes left over from an earlier file
    191         N ZI,ZJ,ZK S ZI=""
    192         F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
    193         . S ZVOC=$P(ZI,"xmlns:",2)
    194         . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
    195         ;W !,"VOCABS:" ZWR C0XVOC
    196         ;
    197         ; -- look for children called rdf:Description. quit if none. not an rdf file
    198         ; -- $S keeps a null ZI (no children at all) from being used as a subscript
    199         S ZI=$O(@ZDOM@(1,"C",""))
    200         I $S(ZI="":1,1:$G(@ZDOM@(1,"C",ZI))'="rdf:Description") D  Q  ; not an rdf file
    201         . W !,"Error. Not an RDF file. Cannot process."
    202         ;
    203         ; -- now process the rdf description children
    204         ;
    205         S ZI=""
    206         S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
    207         F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
    208         . ; -- we are skipping any child that is not rdf:Description
    209         . ; -- TODO: check to see if this is right in general
    210         . ;
    211         . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
    212         . . W !,"SKIPPING NODE: ",ZI
    213         . ; -- now looking for the subject for the triples
    214         . S C0XSUB="",ZX=$G(@ZDOM@(ZI,"A","rdf:about")) ; forget the previous node's subject first
    215         . I ZX'="" D  ; we have the subject
    216         . . ;W " about: ",ZX
    217         . . S C0XSUB=ZX
    218         . E  D  ;
    219         . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
    220         . . I ZX'="" D  ;
    221         . . . S C0XSUB=ZX
    222         . I C0XSUB="" S C0XSUB=$$ANONS ; neither attribute present: use a new anonymous subject
    223         . ;
    224         . ; -- we now have the subject. the children of this node have the rest
    225         . ;
    226         . S ZJ="" ; for the children of the rdf:Description nodes
    227         . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
    228         . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
    229         . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
    230         . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
    231         . . I C0XPRE[":" D  ; expand using vocabulary
    232         . . . N ZB,ZA
    233         . . . S ZB=$P(C0XPRE,":",1)
    234         . . . S ZA=$P(C0XPRE,":",2)
    235         . . . I $G(C0XVOC(ZB))'="" D  ;
    236         . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
    237         . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
    238         . . I ZY'="" D  Q  ;
    239         . . . S C0XOBJ=ZY ; object
    240         . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; finally. our first real triple
    241         . . ; -- this is an else because of the quit above
    242         . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
    243         . . I ZX'="" D  Q  ; got one
    244         . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
    245         . . . ; without change... this could be foolish .. look at it again later
    246         . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; go for it and add a node
    247         . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
    248         . . I C0XOBJ="" D  Q  ; not a happy situation
    249         . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
    250         . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ,FARY) ; FARY keeps the triple in the caller's store
    251         W !,"INSERTING ",C0XCNT," TRIPLES"
    252         D UPDIE(.C0XFDA) ; commit the updates to the file
    253         ; TODO: mark the graph as finished (no code does this yet)
    254         S C0XEND=$$NOW^XLFDT
    255         W !," ENDED AT: ",C0XEND
    256         S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,$G(C0XSTART,C0XEND),2) ; C0XSTART is only set by IMPORT and WGET
    257         W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
    258         W !," APPROXIMATELY ",$P(C0XCNT/$S(C0XDIFF>0:C0XDIFF,1:1),".")," TRIPLES PER SECOND"
    259         Q
    260         ;
    261 SHOW(ZN)        ; DISPLAY WITH ZWR THE NODES UNDER ^TMP("MXMLDOM",$J,1,ZN)
    262         ZWR ^TMP("MXMLDOM",$J,1,ZN,*)
    263         Q
    264         ;
    265 ANONS() ; EXTRINSIC: BUILDS A RANDOM NAME FOR AN ANONYMOUS SUBJECT
    266         N ZKEY S ZKEY=$$LKY9 Q "_S:"_ZKEY ; "_S:" FOLLOWED BY THE RESULT OF LKY9
    267         ;
    268 NEWG(NGRAPH,NMETA)      ; BUILDS A NEW RANDOM GRAPH NAME AND ITS METAGRAPH NAME
    269         ; NGRAPH AND NMETA ARE PASSED BY REFERENCE AND ARE THE RETURN
    270         ; NOTHING IS FILED HERE: ONLY THE TWO NAMES ARE BUILT
    271         ; NGRAPH IS "G" PLUS THE RESULT OF LKY9, NMETA IS NGRAPH WITH "A" APPENDED
    272         S NGRAPH="G"_$$LKY9,NMETA=NGRAPH_"A"
    273         Q
    274         ;
    275 ADD(ZG,ZS,ZP,ZO,FARY)   ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
    276         ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
    277         I '$D(FARY) D  ;
    278         . D INITFARY("C0XFARY")
    279         . S FARY="C0XFARY"
    280         D USEFARY(FARY)
    281         I '$D(C0XCNT) S C0XCNT=0 ; C0XCNT NUMBERS THE ENTRIES QUEUED IN C0XFDA
    282         N ZNODE
    283         S ZNODE="N"_$$LKY17 ; RANDOM NODE NAME, FILED IN FIELD .01
    284         N ZNARY ; GET READY TO CALL IENOFA
    285         S ZNARY("ZG",ZG)=""
    286         S ZNARY("ZS",ZS)=""
    287         S ZNARY("ZP",ZP)=""
    288         S ZNARY("ZO",ZO)=""
    289         D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
    290         ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
    291         ;S ZSIEN=$$IENOF(ZS)
    292         ;S ZPIEN=$$IENOF(ZP)
    293         ;S ZOIEN=$$IENOF(ZO)
    294         ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
    295         S C0XCNT=C0XCNT+1
    296         S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE
    297         S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG","")) ; IEN OF THE ZG STRING
    298         S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS","")) ; IEN OF THE ZS STRING
    299         S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP","")) ; IEN OF THE ZP STRING
    300         S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO","")) ; IEN OF THE ZO STRING
    301         ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
    302         Q
    303         ;
    304 LKY9()  ;EXTRINSIC THAT RETURNS A STRING OF 9 RANDOM DIGITS. USED FOR GENERATING
    305         ; UNIQUE NODE AND GRAPH NAMES. EACH DIGIT COMES FROM ITS OWN $R(10) CALL
    306         ; AND IS APPENDED AS TEXT
    307         N ZDIGITS,ZLEFT
    308         S ZDIGITS=""
    309         F ZLEFT=9:-1:1 S ZDIGITS=ZDIGITS_$R(10)
    310         Q ZDIGITS
    311         ;
    312 LKY17() ;EXTRINSIC THAT RETURNS A STRING OF 17 RANDOM DIGITS. USED FOR GENERATING
    313         ; UNIQUE NODE AND GRAPH NAMES. EACH DIGIT COMES FROM ITS OWN $R(10) CALL
    314         ; AND IS APPENDED AS TEXT
    315         N ZDIGITS,ZLEFT
    316         S ZDIGITS=""
    317         F ZLEFT=17:-1:1 S ZDIGITS=ZDIGITS_$R(10)
    318         Q ZDIGITS
    319         ;
    320 IENOF(ZSTRING,FARY)     ; EXTRINSIC WHICH RETURNS THE IEN OF ZSTRING IN THE STRINGS FILE
    321         I '$D(FARY) D  ; THE STRING IS ADDED FIRST IF THE "B" INDEX DOES NOT HAVE IT
    322         . D INITFARY("C0XFARY")
    323         . S FARY="C0XFARY"
    324         D USEFARY(FARY) N ZIEN ; LOAD C0XSN/C0XSFN FOR THIS STORE, AS THE OTHER ENTRY POINTS DO
    325         S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
    326         I ZIEN="" D  ;
    327         . K C0XFDA2 S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING ; START FROM AN EMPTY FDA
    328         . D UPDIE(.C0XFDA2)
    329         . S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
    330         . K C0XFDA2
    331         Q ZIEN
    332         ;
    333 IENOFA(ZOUTARY,ZINARY,FARY)     ; RESOLVE STRINGS TO IEN IN STRINGS FILE
    334         ; OR ADD THEM IF
    335         ; MISSING. ZINARY AND ZOUTARY ARE PASSED BY REFERENCE
    336         ; ZINARY LOOKS LIKE ZINARY("VAR","VAL")=""
    337         ; RETURNS IN ZOUTARY OF THE FORM ZOUTARY("IEN","VAR",IEN)=""
    338         I '$D(FARY) D  ;
    339         . D INITFARY("C0XFARY")
    340         . S FARY="C0XFARY"
    341         D USEFARY(FARY) K ZOUTARY ; LOAD THE STORE SETTINGS, THEN START WITH CLEAN RESULTS
    342         K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
    343         N ZI S ZI=""
    344         N ZV,ZIEN,ZSEEN ; ZSEEN REMEMBERS THE VALUES ALREADY QUEUED FOR ADDING
    345         N ZCNT S ZCNT=0
    346         F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; LOOK FOR MISSING STRINGS
    347         . S ZV=$O(ZINARY(ZI,""))
    348         . I $O(@C0XSN@("B",ZV,""))="",'$D(ZSEEN(ZV)) D  ; QUEUE EACH MISSING VALUE ONLY ONCE
    349         . . S ZCNT=ZCNT+1,ZSEEN(ZV)=""
    350         . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
    351         I $D(C0XFDA2) D  ;
    352         . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
    353         . K C0XFDA2 ; CLEAN UP
    354         F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOW GET ALL IENS (ZI IS "" AGAIN AFTER THE FIRST LOOP)
    355         . S ZV=$O(ZINARY(ZI,""))
    356         . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
    357         . I ZIEN="" D  ;
    358         . . W !,"ERROR ADDING STRING: ",ZV
    359         . . B
    360         . S ZOUTARY("IEN",ZI,ZIEN)=""
    361         Q
    362         ;
    363 UPDIE(ZFDA)     ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS
    364         ; ZFDA IS PASSED BY REFERENCE AND IS KILLED BEFORE RETURNING
    365         ;ZWR ZFDA
    366         ;B
    367         K ZERR
    368         D CLEAN^DILF
    369         D UPDATE^DIE("","ZFDA","","ZERR")
    370         I $D(ZERR) S ZZERR=ZZERR ; DELIBERATE: ZZERR DOESN'T EXIST, SO THIS REFERENCE FAILS AND
    371         ; INVOKES THE ERROR TRAP IF TASKED
    372         ;. W "ERROR",!
    373         ;. ZWR ZERR
    374         ;. B
    375         K ZFDA
    376         Q
    377         ;
     1C0XMAIN ; GPL - Fileman Triples entry point routine ;10/13/11  17:05
     2 ;;0.1;C0X;nopatch;noreleasedate;Build 7
     3 ;Copyright 2011 George Lilly.  Licensed under the terms of the GNU
     4 ;General Public License See attached copy of the License.
     5 ;
     6 ;This program is free software; you can redistribute it and/or modify
     7 ;it under the terms of the GNU General Public License as published by
     8 ;the Free Software Foundation; either version 2 of the License, or
     9 ;(at your option) any later version.
     10 ;
     11 ;This program is distributed in the hope that it will be useful,
     12 ;but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 ;GNU General Public License for more details.
     15 ;
     16 ;You should have received a copy of the GNU General Public License along
     17 ;with this program; if not, write to the Free Software Foundation, Inc.,
     18 ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
     19 ;
     20 Q
     21 ;
     22INITFARY(ZFARY) ; INITIALIZE FILE NUMBERS AND OTHER USEFUL THINGS
     23 ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
     24 ; TRIPLE STORES
     25 S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
     26 S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
     27 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
     28 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
     29 ;S @ZFARY@("C0XDIR")="/home/glilly/all_smart_patient_data/smart-rdf/"
     30 S @ZFARY@("C0XDIR")="/home/george/fmts/trunk/samples/"
     31 D USEFARY(ZFARY)
     32 Q
     33 ;
     34USEFARY(ZFARY) ; INITIALIZES VARIABLES SAVED IN ARRAY ZFARY
     35 N ZI S ZI=""
     36 F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  D 
     37 . ;N ZX
     38 . S ZX="S "_ZI_"="""_@ZFARY@(ZI)_""""
     39 . ;W !,ZX
     40 . X ZX
     41 Q
     42 ;
     43IMPORT(FNAME,FDIR,FURL,FARY) ; EXTRINSIC THAT READS A FILE FROM THE STANDARD
     44 ; DIRECTORY, LOADS IT INTO THE TRIPLESTORE AS TEXT, AND RETURNS THE
     45 ; NODE NAME OF THE TEXT TRIPLE
     46 ; FDIR IS THE OPTIONAL DIRECTORY (DEFAUTS TO STANDARD DIR)
     47 ; FURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
     48 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
     49 I '$D(FARY) D  ;
     50 . D INITFARY("C0XFARY")
     51 . S FARY="C0XFARY"
     52 D USEFARY(FARY)
     53 N ZD,ZTMP
     54 I '$D(FDIR) S FDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
     55 I '$D(FURL) D  ;
     56 . N ZN2 S ZN2=$TR(FNAME,".","_") ; REMOVE THE DOT FROM THE NAME
     57 . S FURL=FDIR_ZN2
     58 N ZTMP
     59 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
     60 K @ZTMP ; MAKE SURE IT'S CLEAR
     61 S C0XSTART=$$NOW^XLFDT
     62 W !,"STARTED: ",C0XSTART
     63 W !,"READING IN: ",FNAME
     64 I '$$FILEIN(ZTMP,FDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
     65 . W !,"ERROR READING FILE: ",FDIR,FNAME
     66 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
     67 W !,$O(@ZRDF@(""),-1)," LINES READ"
     68 D INSRDF(ZRDF,FURL,FARY) ; IMPORT AND PROCESS THE RDF
     69 Q
     70 ;
     71WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
     72 ;
     73 I '$D(FARY) D  ;
     74 . D INITFARY("C0XFARY")
     75 . S FARY="C0XFARY"
     76 D USEFARY(FARY)
     77 ;N ZLOC,ZTMP
     78 K ZTMP
     79 S ZLOC=$NA(^TMP("C0X","WGET",$J))
     80 S C0XSTART=$$NOW^XLFDT
     81 W !,"STARTED: ",C0XSTART
     82 W !,"DOWNLOADING: ",ZURL
     83 S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
     84 M @ZLOC=ZTMP
     85 W !,$O(@ZLOC@(""),-1)," LINES READ"
     86 D INSRDF(ZLOC,ZURL,FARY)
     87 Q
     88 ;
     89INSRDF(ZRDF,ZNAME,FARY) ; INSERT AN RDF FILE INTO THE STORE AND PROCESS
     90 ; ZRDF IS PASSED BY NAME. ZNAME IS THE URL OR NAME RECORDED FOR THE FILE
     91 I '$D(FARY) D  ;
     92 . D INITFARY("C0XFARY")
     93 . S FARY="C0XFARY"
     94 D USEFARY(FARY)
     95 N ZGRAPH,ZSUBJECT
     96 S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
     97 S ZSUBJECT=$$ANONS ; RANDOM ANONYMOUS SUBJECT
     98 D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
     99 N ZTXTNM
     100 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
     101 D ADD(ZGRAPH,ZSUBJECT,"fmts:rdfSource",ZTXTNM,FARY)
     102 D UPDIE(.C0XFDA) ; FILE THE TWO TRIPLES QUEUED BY ADD
     103 K C0XCNT ;RESET FOR NEXT TIME
     104 D STORETXT(ZRDF,ZTXTNM,FARY)
     105 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:rdfSource ",ZTXTNM
     106 D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; ZNAME IS THE RESULT GRAPH, ZGRAPH THE METAGRAPH
     107 Q
     108 ;
     109STORETXT(ZTXT,ZNAME,FARY) ; STORE TEXT IN THE TRIPLESTORE AT ZNAME
     110 ; ZTXT IS HANDED TO WP^DIE AS THE SOURCE OF THE TEXT. ZNAME IS THE STRING NAME
     111 I '$D(FARY) D  ;
     112 . D INITFARY("C0XFARY")
     113 . S FARY="C0XFARY"
     114 D USEFARY(FARY)
     115 N ZIEN
     116 S ZIEN=$$IENOF(ZNAME,FARY) ; GET THE IEN (IENOF ADDS THE STRING IF IT IS MISSING)
     117 D CLEAN^DILF
     118 K ZERR
     119 D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR")
     120 I $D(ZERR) D  ; ANYTHING RETURNED IN ZERR IS DISPLAYED
     121 . ZWR ZERR
     122 Q
     123 ;
     124GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT
     125 ; ZRTN IS PASSED BY REFERENCE
     126 I '$D(FARY) D  ;
     127 . D INITFARY("C0XFARY")
     128 . S FARY="C0XFARY"
     129 D USEFARY(FARY)
     130 N ZIEN
     131 S ZIEN=$$IENOF(ZNAME)
     132 S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN")
     133 Q
     134 ;
     135WHERETXT(ZNAME,FARY) ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
     136 ; WHERE THE TEXT IS LOCATED. NAME IS THE NAME OF THE STRING
     137 I '$D(FARY) D  ;
     138 . D INITFARY("C0XFARY")
     139 . S FARY="C0XFARY"
     140 D USEFARY(FARY)
     141 N ZIEN
     142 S ZIEN=$$IENOF(ZNAME)
     143 Q $NA(@C0XSN@(ZIEN,1))
     144 ;
     145FILEIN(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
     146 ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
     147 ; IE ^TMP("C0X","FILEIN",1)
     148 ; ZLVL IN THIS CASE WOULD BE 3 INCREMENTING THE 1
     149 ; EXTRINSIC WHICH RETURNS THE RESULT OF FTG^%ZISH
     150 S OK=$$FTG^%ZISH(ZDIR,FNAME,ZINTMP,ZLVL)
     151 Q OK
     152 ;
     153TESTPROC ; TEST PROCESS WITH EXISTING SMALL RDF FILE
     154 S ZIN=$NA(^TMP("C0X",12226,"FILEIN")) ; FIXED SUBSCRIPT 12226, NOT $J: THE DATA MUST ALREADY BE THERE
     155 S ZGRAPH="/test/rdfFile"
     156 S ZM="/test/rdfFile/meta"
     157 D PROCESS(.G,ZIN,ZGRAPH,ZM) ; FARY IS OMITTED, SO PROCESS LOADS THE DEFAULT STORE
     158 Q
     159 ;
     160PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
     161 ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING
     162 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
     163 ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
     164 ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
     165 ;
     166 I '$D(FARY) D  ;
     167 . D INITFARY("C0XFARY")
     168 . S FARY="C0XFARY"
     169 D USEFARY(FARY)
     170 ; -- first parse the rdf file with the MXML parser
     171 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
     172 S C0XDOCID=$$EN^MXMLDOM(ZRDF,"W")
     173 ; -- assign the MXLM dom global name to ZDOM
     174 S ZDOM=$NA(^TMP("MXMLDOM",$J,C0XDOCID))
     175 W !,$O(@ZDOM@(""),-1)," XML NODES PARSED"
     176 ; -- populate the metagraph to point to the graph with status unfinished
     177 S METAS=$$ANONS ; GET AN ANONOMOUS RANDOM SUBJECT
     178 I '$D(ZMETA) S ZMETA="_:G"_$$LKY9 ; RANDOM GRAPH NAME FOR METAGRAPH
     179 D ADD(ZMETA,METAS,"fmts:about",ZGRF,FARY) ; POINT THE META TO THE GRAPH
     180 D ADD(ZMETA,METAS,"fmts:status","unfinished",FARY) ; mark as unfinished
     181 ;S C0XDATE=$$FMDTOUTC^C0CUTIL($$NOW^XLFDT,"DT")
     182 S C0XDATE=$$NOW^XLFDT
     183 D ADD(ZMETA,METAS,"fmts:dateTime",C0XDATE,FARY)
     184 D UPDIE(.C0XFDA) ; commit the metagraph changes to the triple store
     185 ; --
     186 ; -- pull out the vocabularies in the RDF statement. marked with xmlns:
     187 ; -- put them in a local variable for quick reference
     188 ; -- TODO: create a graph for vocabularies and validate incoming against it
     189 ;
     190 S C0XVOC=""
     191 N ZI,ZJ,ZK S ZI=""
     192 F  S ZI=$O(@ZDOM@(1,"A",ZI)) Q:ZI=""  D  ; FOR EACH xmlns
     193 . S ZVOC=$P(ZI,"xmlns:",2)
     194 . I ZVOC'="" S C0XVOC(ZVOC)=$G(@ZDOM@(1,"A",ZI))
     195 ;W !,"VOCABS:" ZWR C0XVOC
     196 ;
     197 ; -- look for children called rdf:Description. quit if none. not an rdf file
     198 ;
     199 S ZI=$O(@ZDOM@(1,"C",""))
     200 I $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ; not an rdf file
     201 . W !,"Error. Not an RDF file. Cannot process."
     202 ;
     203 ; -- now process the rdf description children
     204 ;
     205 S ZI=""
     206 S (C0XSUB,C0XPRE,C0XOBJ)="" ; INITIALIZE subject, object and predicate
     207 F  S ZI=$O(@ZDOM@(1,"C",ZI)) Q:ZI=""  D  ;
     208 . ; -- we are skipping any child that is not rdf:Description
     209 . ; -- TODO: check to see if this is right in general
     210 . ;
     211 . IF $G(@ZDOM@(1,"C",ZI))'="rdf:Description" D  Q  ;
     212 . . W !,"SKIPPING NODE: ",ZI
     213 . ; -- now looking for the subject for the triples
     214 . S ZX=$G(@ZDOM@(ZI,"A","rdf:about"))
     215 . I ZX'="" D  ; we have the subject
     216 . . ;W " about: ",ZX
     217 . . S C0XSUB=ZX
     218 . E  D  ;
     219 . . S ZX=$G(@ZDOM@(ZI,"A","rdf:nodeID")) ; node id is another style of subject
     220 . . I ZX'="" D  ;
     221 . . . S C0XSUB=ZX
     222 . I C0XSUB="" S C0XSUB=$$ANONS ; DEFAULT TO BLANK SUBJECT
     223 . ;
     224 . ; -- we now have the subject. the children of this node have the rest
     225 . ;
     226 . S ZJ="" ; for the children of the rdf:Description nodes
     227 . F  S ZJ=$O(@ZDOM@(ZI,"C",ZJ)) Q:ZJ=""  D  ; for each child
     228 . . S C0XPRE=@ZDOM@(ZJ) ; the predicate without a prefix
     229 . . S ZX=$G(@ZDOM@(ZJ,"A","xmlns")) ; name space
     230 . . I ZX'="" S C0XPRE=ZX_C0XPRE ; add the namespace prefix
     231 . . I C0XPRE[":" D  ; expand using vocabulary
     232 . . . N ZB,ZA
     233 . . . S ZB=$P(C0XPRE,":",1)
     234 . . . S ZA=$P(C0XPRE,":",2)
     235 . . . I $G(C0XVOC(ZB))'="" D  ;
     236 . . . . S C0XPRE=C0XVOC(ZB)_ZA ; expanded
     237 . . S ZY=$G(@ZDOM@(ZJ,"A","rdf:resource")) ; potential object
     238 . . I ZY'="" D  Q ;
     239 . . . S C0XOBJ=ZY ; object
     240 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; finally. our first real triple
     241 . . ; -- this is an else because of the quit above
     242 . . S ZX=$G(@ZDOM@(ZJ,"A","rdf:nodeID")) ; fishing for nodeId object
     243 . . I ZX'="" D  Q  ; got one
     244 . . . S C0XOBJ=ZX ; we are using the incoming nodeIDs as object/subject
     245 . . . ; without change... this could be foolish .. look at it again later
     246 . . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
     247 . . S C0XOBJ=$G(@ZDOM@(ZJ,"T",1)) ; hopefully an object is here
     248 . . I C0XOBJ="" D  Q  ; not a happy situation
     249 . . . W !,"ERROR, NO OBJECT FOUND FOR NODE: ",ZJ
     250 . . D ADD(ZGRF,C0XSUB,C0XPRE,C0XOBJ) ; go for it and add a node
     251 W !,"INSERTING ",C0XCNT," TRIPLES"
     252 D UPDIE(.C0XFDA) ; commit the updates to the file
     253 ; next, mark the graph as finished
     254 S C0XEND=$$NOW^XLFDT
     255 W !," ENDED AT: ",C0XEND
     256 S C0XDIFF=$$FMDIFF^XLFDT(C0XEND,C0XSTART,2)
     257 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
     258 W !," APPROXIMATELY ",$P(C0XCNT/C0XDIFF,".")," TRIPLES PER SECOND"
     259 Q
     260 ;
     261SHOW(ZN) ; DISPLAY WITH ZWR THE NODES UNDER ^TMP("MXMLDOM",$J,1,ZN)
     262 ZWR ^TMP("MXMLDOM",$J,1,ZN,*)
     263 Q
     264 ;
     265ANONS() ; RETURNS AN ANONOMOUS SUBJECT
     266 Q "_S:"_$$LKY9
     267 ;
     268NEWG(NGRAPH,NMETA) ; CREATES A NEW META GRAPH, MARKS IT AS UNFINISHED
     269 ; THEN CREATES A NEW GRAPH AND POINTS THE METAGRAPH TO IT
     270 ; NGRAPH AND NMETA ARE PASSED BY REFERENCE AND ARE THE RETURN
     271 S NGRAPH="G"_$$LKY9
     272 S NMETA=NGRAPH_"A"
     273 Q
     274 ;
     275ADD(ZG,ZS,ZP,ZO,FARY) ; ADD A TRIPLE TO THE TRIPLESTORE. ALL VALUES ARE TEXT
     276 ; THE FDA IS SET UP BUT THE FILES ARE NOT UPDATED. CALL UPDIE TO COMPLETE
     277 I '$D(FARY) D  ;
     278 . D INITFARY("C0XFARY")
     279 . S FARY="C0XFARY"
     280 D USEFARY(FARY)
     281 I '$D(C0XCNT) S C0XCNT=0 ; C0XCNT NUMBERS THE ENTRIES QUEUED IN C0XFDA
     282 N ZNODE
     283 S ZNODE="N"_$$LKY17 ; RANDOM NODE NAME, FILED IN FIELD .01
     284 N ZNARY ; GET READY TO CALL IENOFA
     285 S ZNARY("ZG",ZG)=""
     286 S ZNARY("ZS",ZS)=""
     287 S ZNARY("ZP",ZP)=""
     288 S ZNARY("ZO",ZO)=""
     289 D IENOFA(.ZIENS,.ZNARY,FARY) ; RESOLVE/ADD STRINGS
     290 ;S ZGIEN=$$IENOF(ZG) ; LAYGO TO GET IEN
     291 ;S ZSIEN=$$IENOF(ZS)
     292 ;S ZPIEN=$$IENOF(ZP)
     293 ;S ZOIEN=$$IENOF(ZO)
     294 ;I $D(C0XFDA) D UPDIE ; ADD THE STRINGS IF NEEDED
     295 S C0XCNT=C0XCNT+1
     296 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.01)=ZNODE
     297 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.02)=$O(ZIENS("IEN","ZG","")) ; IEN OF THE ZG STRING
     298 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.03)=$O(ZIENS("IEN","ZS","")) ; IEN OF THE ZS STRING
     299 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.04)=$O(ZIENS("IEN","ZP","")) ; IEN OF THE ZP STRING
     300 S C0XFDA(C0XTFN,"?+"_C0XCNT_",",.05)=$O(ZIENS("IEN","ZO","")) ; IEN OF THE ZO STRING
     301 ; REMEMBER TO CALL UPDIE WHEN YOU'RE DONE
     302 Q
     303 ;
     304LKY9() ;EXTRINIC THAT RETURNS A RANDOM 9 DIGIT NUMBER. USED FOR GENERATING
     305 ; UNIQUE NODE AND GRAPH NAMES
     306 N ZN,ZI
     307 S ZN=""
     308 F ZI=1:1:9 D  ;
     309 . S ZN=ZN_$R(10)
     310 Q ZN
     311 ;
     312LKY17() ;EXTRINIC THAT RETURNS A RANDOM 9 DIGIT NUMBER. USED FOR GENERATING
     313 ; UNIQUE NODE AND GRAPH NAMES
     314 N ZN,ZI
     315 S ZN=""
     316 F ZI=1:1:17 D  ;
     317 . S ZN=ZN_$R(10)
     318 Q ZN
     319 ;
     320IENOF(ZSTRING,FARY) ; EXTRINSIC WHICH RETURNS THE IEN OF ZS IN THE STRINGS FILE
     321 I '$D(FARY) D  ;
     322 . D INITFARY("C0XFARY")
     323 . S FARY="C0XFARY"
     324 N ZIEN
     325 S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
     326 I ZIEN="" D  ;
     327 . S C0XFDA2(C0XSFN,"+1,",.01)=ZSTRING
     328 . D UPDIE(.C0XFDA2)
     329 . S ZIEN=$O(@C0XSN@("B",ZSTRING,""))
     330 . K C0XFDA2
     331 Q ZIEN
     332 ;
     333IENOFA(ZOUTARY,ZINARY,FARY) ; RESOLVE STRINGS TO IEN IN STRINGS FILE
     334 ; OR ADD THEM IF
     335 ; MISSING. ZINARY AND ZOUTARY ARE PASSED BY REFERENCE
     336 ; ZINARY LOOKS LIKE ZINARY("VAR","VAL")=""
     337 ; RETURNS IN ZOUTARY OF THE FORM ZOUTARY("IEN","VAR",IEN)=""
     338 I '$D(FARY) D  ;
     339 . D INITFARY("C0XFARY")
     340 . S FARY="C0XFARY"
     341 K ZOUTARY ; START WITH CLEAN RESULTS
     342 K C0XFDA2 ; USE A SEPARATE FDA FOR THIS
     343 N ZI S ZI=""
     344 N ZV,ZIEN
     345 N ZCNT S ZCNT=0
     346 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; LOOK FOR MISSING STRINGS
     347 . S ZV=$O(ZINARY(ZI,""))
     348 . I $O(@C0XSN@("B",ZV,""))="" D  ;
     349 . . S ZCNT=ZCNT+1
     350 . . S C0XFDA2(C0XSFN,"+"_ZCNT_",",.01)=ZV
     351 I $D(C0XFDA2) D  ;
     352 . D UPDIE(.C0XFDA2) ; ADD MISSING STRINGS
     353 . K C0XFDA2 ; CLEAN UP
     354 F  S ZI=$O(ZINARY(ZI)) Q:ZI=""  D  ; NOT GET ALL IENS
     355 . S ZV=$O(ZINARY(ZI,""))
     356 . S ZIEN=$O(@C0XSN@("B",ZV,"")) ; THEY SHOULD BE THERE NOW
     357 . I ZIEN="" D  ;
     358 . . W !,"ERROR ADDING STRING: ",ZV
     359 . . B
     360 . S ZOUTARY("IEN",ZI,ZIEN)=""
     361 Q
     362 ;
     363UPDIE(ZFDA) ; INTERNAL ROUTINE TO CALL UPDATE^DIE AND CHECK FOR ERRORS
     364 ; ZFDA IS PASSED BY REFERENCE AND IS KILLED BEFORE RETURNING
     365 ;ZWR ZFDA
     366 ;B
     367 K ZERR
     368 D CLEAN^DILF
     369 D UPDATE^DIE("","ZFDA","","ZERR")
     370 I $D(ZERR) S ZZERR=ZZERR ; DELIBERATE: ZZERR DOESN'T EXIST, SO THIS REFERENCE FAILS AND
     371 ; INVOKES THE ERROR TRAP IF TASKED
     372 ;. W "ERROR",!
     373 ;. ZWR ZERR
     374 ;. B
     375 K ZFDA
     376 Q
     377 ;
Note: See TracChangeset for help on using the changeset viewer.