C0XNOTS ; GPL - RDF processing without the triplestore ;7/5/12 17:05
 ;;0.1;C0X;nopatch;noreleasedate;Build 7
 ;Copyright 2011 George Lilly. Licensed under the terms of the GNU
 ;General Public License See attached copy of the License.
 ;
 ;This program is free software; you can redistribute it and/or modify
 ;it under the terms of the GNU General Public License as published by
 ;the Free Software Foundation; either version 2 of the License, or
 ;(at your option) any later version.
 ;
 ;This program is distributed in the hope that it will be useful,
 ;but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 ;GNU General Public License for more details.
 ;
 ;You should have received a copy of the GNU General Public License along
 ;with this program; if not, write to the Free Software Foundation, Inc.,
 ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ;
 Q
 ;
 ; This is based on C0XMAIN but experiments with a fast load for triples
 ; that will write directly to the fileman global
 ; The file 172.101 is a F2N design style for triples, which means
 ; that it is a Flat file with no subfiles, all fields at the root
 ; ... it is a "2" file solution which means all strings are stored in
 ; ... strings file and pointed to by the triples file
 ; ... it is an N file because it has generated Node IDs instead of
 ; ... DINUM which would use the IEN for the Node ID.
 ; gpl 11/04/2011
 ;
INITFARY(ZFARY) ; INITIALIZE FILE NUMBERS AND OTHER USEFUL THINGS
 ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
 ; TRIPLE STORES
 ; ZFARY IS PASSED BY NAME. IF THE NAMED ARRAY ALREADY EXISTS IT IS LEFT
 ; UNTOUCHED (AND USEFARY IS NOT CALLED FROM HERE); OTHERWISE IT IS FILLED
 ; WITH THE DEFAULTS BELOW AND THEN LOADED INTO LOCAL VARIABLES BY USEFARY
 I $D(@ZFARY) Q  ; ALREADY INITIALIZED
 S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
 S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/smart-new/"
 S @ZFARY@("BLKLOAD")=1 ; this file supports block load
 S @ZFARY@("FMTSSTYLE")="F2N" ; fileman style
 S @ZFARY@("REPLYFMT")="JSON"
 D USEFARY(ZFARY)
 Q
 ;
USEFARY(ZFARY) ; INITIALIZES VARIABLES SAVED IN ARRAY ZFARY
 ; ZFARY IS PASSED BY NAME. FOR EVERY FIRST-LEVEL SUBSCRIPT OF THE ARRAY
 ; A LOCAL VARIABLE OF THE SAME NAME IS SET TO THE STORED VALUE
 ; (E.G. @ZFARY@("C0XTFN")=172.101 RESULTS IN C0XTFN=172.101)
 ; NAME INDIRECTION IS USED INSTEAD OF BUILDING AND XECUTING A SET
 ; COMMAND, SO VALUES CONTAINING DOUBLE QUOTES ARE COPIED CORRECTLY AND
 ; NO SCRATCH VARIABLE IS LEFT BEHIND
 N ZI S ZI=""
 F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  S @ZI=@ZFARY@(ZI)
 Q
 ;
FILEIN ; INTERACTIVE ENTRY POINT FOR OPTION TO READ IN A FILE
 ; PROMPTS FOR A DIRECTORY AND A FILE NAME WITH THE FILEMAN READER (^DIR)
 ; AND THEN CALLS IMPORT. THE ANSWERS ARE REMEMBERED IN C0XDIR, C0XFN
 ; AND C0XFARY("C0XDIR") AS DEFAULTS FOR THE NEXT CALL
 N DIR,X,Y,DIRUT,DIROUT,DTOUT,DUOUT ; READER VARIABLES STAY LOCAL TO THIS CALL
 I '$D(C0XFARY) D INITFARY("C0XFARY")
 D USEFARY("C0XFARY")
 S DIR(0)="F^3:240"
 S DIR("A")="File Directory"
 S DIR("B")=C0XDIR
 D ^DIR
 I $D(DIRUT) Q  ; USER ENTERED "^" OR THE READ TIMED OUT
 S C0XDIR=Y
 S C0XFARY("C0XDIR")=Y
 K DIR ; START THE SECOND PROMPT FROM A CLEAN READER ARRAY
 S DIR(0)="F^3:240"
 S DIR("A")="File Name"
 I '$D(C0XFN) S DIR("B")="qds.rdf"
 E  S DIR("B")=C0XFN
 D ^DIR
 I $D(DIRUT) Q  ; USER ENTERED "^", NOTHING, OR THE READ TIMED OUT
 S C0XFN=Y
 K C0XARY
 S C0XARY=""
 D IMPORT(.C0XARY,C0XFN,C0XDIR)
 K C0XFDA
 Q
 ;
IMPORT(ZRETURN,FNAME,INDIR) ; READS A FILE FROM THE STANDARD
 ; DIRECTORY, PARSES IT AND RETURNS AN ARRAY OF TRIPLES
 ; CALLED WITH DO - IT DOES NOT RETURN A VALUE; RESULTS COME BACK IN ZRETURN
 ; ZRETURN IS PASSED BY REFERENCE AND IS HANDED ON TO INSRDF
 ; FNAME IS THE NAME OF THE FILE TO READ
 ; INDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO STANDARD DIR)
 ; INURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; (INURL AND FARY ARE NOT PARAMETERS - THEY ARE TAKEN FROM THE CALLER'S
 ; SYMBOL TABLE WHEN DEFINED)
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZD,ZTMP
 I $G(INDIR)="" S INDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
 I $G(INURL)="" D  ;
 . ;N ZN2 S ZN2=$P(FNAME,".",1)_"_"_$P(FNAME,".",2) ; REMOVE THE DOT
 . ;S INURL=FDIR_ZN2
 . S INURL=INDIR_FNAME
 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
 ; CLEAR THE WHOLE LOAD AREA, NOT JUST NODE 1 - OTHERWISE LINES LEFT
 ; OVER FROM A LONGER FILE LOADED EARLIER IN THIS JOB WOULD BE PARSED TOO
 K @ZRDF
 S C0XSTART=$$NOW^XLFDT
 I $D(DEBUG) W !,"STARTED: ",C0XSTART
 I $D(DEBUG) W !,"READING IN: ",FNAME
 I '$$FILEREAD(ZTMP,INDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
 . W !,"ERROR READING FILE: ",INDIR,FNAME
 . K INURL ; DO NOT LEAVE A STALE URL BEHIND FOR THE NEXT IMPORT
 I $D(DEBUG) W !,$O(@ZRDF@(""),-1)," LINES READ"
 D INSRDF(.ZRETURN,ZRDF) ; IMPORT AND PROCESS THE RDF
 K INURL
 K C0XFDA
 ;K ^TMP("MXMLDOM",$J)
 Q
 ;
WGET(ZRETURN,ZURL) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
 ; ZRETURN IS PASSED BY REFERENCE AND IS HANDED ON TO INSRDF
 ; ZURL IS THE URL TO DOWNLOAD WITH $$httpGET^%zewdGTM
 ; THE DOWNLOADED LINES ARE MERGED INTO ^TMP("C0X","WGET",$J)
 ; TIMING IS LEFT IN C0XSTART, C0XDLC AND C0XDIFF
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 ;N ZLOC,ZTMP
 K ZTMP
 S ZLOC=$NA(^TMP("C0X","WGET",$J))
 K @ZLOC
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"DOWNLOADING: ",ZURL
 S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
 M @ZLOC=ZTMP
 S C0XLINES=$O(@ZLOC@(""),-1)
 ; NOTHING CAME BACK - STOP HERE INSTEAD OF HANDING AN EMPTY GLOBAL
 ; TO THE PARSER, WHICH READS NODE 1 UNCONDITIONALLY
 I C0XLINES="" W !,"ERROR DOWNLOADING: ",ZURL Q
 W !,C0XLINES," LINES READ"
 S C0XDLC=$$NOW^XLFDT ; DOWNLOAD COMPLETE
 W !,"DOWNLOAD COMPLETE AT ",C0XDLC
 S C0XDIFF=$$FMDIFF^XLFDT(C0XDLC,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XLINES/C0XDIFF,".")," LINES PER SEC"
 D INSRDF(.ZRETURN,ZLOC)
 Q
 ;
INSRDF(ZRETURN,ZRDF) ; PARSE AN RDF FILE AND RETURN AN ARRAY
 ; ZRETURN IS PASSED BY REFERENCE AND IS HANDED ON TO PROCESS
 ; ZRDF IS PASSED BY NAME
 ; RESETS THE BATCH COUNTER (BATCNT) AND BATCH SIZE (BATMAX), WHICH ARE
 ; DELIBERATELY NOT NEWED SO THAT PROCESS CAN SEE THEM
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 S BATCNT=0 ; BATCH COUNTER
 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
 N ZGRAPH,ZSUBJECT
 S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
 S ZSUBJECT=$$ANONS() ; RANDOM ANONYMOUS SUBJECT
 ;D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
 N ZTXTNM
 ;S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
 ;D ADD(ZGRAPH,ZSUBJECT,"fmts:fileSource",ZTXTNM,FARY)
 ;D ADD(ZGRAPH,ZSUBJECT,"fmts:fileTag",$$name2tag(ZNAME),FARY)
 ;D SWUPDIE(.C0XFDA) ; TRY IT OUT
 K C0XCNT ;RESET FOR NEXT TIME
 ;D STORETXT(ZRDF,ZTXTNM,FARY)
 ;W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:fileSource ",ZTXTNM
 D PROCESS(.ZRETURN,ZRDF) ; PARSE THE RDF AND RETURN THE ARRAY OF TRIPLES
 Q
 ;
name2tag(zname) ; extrinsic which returns a tag derived from a name
 ; /home/vista/project.xml ==> project
 ; takes the text after the last "/" and keeps what precedes its first "."
 q $p($re($p($re(zname),"/")),".")
 ;
FILEREAD(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
 ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
 ; IE ^TMP("C0X",$J,"FILEIN",1)
 ; ZLVL IN THIS CASE WOULD BE 4 INCREMENTING THE 1
 ; ZDIR IS THE DIRECTORY AND ZFNAME THE NAME OF THE FILE TO READ
 ; EXTRINSIC WHICH RETURNS THE RESULT OF FTG^%ZISH
 ; USES THE ZFNAME PARAMETER - NOT THE CALLER'S FNAME VARIABLE
 S OK=$$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL)
 Q OK
 ;
TESTPROC ; TEST PROCESS WITH EXISTING SMALL RDF FILE
 ; DEVELOPER TEST: EXPECTS THE FILE TO BE ALREADY LOADED UNDER THE
 ; HARD-CODED JOB NUMBER BELOW
 S ZIN=$NA(^TMP("C0X",12226,"FILEIN"))
 S ZGRAPH="/test/rdfFile"
 S ZM="/test/rdfFile/meta"
 D PROCESS(.G,ZIN)
 Q
 ;
VISTAOWL ;
 ; DEVELOPER TEST: RUNS PROCESS ON A FILE ALREADY LOADED UNDER THE
 ; HARD-CODED JOB NUMBER BELOW, USING FIXED GRAPH AND BATCH SETTINGS
 S ZRDF=$NA(^TMP("C0X",542,"FILEIN"))
 S ZNAME="/home/glilly/vistaowl/VistAOWL.owl"
 S ZGRAPH="_:G431590209"
 S FARY="C0XFARY"
 D INITFARY(FARY)
 S C0XDOCID=1
 S BATCNT=0
 S BATMAX=10000
 D PROCESS(.G,ZRDF)
 Q
 ;
FETCH(C0XRARY,FNAME,FDIR) ; read in an RDF file and return a usable mumps array of the
 ; contents
 ; C0XRARY is passed by reference and handed on to IMPORT
 ; FNAME and FDIR are optional; the defaults below apply when not passed
 ;
 I '$D(FNAME) S FNAME="dewdrop-patient-32-v2.rdf"
 I '$D(FDIR) S FDIR="/home/vista/CCR/"
 D IMPORT(.C0XRARY,FNAME,FDIR)
 Q
 ;
PROCESS(ZRTN,ZRDF) ; PROCESS AN INCOMING RDF FILE
 ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING
 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 .
S FARY="C0XFARY" D USEFARY(FARY) ;N BATCNT ;N BATMAX ; -- first parse the rdf file with the MXML parser ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML S C0XDLC2=$$NOW^XLFDT ; START OF PARSE I @ZRDF@(1)'["