C0XF2N ; GPL - Fileman Triples entry point routine ;10/13/11 17:05
 ;;0.1;C0X;nopatch;noreleasedate;Build 7
 ;Copyright 2011 George Lilly. Licensed under the terms of the GNU
 ;General Public License See attached copy of the License.
 ;
 ;This program is free software; you can redistribute it and/or modify
 ;it under the terms of the GNU General Public License as published by
 ;the Free Software Foundation; either version 2 of the License, or
 ;(at your option) any later version.
 ;
 ;This program is distributed in the hope that it will be useful,
 ;but WITHOUT ANY WARRANTY; without even the implied warranty of
 ;MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 ;GNU General Public License for more details.
 ;
 ;You should have received a copy of the GNU General Public License along
 ;with this program; if not, write to the Free Software Foundation, Inc.,
 ;51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ;
 Q
 ;
 ; This is based on C0XMAIN but experiments with a fast load for triples
 ; that will write directly to the fileman global
 ; The file 172.101 is a F2N design style for triples, which means
 ; that it is a Flat file with no subfiles, all fields at the root
 ; ... it is a "2" file solution which means all strings are stored in
 ; ... strings file and pointed to by the triples file
 ; ... it is an N file because it has generated Node IDs instead of
 ; ... DINUM which would use the IEN for the Node ID.
 ; gpl 11/04/2011
 ;
INITFARY(ZFARY) ; INITIALIZE FILE NUMBERS AND OTHER USEFUL THINGS
 ; FOR THE DEFAULT TRIPLE STORE. USE OTHER VALUES FOR SUPPORTING ADDITIONAL
 ; TRIPLE STORES
 ; ZFARY IS PASSED BY NAME AND IS THE ARRAY THAT RECEIVES THE SETTINGS.
 ; WHEN THE ARRAY ALREADY EXISTS NOTHING IS SET AND USEFARY IS NOT CALLED,
 ; SO CALLERS INVOKE USEFARY THEMSELVES AFTERWARDS.
 I $D(@ZFARY) Q  ; ALREADY INITIALIZED
 S @ZFARY@("C0XTFN")=172.101 ; TRIPLES FILE NUMBER
 S @ZFARY@("C0XSFN")=172.201 ; TRIPLES STRINGS FILE NUMBER
 S @ZFARY@("C0XTN")=$NA(^C0X(101)) ; TRIPLES GLOBAL NAME
 S @ZFARY@("C0XSN")=$NA(^C0X(201)) ; STRING FILE GLOBAL NAME
 S @ZFARY@("C0XDIR")="/home/glilly/fmts/trunk/samples/smart-new/"
 S @ZFARY@("BLKLOAD")=1 ; this file supports block load
 S @ZFARY@("FMTSSTYLE")="F2N" ; fileman style
 S @ZFARY@("REPLYFMT")="JSON"
 D USEFARY(ZFARY)
 Q
 ;
USEFARY(ZFARY) ; INITIALIZES VARIABLES SAVED IN ARRAY ZFARY
 ; ZFARY IS PASSED BY NAME. EVERY FIRST LEVEL SUBSCRIPT OF THE ARRAY
 ; BECOMES A LOCAL VARIABLE OF THAT NAME HOLDING THE NODE'S VALUE.
 ; THE VARIABLES ARE NOT NEWED HERE SO THEY REMAIN SET FOR THE CALLER.
 N ZI S ZI=""
 F  S ZI=$O(@ZFARY@(ZI)) Q:ZI=""  D
 . ; name indirection instead of XECUTE of a built command string, so
 . ; values that contain quotes are copied intact and no scratch
 . ; variable is left behind
 . S @ZI=@ZFARY@(ZI)
 Q
 ;
FILEIN ; INTERACTIVE ENTRY POINT FOR OPTION TO READ IN A FILE
 ; PROMPTS FOR A DIRECTORY AND A FILE NAME WITH ^DIR, THEN CALLS IMPORT.
 ; C0XDIR AND C0XFN ARE LEFT SET SO THEY BECOME THE NEXT DEFAULTS.
 ; the reader variables are NEWed so they do not leak to the caller
 N DIR,X,Y,DIRUT,DIROUT,DTOUT,DUOUT
 I '$D(C0XFARY) D INITFARY("C0XFARY")
 D USEFARY("C0XFARY")
 S DIR(0)="F^3:240"
 S DIR("A")="File Directory"
 S DIR("B")=C0XDIR
 D ^DIR
 ; DIRUT is tested rather than Y="^" so that a timeout or "^^" also
 ; abandons the option instead of being used as the directory
 I $D(DIRUT) Q  ;
 S C0XDIR=Y
 S C0XFARY("C0XDIR")=Y
 S DIR(0)="F^3:240"
 S DIR("A")="File Name"
 I '$D(C0XFN) S DIR("B")="qds.rdf"
 E  S DIR("B")=C0XFN
 D ^DIR
 I $D(DIRUT) Q  ;
 I Y="" Q  ;
 S C0XFN=Y
 D IMPORT(C0XFN,C0XDIR,,"C0XFARY")
 K C0XFDA
 Q
 ;
IMPORT(FNAME,INDIR,INURL,FARY) ; READS A FILE FROM THE STANDARD
 ; DIRECTORY AND LOADS IT INTO THE TRIPLESTORE BY WAY OF INSRDF
 ; THIS IS A PROCEDURE (CALLED WITH DO); IT QUITS WITHOUT A VALUE
 ; FNAME IS THE NAME OF THE FILE TO READ
 ; INDIR IS THE OPTIONAL DIRECTORY (DEFAULTS TO STANDARD DIR)
 ; INURL IS THE OPTIONAL URI FOR ACCESSING THE FILE FROM THE TRIPLE STORE
 ;  (DEFAULTS TO INDIR_FNAME)
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZD,ZTMP
 I '$D(INDIR) S INDIR=C0XDIR ; DIRECTORY OF THE RDF FILE
 I $G(INURL)="" D  ;
 . ;N ZN2 S ZN2=$P(FNAME,".",1)_"_"_$P(FNAME,".",2) ; REMOVE THE DOT
 . ;S INURL=FDIR_ZN2
 . S INURL=INDIR_FNAME
 S ZTMP=$NA(^TMP("C0X",$J,"FILEIN",1)) ; WHERE TO PUT THE INCOMING FILE
 ; clear the whole FILEIN node, not only line 1, so lines left by an
 ; earlier and longer file in this job are not counted and stored again
 K ^TMP("C0X",$J,"FILEIN") ; MAKE SURE IT'S CLEAR
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"READING IN: ",FNAME
 I '$$FILEREAD(ZTMP,INDIR,FNAME,4) D  Q  ; QUIT IF NO SUCCESS
 . W !,"ERROR READING FILE: ",INDIR,FNAME
 S ZRDF=$NA(^TMP("C0X",$J,"FILEIN")) ; WITHOUT THE SUBSCRIPT
 W !,$O(@ZRDF@(""),-1)," LINES READ"
 D INSRDF(ZRDF,INURL,FARY) ; IMPORT AND PROCESS THE RDF
 K INURL
 K C0XFDA
 ;K ^TMP("MXMLDOM",$J)
 Q
 ;
WGET(ZURL,FARY) ; GET FROM THE INTERNET AN RDF FILE AND INSERT IT
 ; ZURL IS THE ADDRESS TO DOWNLOAD FROM
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 ; THE DOWNLOADED LINES ARE MERGED INTO ^TMP("C0X","WGET",$J) AND THEN
 ; HANDED TO INSRDF
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 ;N ZLOC,ZTMP
 K ZTMP
 S ZLOC=$NA(^TMP("C0X","WGET",$J))
 K @ZLOC
 S C0XSTART=$$NOW^XLFDT
 W !,"STARTED: ",C0XSTART
 W !,"DOWNLOADING: ",ZURL
 S OK=$$httpGET^%zewdGTM(ZURL,.ZTMP)
 M @ZLOC=ZTMP
 S C0XLINES=$O(@ZLOC@(""),-1)
 W !,C0XLINES," LINES READ"
 ; nothing came back: report and stop, as IMPORT does when the file
 ; read fails, rather than inserting an empty document
 I C0XLINES="" W !,"ERROR DOWNLOADING: ",ZURL Q  ;
 S C0XDLC=$$NOW^XLFDT ; DOWNLOAD COMPLETE
 W !,"DOWNLOAD COMPLETE AT ",C0XDLC
 S C0XDIFF=$$FMDIFF^XLFDT(C0XDLC,C0XSTART,2)
 W !," ELAPSED TIME: ",C0XDIFF," SECONDS"
 I C0XDIFF'=0 W !," APPROXIMATELY ",$P(C0XLINES/C0XDIFF,".")," LINES PER SEC"
 D INSRDF(ZLOC,ZURL,FARY)
 Q
 ;
INSRDF(ZRDF,ZNAME,FARY) ; INSERT AN RDF FILE INTO THE STORE AND PROCESS
 ; ZRDF IS PASSED BY NAME
 ; ZNAME IS THE URL OR PATH RECORDED AS fmts:url FOR THE FILE
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 S BATCNT=0 ; BATCH COUNTER
 S BATMAX=10000 ; TRY BATCHES OF THIS SIZE
 N ZGRAPH,ZSUBJECT
 S ZGRAPH="_:G"_$$LKY9 ; RANDOM GRAPH NAME
 S ZSUBJECT=$$ANONS() ; RANDOM ANONYMOUS SUBJECT
 D ADD(ZGRAPH,ZSUBJECT,"fmts:url",ZNAME,FARY)
 N ZTXTNM
 S ZTXTNM="_TXT_INCOMING_RDF_FILE_"_ZNAME_"_"_$$LKY9 ; NAME FOR TEXT NODE
 D ADD(ZGRAPH,ZSUBJECT,"fmts:fileSource",ZTXTNM,FARY)
 D ADD(ZGRAPH,ZSUBJECT,"fmts:fileTag",$$name2tag(ZNAME),FARY)
 D SWUPDIE(.C0XFDA) ; TRY IT OUT
 K C0XCNT ;RESET FOR NEXT TIME
 D STORETXT(ZRDF,ZTXTNM,FARY)
 W !,"ADDED: ",ZGRAPH," ",ZSUBJECT," fmts:fileSource ",ZTXTNM
 ; NOTE(review): ZNAME is passed in the ZGRF position and ZGRAPH in the
 ; ZMETA position; VISTAOWL passes them the other way round - confirm
 ; which order is intended
 D PROCESS(.G,ZRDF,ZNAME,ZGRAPH,FARY) ; PARSE AND INSERT THE RDF
 Q
 ;
name2tag(zname) ; extrinsic which returns a tag derived from a name
 ; /home/vista/project.xml ==> project
 ; takes the text after the last "/" and keeps what precedes its first "."
 q $p($re($p($re(zname),"/")),".")
 ;
STORETXT(ZTXT,ZNAME,FARY) ; STORE TEXT IN THE TRIPLESTORE AT ZNAME
 ; ZTXT IS HANDED TO WP^DIE AS THE SOURCE OF THE TEXT
 ; ZNAME IS THE STRING WHOSE ENTRY IN FILE C0XSFN RECEIVES THE TEXT
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 S ZIEN=$$IENOF(ZNAME,FARY) ; GET THE IEN
 D CLEAN^DILF
 K ZERR
 D WP^DIE(C0XSFN,ZIEN_",",1,,ZTXT,"ZERR")
 I $D(ZERR) D  Q  ;
 . W !,"ERROR CREATING WORD PROCESSING FIELD"
 . S C0XERR="ERROR CREATING WORD PROCESSING FIELD"
 . D ^%ZTER ; error trap
 Q
 ;
GETTXT(ZRTN,ZNAME,FARY) ; RETURNS RDF SOURCE OR OTHER TEXT
 ; ZRTN IS PASSED BY REFERENCE
 ; ZNAME IS THE STRING WHOSE TEXT IS WANTED
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 ; FARY is passed on, as STORETXT does, so the lookup uses the same
 ; triple store as the read below
 S ZIEN=$$IENOF(ZNAME,FARY)
 S OK=$$GET1^DIQ(C0XSFN,ZIEN_",",1,,"ZRTN")
 Q
 ;
WHERETXT(ZNAME,FARY) ; EXTRINSIC WHICH RETURNS THE NAME OF THE GLOBAL
 ; WHERE THE TEXT IS LOCATED. NAME IS THE NAME OF THE STRING
 ; FARY IS THE OPTIONAL FILE ARRAY OF THE TRIPLE STORE TO USE
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 N ZIEN
 ; FARY is passed on so the IEN and C0XSN refer to the same store
 S ZIEN=$$IENOF(ZNAME,FARY)
 Q $NA(@C0XSN@(ZIEN,1))
 ;
FILEREAD(ZINTMP,ZDIR,ZFNAME,ZLVL) ; READS A FILE INTO ZINTMP USING FTG^%ZISH
 ; ZINTMP IS PASSED BY NAME AND INCLUDES THE NEW SUBSCRIPT
 ; IE ^TMP("C0X","FILEIN",1)
 ; ZLVL IN THIS CASE WOULD BE 3 INCREMENTING THE 1
 ; ZDIR IS THE DIRECTORY AND ZFNAME THE NAME OF THE FILE TO READ
 ; EXTRINSIC WHICH RETURNS THE RESULT OF FTG^%ZISH
 ; the file name comes from the ZFNAME parameter, not from whatever
 ; FNAME the caller happens to have in scope
 S OK=$$FTG^%ZISH(ZDIR,ZFNAME,ZINTMP,ZLVL)
 Q OK
 ;
TESTPROC ; TEST PROCESS WITH EXISTING SMALL RDF FILE
 ; expects the file to be in ^TMP("C0X",12226,"FILEIN") already
 S ZIN=$NA(^TMP("C0X",12226,"FILEIN"))
 S ZGRAPH="/test/rdfFile"
 S ZM="/test/rdfFile/meta"
 D PROCESS(.G,ZIN,ZGRAPH,ZM)
 Q
 ;
VISTAOWL ;
 ; runs PROCESS on the copy of VistAOWL.owl expected to be in
 ; ^TMP("C0X",542,"FILEIN") already, using a fixed graph name
 S ZRDF=$NA(^TMP("C0X",542,"FILEIN"))
 S ZNAME="/home/glilly/vistaowl/VistAOWL.owl"
 S ZGRAPH="_:G431590209"
 S FARY="C0XFARY"
 D INITFARY(FARY)
 S C0XDOCID=1
 S BATCNT=0
 S BATMAX=10000
 D PROCESS(.G,ZRDF,ZGRAPH,ZNAME,FARY)
 Q
 ;
PROCESS(ZRTN,ZRDF,ZGRF,ZMETA,FARY) ; PROCESS AN INCOMING RDF FILE
 ; ZRTN IS PASS BY REFERENCE AND RETURNS MESSAGES ABOUT THE PROCESSING
 ; ZRDF IS PASSED BY NAME AND IS THE GLOBAL CONTAINING THE RDF FILE
 ; ZGRF IS THE NAME OF THE GRAPH TO USE IN THE TRIPLE STORE FOR RESULTS
 ; ZMETA IS OPTIONAL AND IS THE NAME OF THE GRAPH TO STORE METADATA
 ;
 I '$D(FARY) D  ;
 . D INITFARY("C0XFARY")
 . S FARY="C0XFARY"
 D USEFARY(FARY)
 ;N BATCNT
 ;N BATMAX
 ; -- first parse the rdf file with the MXML parser
 ;S C0XDOCID=$$PARSE^C0CNHIN(ZRDF,"C0XARRAY") ; PARSE WITH MXML
 S C0XDLC2=$$NOW^XLFDT ; START OF PARSE
 I @ZRDF@(1)'["