changeset 3:6c8dac2c5214

New tools: export a graph and generate the landing page of an ECHO resource
author dwinter
date Thu, 21 Jun 2012 12:24:29 +0200
parents 221a0fd0a5d3
children e93de4e99b52
files .classpath lib/openrdf-sesame-2.3.2-onejar.jar lib/openrdf-sesame-2.6.6-onejar.jar lib/virt_sesame2.jar lib/virtjdbc3.jar lib/virtjdbc4.jar src/de/mpiwg/itgroup/triplestoremanager/owl/MetaDataHandler.java src/de/mpiwg/itgroup/triplestoremanager/owl/TripleStoreHandler.java src/de/mpiwg/itgroup/triplestoremanager/tools/Exporter.java src/de/mpiwg/itgroup/triplestoremanager/tools/GenerateLandingPage.java
diffstat 10 files changed, 255 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/.classpath	Tue Dec 13 17:45:30 2011 +0100
+++ b/.classpath	Thu Jun 21 12:24:29 2012 +0200
@@ -2,23 +2,19 @@
 <classpath>
 	<classpathentry kind="src" path="src"/>
 	<classpathentry exported="true" kind="lib" path="lib/log4j-1.2.15.jar"/>
-	<classpathentry exported="true" kind="lib" path="lib/openrdf-sesame-2.3.2-onejar.jar"/>
 	<classpathentry exported="true" kind="lib" path="lib/jena.jar"/>
 	<classpathentry exported="true" kind="lib" path="lib/virt_sesame2.jar"/>
 	<classpathentry exported="true" kind="lib" path="lib/virtjdbc3.jar"/>
 	<classpathentry exported="true" kind="lib" path="lib/virtjdbc4.jar"/>
 	<classpathentry kind="lib" path="lib/protege-owl.jar"/>
 	<classpathentry kind="lib" path="lib/protege.jar"/>
+	<classpathentry kind="lib" path="lib/openrdf-sesame-2.6.6-onejar.jar"/>
 	<classpathentry kind="con" path="org.eclipse.jst.j2ee.internal.module.container"/>
 	<classpathentry kind="con" path="org.eclipse.jst.server.core.container/org.eclipse.jst.server.tomcat.runtimeTarget/Apache Tomcat v6.0">
 		<attributes>
 			<attribute name="owner.project.facets" value="jst.utility"/>
 		</attributes>
 	</classpathentry>
-	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.launching.macosx.MacOSXType/JVM 1.6.0 (MacOS X Default)">
-		<attributes>
-			<attribute name="owner.project.facets" value="java"/>
-		</attributes>
-	</classpathentry>
+	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
 	<classpathentry kind="output" path="bin"/>
 </classpath>
Binary file lib/openrdf-sesame-2.3.2-onejar.jar has changed
Binary file lib/openrdf-sesame-2.6.6-onejar.jar has changed
Binary file lib/virt_sesame2.jar has changed
Binary file lib/virtjdbc3.jar has changed
Binary file lib/virtjdbc4.jar has changed
--- a/src/de/mpiwg/itgroup/triplestoremanager/owl/MetaDataHandler.java	Tue Dec 13 17:45:30 2011 +0100
+++ b/src/de/mpiwg/itgroup/triplestoremanager/owl/MetaDataHandler.java	Thu Jun 21 12:24:29 2012 +0200
@@ -28,9 +28,12 @@
 
 public class MetaDataHandler {
 
-	String ontologieUriString = "file:///Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/NamedIdentityManager/ontologies/namedIdentities.owl";
+	//String ontologieUriString = "file:///Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/NamedIdentityManager/ontologies/namedIdentities.owl";
+	String ontologieUriString = "file:///Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/MPIWG-ontologies/namedIdentities.owl";
+	//String ontologieUriString = "http://ontologies.mpiwg-berlin.mpg.de/owl/authorities/namedIdentities";
 	//String rdfUriString = "file:///Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/NamedIdentityManager/ontologies/rdf-schema";
-	String owlFolder = "/Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/NamedIdentityManager/ontologies";
+	//String owlFolder = "/Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/NamedIdentityManager/ontologies";
+	String owlFolder = "/Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/MPIWG-ontologies/";
 
 	static MetaDataHandler instance=null;
 	
@@ -98,7 +101,7 @@
 	 * @param string
 	 * @return
 	 */
-	public Set getAssociatedPropsForClass(String string) {
+	public Collection getAssociatedPropsForClass(String string) {
 
 	
 
@@ -114,13 +117,23 @@
 		}
 
 		if (owlclass != null) {
-			return owlclass.getAssociatedProperties();
+			Collection props2 = owlclass.getPossibleRDFProperties();
+			Collection props1 = owlclass.getAssociatedProperties();
+			props1.addAll(props2);
+			return props1;
+			
 		} else {
 			logger.debug("Cannot find OWLClass:" + string);
 			logger.debug("Trying RDFSClass");
 			RDFSNamedClass rdfsclass=(RDFSNamedClass)owlModel.getRDFSNamedClass(string);
 			if (rdfsclass != null) {
-				return rdfsclass.getAssociatedProperties();
+				Collection props2 = rdfsclass.getPossibleRDFProperties();
+				//System.out.println(props2);
+				
+				Collection props1 = rdfsclass.getAssociatedProperties();
+				props1.addAll(props2);
+				return props1;
+				
 			} else {
 				logger.debug("Cannot find RDFSClass:" + string);
 				return null;
--- a/src/de/mpiwg/itgroup/triplestoremanager/owl/TripleStoreHandler.java	Tue Dec 13 17:45:30 2011 +0100
+++ b/src/de/mpiwg/itgroup/triplestoremanager/owl/TripleStoreHandler.java	Thu Jun 21 12:24:29 2012 +0200
@@ -6,6 +6,7 @@
 import java.io.OutputStream;
 import java.sql.Connection;
 import java.sql.DriverManager;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Iterator;
@@ -33,7 +34,10 @@
 import org.openrdf.rio.rdfxml.RDFXMLWriter;
 
 import virtuoso.sesame2.driver.VirtuosoRepository;
+import virtuoso.sesame2.driver.VirtuosoRepositoryConnection.CloseableIterationStmt;
 import de.mpiwg.itgroup.triplestoremanager.exceptions.TripleStoreHandlerException;
+import de.mpiwg.itgroup.triplestoremanager.owl.TripleStoreHandler.LiteralQuadruple;
+import de.mpiwg.itgroup.triplestoremanager.owl.TripleStoreHandler.Quadruple;
 import edu.stanford.smi.protegex.owl.model.OWLIndividual;
 import edu.stanford.smi.protegex.owl.model.OWLModel;
 import edu.stanford.smi.protegex.owl.model.OWLNamedClass;
@@ -151,10 +155,16 @@
 			this.sqlCon = DriverManager.getConnection(connectString, username,
 					password);
 		} catch (Exception e) {
+			e.printStackTrace();
 			throw new TripleStoreHandlerException();
 		}
 	}
-	public Map<String,Object> getJenaRDFValues(Collection props, String identifier, OWLModel model) throws RepositoryException {
+	
+	public Map<String,Object> getJenaRDFValues(Collection props, String identifier, OWLModel model)throws RepositoryException {
+		return getJenaRDFValues(props, identifier,  model, false); // convenience overload: delegates with keepEmtpyValues=false
+	}
+	
+	public Map<String,Object> getJenaRDFValues(Collection props, String identifier, OWLModel model, boolean keepEmtpyValues) throws RepositoryException {
 		identifier = identifier.replace("<", "");
 		identifier = identifier.replace(">", "");
 		Map<String,Object> retMap = new HashMap<String, Object>();
@@ -172,6 +182,12 @@
 			//RepositoryResult<Statement> statements = con.getStatements(subj, null, null, false);
 			//System.out.println(statements);
 			statements.enableDuplicateFilter();
+			
+			if (!statements.hasNext() && keepEmtpyValues){ // keine werte leere werte trotzdem behalten
+				retMap.put(name, "");
+			}
+			
+			
 			while(statements.hasNext()){
 				Statement sm = statements.next();
 				Value val = sm.getObject();
@@ -269,6 +285,8 @@
 	return statements;
 	}
 	
+
+	
 	public RepositoryResult<Statement> getStatements(String subj,String pred, String obj, String ctx) throws RepositoryException {
 		RepositoryConnection con = repository.getConnection();
 	
@@ -278,6 +296,15 @@
 	return statements;
 	}
 	
+	/**
+	 * Fetch statements from the triple store; set each parameter to a value or null.
+	 * @param subj
+	 * @param pred
+	 * @param obj
+	 * @param ctx
+	 * @return
+	 * @throws RepositoryException
+	 */
 	public RepositoryResult<Statement> getStatements(Resource subj,URI pred, Value obj, String ctx) throws RepositoryException {
 		RepositoryConnection con = repository.getConnection();
 		
@@ -435,6 +462,9 @@
 		}
 		
 	}
+	
+
+		
 	public void exportCtx(String ctx, File file) throws RepositoryException, RDFHandlerException, IOException{
 		
 		RepositoryConnection conn = repository.getConnection();
@@ -467,4 +497,21 @@
 		TripleStoreHandler th= instance.get(identString);
 		return th;
 	}
-}
+	public void remove(ArrayList<Quadruple> quads) throws TripleStoreHandlerException {
+		logger.debug("remove quads");
+		// For each quad: look up the matching statements, then remove them from the repository.
+		for (Quadruple quad:quads){
+			try {
+				RepositoryResult<Statement> resTmp = getStatements(quad.getSubj(),quad.getPred(),(String)quad.getObj(),quad.getCtx());
+				// NOTE(review): a connection is requested for every quad and is not closed in this method.
+				repository.getConnection().remove(resTmp);
+			} catch (RepositoryException e) {
+				// The stack trace is printed; the exception thrown below does not carry e as its cause.
+				e.printStackTrace();
+				throw new TripleStoreHandlerException();
+			}
+			
+		}
+
+	}	
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/triplestoremanager/tools/Exporter.java	Thu Jun 21 12:24:29 2012 +0200
@@ -0,0 +1,119 @@
+package de.mpiwg.itgroup.triplestoremanager.tools;
+
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.URI;
+import org.openrdf.model.Value;
+import org.openrdf.model.impl.ContextStatementImpl;
+import org.openrdf.model.impl.LiteralImpl;
+import org.openrdf.model.impl.StatementImpl;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.query.Binding;
+import org.openrdf.query.BindingSet;
+import org.openrdf.query.MalformedQueryException;
+import org.openrdf.query.QueryEvaluationException;
+import org.openrdf.query.TupleQueryResult;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.trig.TriGWriter;
+import org.openrdf.rio.turtle.TurtleWriter;
+
+import de.mpiwg.itgroup.triplestoremanager.exceptions.TripleStoreHandlerException;
+import de.mpiwg.itgroup.triplestoremanager.owl.TripleStoreHandler;
+
+/**
+ * Exports data from Virtuoso into the files /tmp/outN.ttl (N from the start index up to 9), written with a TriGWriter.
+ * Rows are fetched in chunks via SPARQL limit/offset; the counter value at each failure is written to /tmp/errors.
+ * Import afterwards e.g. with: DB.DBA.TTLP_MT(file_to_string_output ('/tmp/out0.ttl'),'http://int','http://int',257);
+ * @author dwinter
+ *
+ */
+public class Exporter {
+
+	public static void main(String args[]) throws TripleStoreHandlerException, MalformedQueryException, QueryEvaluationException, RDFHandlerException, IOException {
+		//TripleStoreHandler th = new TripleStoreHandler(
+		//		"jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1112", args[0], args[1]);
+		
+		// Required arguments: connection string, user, password; an optional fourth one is the start index.
+		if  (args.length<3){
+			System.out.println("Usage: connection String user password");
+			System.out.println("eg: jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1112 user password");
+			System.exit(1);
+		}
+		TripleStoreHandler th = new TripleStoreHandler(
+						args[0], args[1], args[2]);
+				
+		int start = 0; // index of the first chunk/file; overridden only when exactly four arguments are given
+		if (args.length==4){
+			start= Integer.valueOf(args[3]);
+		}
+		int counter=0; // rows written so far; also advanced by one on every failure
+		int offset=0; // offset used in the next query
+		int errOff=0; // resume offset after a failure, 0 when no retry is pending
+		
+		FileWriter errorWriter = new FileWriter(String.format("/tmp/errors"));
+		String currentFile="";
+		FileWriter fw=null;
+		TriGWriter turtleWriter=null;
+		for (int i=start;i<10;i++){
+			// Each chunk i goes to its own file; a retry of the same i keeps using the file that is already open.
+			String newFile=String.format("/tmp/out%s.ttl",i);
+			
+			if(!newFile.equals(currentFile)){
+				fw= new FileWriter(newFile);
+				currentFile=newFile;
+				turtleWriter = new TriGWriter(fw);
+				turtleWriter.startRDF();
+			}
+		
+	
+		// Normal case: chunk i starts at i*10000000. After a failure: resume at errOff and shrink the limit by the rows already passed.
+		int limit= 10000001; // NOTE(review): one more than the 10000000 stride, so neighbouring chunks overlap by one row - confirm intended
+		if (errOff==0){
+			offset=i*10000000;
+			
+		}
+		else{
+			offset=errOff;
+			limit=limit-(offset-i*10000000);
+		}
+		//String qs = String.format("select ?x ?y ?z ?g where {graph ?g {?x ?y ?z}} limit 10000001 offset %s",i*10000000);
+		String qs = String.format("select ?g  ?x ?y ?z where { graph <http://indexMeta> {?x ?y ?z.} graph ?g {?x ?y ?z}} limit %s offset %s",limit,offset);
+		try {
+		TupleQueryResult res = th.querySPARQL(qs);
+		// NOTE(review): any exception below sends the loop back to the same i; a query that fails every time would never terminate.
+		
+			while(res.hasNext()){
+				errOff=0; // a row was read, so clear the pending resume offset
+				BindingSet bs = res.next();
+				Binding g = bs.getBinding("g");
+				Binding x = bs.getBinding("x");
+				Binding y = bs.getBinding("y");
+				Binding z = bs.getBinding("z");
+				
+				// Rebuild the statement (subject x, predicate y, object z, context g) from the bindings; y and g are cast to URIImpl.
+				Statement stm = new ContextStatementImpl((Resource)x.getValue(), (URIImpl)y.getValue(), (Value)z.getValue(),(URIImpl)g.getValue());
+			
+				turtleWriter.handleStatement(stm);
+				fw.flush();
+				System.out.println(counter);
+				counter+=1;
+			}
+		} catch (Exception e) {
+			e.printStackTrace();
+			counter=counter+1; // presumably steps over the row that caused the failure - TODO confirm
+			System.err.println("skip:"+String.valueOf(counter));
+			errorWriter.write(String.valueOf(counter)+"\n");
+			errOff=counter;
+			i=i-1; // stay on the same chunk: the loop's i++ restores i for the retry
+			continue; // skips endRDF()/close() below, so the current file stays open for the retry
+		}
+		turtleWriter.endRDF();
+		fw.close();
+		}
+	errorWriter.close();
+	}
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/triplestoremanager/tools/GenerateLandingPage.java	Thu Jun 21 12:24:29 2012 +0200
@@ -0,0 +1,67 @@
+package de.mpiwg.itgroup.triplestoremanager.tools;
+
+import java.io.FileWriter;
+import java.io.IOException;
+
+import org.openrdf.model.Resource;
+import org.openrdf.model.Statement;
+import org.openrdf.model.Value;
+import org.openrdf.model.impl.ContextStatementImpl;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.query.Binding;
+import org.openrdf.query.BindingSet;
+import org.openrdf.query.MalformedQueryException;
+import org.openrdf.query.QueryEvaluationException;
+import org.openrdf.query.TupleQueryResult;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.trig.TriGWriter;
+
+import de.mpiwg.itgroup.triplestoremanager.exceptions.TripleStoreHandlerException;
+import de.mpiwg.itgroup.triplestoremanager.owl.TripleStoreHandler;
+
+public class GenerateLandingPage {
+	// Tool that writes one hasLandingPage statement per IndexMeta entity of the triple store to /tmp/lp.ttl.
+	/**
+	 * @param args connection string, user and password, in that order (see the usage message below)
+	 */
+
+		public static void main(String args[]) throws TripleStoreHandlerException, MalformedQueryException, QueryEvaluationException, RDFHandlerException, IOException {
+			//TripleStoreHandler th = new TripleStoreHandler(
+			//		"jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1112", args[0], args[1]);
+			
+			// NOTE(review): the output file is created and startRDF() is called before the argument check, so a usage error still leaves /tmp/lp.ttl behind.
+			FileWriter fw = new FileWriter("/tmp/lp.ttl");
+			TriGWriter turtleWriter = new TriGWriter(fw);
+			turtleWriter.startRDF();
+			if  (args.length<3){
+				System.out.println("Usage: connection String user password");
+				System.out.println("eg: jdbc:virtuoso://virtuoso.mpiwg-berlin.mpg.de:1111 user password");
+				System.exit(1);
+			}
+			TripleStoreHandler th = new TripleStoreHandler(
+							args[0], args[1], args[2]);
+			// Select every resource typed as IndexMeta.
+			String qs = String.format("select ?entity where {?entity a <http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta>}");
+			
+			TupleQueryResult res = th.querySPARQL(qs);
+	         
+			
+			while(res.hasNext()){
+				BindingSet bs = res.next();
+				Binding entity = bs.getBinding("entity");
+				// The landing page URI (z2) is the entity URI with every "/indexMeta" occurrence removed.
+				URIImpl y = new URIImpl("http://ontologies.mpiwg-berlin.mpg.de/general/hasLandingPage");
+				URIImpl z =(URIImpl)entity.getValue(); // the binding value is cast to URIImpl
+				URIImpl z2= new URIImpl(z.toString().replace("/indexMeta", ""));
+				
+				URIImpl g = new URIImpl("http://indexMeta_ECHOE_landingPages");
+				// Statement: entity hasLandingPage z2, with g passed as the context.
+				Statement stm = new ContextStatementImpl((Resource)entity.getValue(), y, z2,g);
+			
+				turtleWriter.handleStatement(stm);
+			}
+			turtleWriter.endRDF();
+			fw.close();
+		}
+}
+