diff src/de/mpiwg/itgroup/nimanager/tools/StoreRDFToVirtuoso.java @ 0:1384a0d382fa

first input
author dwinter
date Thu, 30 Jun 2011 11:44:24 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/nimanager/tools/StoreRDFToVirtuoso.java	Thu Jun 30 11:44:24 2011 +0200
@@ -0,0 +1,132 @@
+package de.mpiwg.itgroup.nimanager.tools;
+// see http://docs.openlinksw.com/virtuoso/VirtuosoDriverJDBC.html
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class StoreRDFToVirtuoso {
+
+	private Statement smt;
+	private Connection con;
+
+
+	public StoreRDFToVirtuoso(String username, String password) throws SQLException{
+		String connectString="jdbc:virtuoso://localhost:1111/charset=UTF-8";
+		con = DriverManager.getConnection(connectString,username,password);
+	
+		smt = con.createStatement();
+		
+	}
+	
+	public void run(String filename) throws IOException, SQLException{
+		
+		
+		BufferedReader nr = new BufferedReader(  new InputStreamReader(new FileInputStream(filename), "UTF8"));
+		String line;
+		String currentRDF="";
+		int counter=0;
+		while ((line = nr.readLine()) != null)   {
+			 if (line.startsWith("<rdf:RDF")){ // start a new file
+				 currentRDF=line;
+			  } else if (line.startsWith("</rdf:RDF>")) {
+				  currentRDF+=line;
+				  try {
+					process(currentRDF);
+				} catch (Exception e) {
+					// TODO Auto-generated catch block
+					e.printStackTrace();
+				}
+				  counter+=1;
+				  //if (counter>0)
+				//	  break;
+			  } else {
+				  currentRDF+=line;
+			  }
+		
+			
+		}
+	}
+
+	
+
+	private void process(String currentRDF) throws SQLException {
+		
+		if (alreadyExists(currentRDF)){
+			return;
+		}
+		System.out.println("--Process:");
+		currentRDF = currentRDF.replace("'", "''");
+		String cmd = String.format("DB.DBA.RDF_LOAD_RDFXML ('%s', '', 'file:///GND.rdf')", currentRDF);
+		System.out.println(cmd);
+		boolean more = smt.execute(cmd);
+		ResultSetMetaData data = smt.getResultSet().getMetaData();
+		 while(more)
+		    {
+			
+			ResultSet rs = smt.getResultSet();
+			while(rs.next())
+			{
+				 for(int i = 1;i <= data.getColumnCount();i++)
+				    {
+					String s = rs.getString(i);
+					System.out.println(s);
+			}
+			}
+			more = smt.getMoreResults();
+		    }
+			
+		System.out.println(currentRDF);
+		System.out.println("--End");
+		con.commit();
+	}
+
+	
+	private boolean alreadyExists(String currentRDF) throws SQLException {
+		//String cmdString="sparql select * from <file:///GND.rdf> where {<http://d-nb.info/gnd/100004776> ?x ?y}";
+		// find entries like <rdf:Description rdf:about="http://d-nb.info/gnd/100004776"> in currentRDF
+		Pattern p = Pattern.compile("<rdf:Description rdf:about=\"(.*?)\">", Pattern.MULTILINE | Pattern.DOTALL);
+		Matcher m = p.matcher(currentRDF);
+		m.find();
+		String gndID = m.group(1);
+		//String gndID="XSDGG";
+		String cmdString=String.format("sparql select count(*) from <file:///GND.rdf> where {<%s> ?x ?y}",gndID);
+		smt.execute(cmdString);
+		ResultSet rs = smt.getResultSet();
+		rs.next();
+		int count=rs.getInt(1);
+		if (count>0){
+			System.out.println("Already in:"+gndID);
+			return true;}
+		else
+			return false;
+	}
+
+	static public void main(String args[]) throws IOException, SQLException, ClassNotFoundException{
+		Class.forName("virtuoso.jdbc4.Driver");
+		if (args.length <2){
+			System.out.println("Usage: storeRDF username password");
+		}
+		StoreRDFToVirtuoso st = new StoreRDFToVirtuoso(args[0],args[1]);
+		
+		st.run("/Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/NamedIdentityManager/data/PNDrdf_1.rdf.txt");
+		
+		st.close();
+	}
+
+	private void close() throws SQLException {
+		con.close();
+		
+	}
+}
\ No newline at end of file