Mercurial > hg > NamedIdentityManager
diff src/de/mpiwg/itgroup/nimanager/tools/StoreRDFToVirtuoso.java @ 0:1384a0d382fa
first input
author | dwinter |
---|---|
date | Thu, 30 Jun 2011 11:44:24 +0200 |
parents | |
children |
line wrap: on
line diff
package de.mpiwg.itgroup.nimanager.tools;

// see http://docs.openlinksw.com/virtuoso/VirtuosoDriverJDBC.html
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line tool that reads a text file containing a sequence of RDF/XML
 * documents and loads each of them into the graph {@code file:///GND.rdf} of a
 * Virtuoso server through JDBC.
 *
 * <p>A document starts at a line beginning with {@code <rdf:RDF} and ends at a
 * line beginning with {@code </rdf:RDF>}. A document whose first
 * {@code rdf:Description} subject already has triples in the target graph is
 * not loaded again.
 */
public class StoreRDFToVirtuoso {

    /** JDBC URL of the Virtuoso server. */
    private static final String CONNECT_STRING = "jdbc:virtuoso://localhost:1111/charset=UTF-8";

    /** IRI of the graph the documents are loaded into and checked against. */
    private static final String GRAPH_IRI = "file:///GND.rdf";

    /** Input file used by {@link #main(String[])} when no file argument is given. */
    private static final String DEFAULT_INPUT_FILE =
            "/Users/dwinter/Documents/Projekte/ECHO-eSciDoc-MPDL/escidocMPIWG/NamedIdentityManager/data/PNDrdf_1.rdf.txt";

    /**
     * Procedure call that loads one RDF/XML document. The document is bound as
     * a parameter so that no character in it has to be escaped by hand.
     */
    private static final String LOAD_CALL = "DB.DBA.RDF_LOAD_RDFXML (?, '', '" + GRAPH_IRI + "')";

    /** Matches the subject of an {@code rdf:Description} element; group 1 is the IRI. */
    private static final Pattern SUBJECT_PATTERN =
            Pattern.compile("<rdf:Description\\s+rdf:about=\"([^\"]*)\"");

    /** Characters that must not be spliced into a SPARQL {@code <...>} IRI reference. */
    private static final Pattern UNSAFE_IRI_CHARS = Pattern.compile("[\\s<>\"{}|^`\\\\]");

    private final Statement smt;
    private final PreparedStatement loadSmt;
    private final Connection con;

    /**
     * Opens the connection to the Virtuoso server and prepares the statements
     * used for the import.
     *
     * @param username database user
     * @param password password of {@code username}
     * @throws SQLException if the connection or the statements cannot be created
     */
    public StoreRDFToVirtuoso(String username, String password) throws SQLException {
        Connection connection = DriverManager.getConnection(CONNECT_STRING, username, password);
        Statement statement;
        PreparedStatement load;
        try {
            statement = connection.createStatement();
            load = connection.prepareStatement(LOAD_CALL);
        } catch (SQLException e) {
            // Do not leak the connection when statement creation fails.
            try {
                connection.close();
            } catch (SQLException ignored) {
                // The original failure is the one worth reporting.
            }
            throw e;
        }
        con = connection;
        smt = statement;
        loadSmt = load;
    }

    /**
     * Reads {@code filename} (UTF-8) line by line, collects each RDF/XML
     * document and hands it to the loader.
     *
     * <p>A failure while loading one document is printed and the import
     * continues with the next document.
     *
     * @param filename path of the file containing the RDF/XML documents
     * @throws IOException if the file cannot be opened or read
     * @throws SQLException declared for compatibility with existing callers
     */
    public void run(String filename) throws IOException, SQLException {
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(filename), "UTF8"));
        try {
            StringBuilder currentRDF = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("<rdf:RDF")) {
                    // A new document begins: drop whatever was collected before.
                    currentRDF.setLength(0);
                }
                // readLine() strips the line terminator; put one back so that
                // tokens on adjacent lines are not fused together.
                currentRDF.append(line).append('\n');
                if (line.startsWith("</rdf:RDF>")) {
                    try {
                        process(currentRDF.toString());
                    } catch (Exception e) {
                        // Deliberately broad: one bad document must not abort
                        // the whole import.
                        e.printStackTrace();
                    }
                    currentRDF.setLength(0);
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Loads one RDF/XML document into the graph unless its subject is already
     * present there, printing the document and any rows returned by the call.
     *
     * @param currentRDF the complete RDF/XML document
     * @throws SQLException if the existence check or the load fails
     * @throws IllegalArgumentException if no usable subject IRI is found
     */
    private void process(String currentRDF) throws SQLException {
        if (alreadyExists(currentRDF)) {
            return;
        }
        System.out.println("--Process:");
        System.out.println(LOAD_CALL);
        loadSmt.setString(1, currentRDF);
        boolean more = loadSmt.execute();
        while (more) {
            printResultSet(loadSmt.getResultSet());
            more = loadSmt.getMoreResults();
        }
        System.out.println(currentRDF);
        System.out.println("--End");
        // commit() is only meaningful (and only guaranteed to be accepted by
        // the driver) when auto-commit is switched off.
        if (!con.getAutoCommit()) {
            con.commit();
        }
    }

    /**
     * Prints every column of every row of {@code rs}, one value per line, and
     * closes the result set.
     *
     * @param rs result set to print; {@code null} is ignored
     * @throws SQLException if reading the result set fails
     */
    private static void printResultSet(ResultSet rs) throws SQLException {
        if (rs == null) {
            return;
        }
        try {
            ResultSetMetaData data = rs.getMetaData();
            int columns = data.getColumnCount();
            while (rs.next()) {
                for (int i = 1; i <= columns; i++) {
                    System.out.println(rs.getString(i));
                }
            }
        } finally {
            rs.close();
        }
    }

    /**
     * Checks whether the subject of the first {@code rdf:Description} element
     * of the document already has triples in the target graph.
     *
     * @param currentRDF the complete RDF/XML document
     * @return {@code true} if at least one triple with that subject exists
     * @throws SQLException if the SPARQL query fails
     * @throws IllegalArgumentException if the document has no
     *         {@code rdf:Description rdf:about="..."} element, or if the IRI
     *         contains characters that cannot be embedded in the query
     */
    private boolean alreadyExists(String currentRDF) throws SQLException {
        // find entries like <rdf:Description rdf:about="http://d-nb.info/gnd/100004776"> in currentRDF
        Matcher m = SUBJECT_PATTERN.matcher(currentRDF);
        if (!m.find()) {
            throw new IllegalArgumentException(
                    "No <rdf:Description rdf:about=\"...\"> element found in RDF document");
        }
        String gndID = m.group(1);
        // The IRI is spliced into the query text, so refuse anything that
        // could terminate the <...> reference early.
        if (UNSAFE_IRI_CHARS.matcher(gndID).find()) {
            throw new IllegalArgumentException("Subject IRI cannot be used in a SPARQL query: " + gndID);
        }
        String cmdString =
                String.format("sparql select count(*) from <%s> where {<%s> ?x ?y}", GRAPH_IRI, gndID);
        ResultSet rs = smt.executeQuery(cmdString);
        try {
            if (!rs.next()) {
                return false;
            }
            boolean exists = rs.getInt(1) > 0;
            if (exists) {
                System.out.println("Already in:" + gndID);
            }
            return exists;
        } finally {
            rs.close();
        }
    }

    /**
     * Entry point: {@code storeRDF username password [rdf-file]}.
     *
     * @param args user name, password and, optionally, the file to import;
     *        without a file argument the built-in default path is used
     * @throws IOException if the input file cannot be read
     * @throws SQLException if the database connection cannot be opened or closed
     * @throws ClassNotFoundException if the Virtuoso JDBC driver is not on the class path
     */
    static public void main(String args[]) throws IOException, SQLException, ClassNotFoundException {
        if (args.length < 2) {
            System.out.println("Usage: storeRDF username password [rdf-file]");
            return;
        }
        Class.forName("virtuoso.jdbc4.Driver");
        String filename = args.length > 2 ? args[2] : DEFAULT_INPUT_FILE;

        StoreRDFToVirtuoso st = new StoreRDFToVirtuoso(args[0], args[1]);
        try {
            st.run(filename);
        } finally {
            st.close();
        }
    }

    /**
     * Closes the statements and the connection. The connection is closed even
     * if closing a statement fails.
     *
     * @throws SQLException if closing any of the resources fails
     */
    private void close() throws SQLException {
        try {
            try {
                smt.close();
            } finally {
                loadSmt.close();
            }
        } finally {
            con.close();
        }
    }
}