Mercurial > hg > TripleStoreManager
view src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java @ 4:e93de4e99b52 default tip
indexMeta2rdf in dieses Projekt verschoben
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 14:37:55 +0200 |
parents | |
children |
line wrap: on
line source
package de.mpiwg.itgroup.indexMeta2RDF; import java.io.File; import java.io.FileInputStream; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; import org.jdom.xpath.XPath; import org.openrdf.model.Statement; import org.openrdf.model.impl.LiteralImpl; import org.openrdf.model.impl.StatementImpl; import org.openrdf.model.impl.URIImpl; import org.openrdf.rio.RDFHandlerException; import org.openrdf.rio.trig.TriGWriter; import org.openrdf.rio.turtle.TurtleWriter; import org.openrdf.model.Value; public class TransformIndexMeta { String OBJ_BASE_URL="http://echo.mpiwg-berlin.mpg.de/indexMeta/"; String ONT_BASE_URL="http://ontologies.mpiwg-berlin.mpg.de/general/MetaData/"; private String indexMetaType ="http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta"; private String bibObjType ="http://ontologies.mpiwg-berlin.mpg.de/general/BibData"; private FileWriter out; private FileWriter error; private TurtleWriter turtleWriter; public TransformIndexMeta(FileWriter fw, FileWriter ew){ out=fw; error=ew; turtleWriter = new TurtleWriter(fw); } public void transform(String metaData) throws IOException, JDOMException, RDFHandlerException{ //URL url = new URL(metaData); //InputStream is = url.openStream(); turtleWriter.startRDF(); FileInputStream is = new FileInputStream(metaData); Document doc; try { doc = new SAXBuilder().build(is); } catch (Exception e1) { // TODO Auto-generated catch block System.err.println("Cannot parse:"+metaData); error.write("cannotparse:"+metaData+"\n"); return; } XPath xpDri = XPath.newInstance("//meta/dri[@type=\"escidoc-test\"]"); Element result = (Element)xpDri.selectSingleNode(doc); if (result==null){ System.err.println("No dri for:"+metaData); error.write("Non dri for:"+metaData+"\n"); return; } String dri=result.getTextTrim(); String objIdent=OBJ_BASE_URL+dri; //out.write(String.format("<%s> rdf:type <%s>.\n", objIdent,indexMetaType)); Statement smt = new StatementImpl(new URIImpl(objIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(indexMetaType)); turtleWriter.handleStatement(smt); Element resElement = doc.getRootElement(); for (Object n: resElement.getChildren()){ if (Element.class.isInstance(n)) { Element e = (Element)n; if (!e.getTextTrim().equals("")){ String txt=e.getTextTrim();//.replace("\"","\\\""); smt = new StatementImpl (new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt))); turtleWriter.handleStatement(smt); //out.write(String.format("<%s> <%s> \"%s\".\n", objIdent,ONT_BASE_URL+e.getName(),txt)); } } } XPath bib = XPath.newInstance("//meta/bib"); Element bibElement = (Element)bib.selectSingleNode(doc); if (bibElement==null){ System.err.println("No bibelement in:"+metaData); error.write("No bibelement in:"+metaData+"\n"); return; } String bibIdent=objIdent+":bib"; smt = new StatementImpl(new URIImpl(bibIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(bibObjType)); turtleWriter.handleStatement(smt); //out.write(String.format("<%s> rdf:type <%s>.\n",bibIdent,bibObjType)); smt = new StatementImpl(new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+"has_bibl_metaData"), new URIImpl(bibIdent)); turtleWriter.handleStatement(smt); //out.write(String.format("<%s> <%s> <%s>.\n", objIdent,ONT_BASE_URL+"has_bibl_metaData",bibIdent)); Attribute bibType = bibElement.getAttribute("type"); if (bibType==null){ System.err.println("No bibtype in:"+metaData); error.write("No bibtype in:"+metaData+"\n"); return; } String type=bibType.getValue(); smt = new StatementImpl (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+"is_of_type"), (Value)(new LiteralImpl(type))); turtleWriter.handleStatement(smt); //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+"is_of_type",type)); for (Object n: bibElement.getChildren()){ if (Element.class.isInstance(n)) { Element e = (Element)n; String txt=e.getTextTrim();//.replace("\"","\\\""); smt = new StatementImpl (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt))); turtleWriter.handleStatement(smt); //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+e.getName(),txt)); } } turtleWriter.endRDF(); } static public void main(String[] args) throws IOException, RDFHandlerException{ if (args.length!=1){ System.out.println("Usage: transfom path"); System.exit(1); } String root = args[0]; FileWriter fw = new FileWriter("/tmp/out.rdf"); FileWriter ew = new FileWriter("/tmp/errors_transform.txt"); TransformIndexMeta tim = new TransformIndexMeta(fw,ew); Iterator<String> it = new IndexMetaIterator(new File(root)); while (it.hasNext()){ String nx = it.next(); try { if(nx==null){ continue; //weiss noch nicht warum das passiert. } tim.transform(nx); } catch (JDOMException e) { System.out.println("JDOM exception:"+nx); //e.printStackTrace(); } fw.flush(); ew.flush(); } fw.close(); ew.close(); } }