Mercurial > hg > TripleStoreManager
diff src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java @ 4:e93de4e99b52 default tip
indexMeta2rdf in dieses Projekt verschoben
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 14:37:55 +0200 |
parents | |
children |
line wrap: on
line diff
// Extracted from the Mercurial diff hunk:
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000
// +++ b/src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java Thu Jun 21 14:37:55 2012 +0200
// @@ -0,0 +1,182 @@
package de.mpiwg.itgroup.indexMeta2RDF;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;
import org.openrdf.model.Statement;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.trig.TriGWriter;
import org.openrdf.rio.turtle.TurtleWriter;
import org.openrdf.model.Value;

/**
 * Reads XML metadata documents and writes their content as RDF statements in
 * Turtle syntax.
 *
 * <p>For every document the transformer emits statements about two subjects:
 * the object itself (identified by {@code OBJ_BASE_URL} plus the text of the
 * {@code //meta/dri[@type="escidoc-test"]} element) and its bibliographic
 * record (the object identifier with the suffix {@code ":bib"}). Documents
 * that cannot be processed are reported on {@code System.err} and in the error
 * writer handed to the constructor.
 */
public class TransformIndexMeta {
    /** Predicate URI used for all rdf:type statements. */
    private static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";

    /** Prefix of the subject URI built for every transformed document. */
    String OBJ_BASE_URL = "http://echo.mpiwg-berlin.mpg.de/indexMeta/";
    /** Prefix of every predicate URI; the XML element name is appended to it. */
    String ONT_BASE_URL = "http://ontologies.mpiwg-berlin.mpg.de/general/MetaData/";
    private String indexMetaType = "http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta";
    private String bibObjType = "http://ontologies.mpiwg-berlin.mpg.de/general/BibData";
    // Kept for reference; all RDF output goes through turtleWriter, which wraps the same writer.
    private FileWriter out;
    private FileWriter error;
    private TurtleWriter turtleWriter;

    /**
     * Creates a transformer that writes Turtle to {@code fw} and one line per
     * rejected document to {@code ew}.
     *
     * @param fw destination of the generated Turtle output
     * @param ew destination of the error report
     */
    public TransformIndexMeta(FileWriter fw, FileWriter ew) {
        out = fw;
        error = ew;
        turtleWriter = new TurtleWriter(fw);
    }

    /**
     * Transforms one metadata file and appends the resulting statements to the
     * Turtle output.
     *
     * <p>A file that cannot be parsed, or that has no matching {@code dri}
     * element, is reported and produces no RDF output. A file without a
     * {@code bib} element, or whose {@code bib} element has no {@code type}
     * attribute, is reported as well; the statements written up to that point
     * are kept.
     *
     * @param metaData path of the XML file to transform
     * @throws IOException if the file cannot be opened or the error report cannot be written
     * @throws JDOMException if one of the XPath expressions cannot be compiled or evaluated
     * @throws RDFHandlerException if the Turtle writer fails
     */
    public void transform(String metaData) throws IOException, JDOMException, RDFHandlerException {
        Document doc = parse(metaData);
        if (doc == null) {
            return;
        }

        XPath xpDri = XPath.newInstance("//meta/dri[@type=\"escidoc-test\"]");
        Element driElement = (Element) xpDri.selectSingleNode(doc);
        if (driElement == null) {
            System.err.println("No dri for:" + metaData);
            error.write("Non dri for:" + metaData + "\n");
            return;
        }
        String objIdent = OBJ_BASE_URL + driElement.getTextTrim();

        // The RDF document is opened only after all checks that produce no output,
        // and it is always closed again. The writer is shared between calls, so a
        // document left open by an early return would still be open when the next
        // file calls startRDF().
        turtleWriter.startRDF();
        try {
            writeStatements(doc, objIdent, metaData);
        } finally {
            turtleWriter.endRDF();
        }
    }

    /**
     * Parses the given file, reporting a parse failure instead of throwing.
     *
     * @return the parsed document, or {@code null} if parsing failed
     * @throws IOException if the file cannot be opened or the error report cannot be written
     */
    private Document parse(String metaData) throws IOException {
        FileInputStream is = new FileInputStream(metaData);
        try {
            return new SAXBuilder().build(is);
        } catch (Exception e1) {
            // Deliberately broad: one broken file must not abort a whole batch run.
            System.err.println("Cannot parse:" + metaData);
            error.write("cannotparse:" + metaData + "\n");
            return null;
        } finally {
            try {
                is.close();
            } catch (IOException ignored) {
                // The outcome of parsing is already decided; a failed close changes nothing.
            }
        }
    }

    /** Writes all statements for the object and its bibliographic record. */
    private void writeStatements(Document doc, String objIdent, String metaData)
            throws IOException, JDOMException, RDFHandlerException {
        writeResource(objIdent, RDF_TYPE, indexMetaType);

        // Every non-empty child of the root element becomes a literal property of the object.
        for (Object child : doc.getRootElement().getChildren()) {
            if (child instanceof Element) {
                Element e = (Element) child;
                String txt = e.getTextTrim();
                if (!txt.equals("")) {
                    writeLiteral(objIdent, ONT_BASE_URL + e.getName(), txt);
                }
            }
        }

        XPath bib = XPath.newInstance("//meta/bib");
        Element bibElement = (Element) bib.selectSingleNode(doc);
        if (bibElement == null) {
            System.err.println("No bibelement in:" + metaData);
            error.write("No bibelement in:" + metaData + "\n");
            return;
        }
        String bibIdent = objIdent + ":bib";

        writeResource(bibIdent, RDF_TYPE, bibObjType);
        writeResource(objIdent, ONT_BASE_URL + "has_bibl_metaData", bibIdent);

        Attribute bibType = bibElement.getAttribute("type");
        if (bibType == null) {
            System.err.println("No bibtype in:" + metaData);
            error.write("No bibtype in:" + metaData + "\n");
            return;
        }
        writeLiteral(bibIdent, ONT_BASE_URL + "is_of_type", bibType.getValue());

        // NOTE(review): unlike the root-level loop above, children with empty text are
        // written here too. Kept as is - confirm whether that difference is intended.
        for (Object child : bibElement.getChildren()) {
            if (child instanceof Element) {
                Element e = (Element) child;
                writeLiteral(bibIdent, ONT_BASE_URL + e.getName(), e.getTextTrim());
            }
        }
    }

    /** Emits a statement whose object is a resource (URI). */
    private void writeResource(String subject, String predicate, String object)
            throws RDFHandlerException {
        Statement smt = new StatementImpl(
                new URIImpl(subject), new URIImpl(predicate), new URIImpl(object));
        turtleWriter.handleStatement(smt);
    }

    /** Emits a statement whose object is a plain literal. */
    private void writeLiteral(String subject, String predicate, String text)
            throws RDFHandlerException {
        Statement smt = new StatementImpl(
                new URIImpl(subject), new URIImpl(predicate), new LiteralImpl(text));
        turtleWriter.handleStatement(smt);
    }

    /**
     * Transforms every file delivered by {@code IndexMetaIterator} for the given
     * root directory. Turtle output goes to {@code /tmp/out.rdf}, the error
     * report to {@code /tmp/errors_transform.txt}.
     *
     * @param args exactly one argument: the root path to scan
     * @throws IOException if an output file cannot be written or an input file cannot be opened
     * @throws RDFHandlerException if the Turtle writer fails
     */
    public static void main(String[] args) throws IOException, RDFHandlerException {
        if (args.length != 1) {
            System.out.println("Usage: transfom path");
            System.exit(1);
        }
        String root = args[0];

        // Both writers are closed even when a file aborts the run with an exception.
        FileWriter fw = new FileWriter("/tmp/out.rdf");
        try {
            FileWriter ew = new FileWriter("/tmp/errors_transform.txt");
            try {
                TransformIndexMeta tim = new TransformIndexMeta(fw, ew);

                Iterator<String> it = new IndexMetaIterator(new File(root));
                while (it.hasNext()) {
                    String nx = it.next();
                    if (nx == null) {
                        continue; // the iterator sometimes yields null; the cause is not yet known
                    }
                    try {
                        tim.transform(nx);
                    } catch (JDOMException e) {
                        System.out.println("JDOM exception:" + nx);
                    }
                    // Flush after every file so the output is usable even if a later file aborts the run.
                    fw.flush();
                    ew.flush();
                }
            } finally {
                ew.close();
            }
        } finally {
            fw.close();
        }
    }
}