diff src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java @ 4:e93de4e99b52 default tip

indexMeta2rdf in dieses Projekt verschoben
author dwinter
date Thu, 21 Jun 2012 14:37:55 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java	Thu Jun 21 14:37:55 2012 +0200
@@ -0,0 +1,182 @@
+package de.mpiwg.itgroup.indexMeta2RDF;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Iterator;
+
+import org.jdom.Attribute;
+import org.jdom.Document;
+import org.jdom.Element;
+import org.jdom.JDOMException;
+import org.jdom.input.SAXBuilder;
+import org.jdom.xpath.XPath;
+import org.openrdf.model.Statement;
+import org.openrdf.model.impl.LiteralImpl;
+import org.openrdf.model.impl.StatementImpl;
+import org.openrdf.model.impl.URIImpl;
+import org.openrdf.rio.RDFHandlerException;
+import org.openrdf.rio.trig.TriGWriter;
+import org.openrdf.rio.turtle.TurtleWriter;
+import org.openrdf.model.Value;
+
+
+
+
+	
+/**
+ * Converts index.meta XML files into RDF statements and writes them through a
+ * single {@link TurtleWriter}.
+ *
+ * For every file the dri element of type "escidoc-test" supplies the subject
+ * identifier. Each non-empty child of the root element becomes a literal
+ * statement on that subject, and the first //meta/bib element becomes a
+ * second resource (subject identifier + ":bib") that is linked to the subject
+ * and carries one literal per child element. Files that cannot be processed
+ * are reported on System.err and in the error writer.
+ */
+public class TransformIndexMeta {
+	/** URI of the rdf:type predicate used for the two type statements. */
+	private static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
+
+	/** Prefix put in front of the dri text to build the subject URI. */
+	String OBJ_BASE_URL="http://echo.mpiwg-berlin.mpg.de/indexMeta/";
+	/** Prefix put in front of XML element names to build predicate URIs. */
+	String ONT_BASE_URL="http://ontologies.mpiwg-berlin.mpg.de/general/MetaData/";
+	private String indexMetaType ="http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta";
+	private String bibObjType ="http://ontologies.mpiwg-berlin.mpg.de/general/BibData";
+	/** Output writer; only handed to the TurtleWriter, not written to directly here. */
+	private FileWriter out;
+	/** Receives one line per file that could not be (fully) transformed. */
+	private FileWriter error;
+	private TurtleWriter turtleWriter;
+
+	/**
+	 * @param fw writer that receives the Turtle output
+	 * @param ew writer that receives the error report lines
+	 */
+	public  TransformIndexMeta(FileWriter fw, FileWriter ew){
+		out=fw;
+		error=ew;
+		turtleWriter = new TurtleWriter(fw);
+	}
+
+	/**
+	 * Transforms one index.meta file and writes its statements to the Turtle writer.
+	 *
+	 * Nothing is written when the file cannot be parsed or has no matching dri
+	 * element. When the bib element or its type attribute is missing, the
+	 * statements produced up to that point are kept and the problem is reported.
+	 *
+	 * @param metaData path of the index.meta file
+	 * @throws IOException if the file cannot be opened or the error writer fails
+	 * @throws JDOMException if an XPath expression cannot be compiled or evaluated
+	 * @throws RDFHandlerException if the Turtle writer fails
+	 */
+	public void transform(String metaData) throws IOException, JDOMException, RDFHandlerException{
+		Document doc = parse(metaData);
+		if (doc==null){
+			return;
+		}
+
+		XPath xpDri = XPath.newInstance("//meta/dri[@type=\"escidoc-test\"]");
+		Element result = (Element)xpDri.selectSingleNode(doc);
+		if (result==null){
+			System.err.println("No dri for:"+metaData);
+			error.write("Non dri for:"+metaData+"\n");
+			return;
+		}
+		String objIdent=OBJ_BASE_URL+result.getTextTrim();
+
+		// Start the RDF document only once there is something to write, and make
+		// sure it is ended on every path (early return or exception) so that the
+		// last statement is terminated and the writer can be started again for
+		// the next file.
+		turtleWriter.startRDF();
+		try {
+			writeObjectStatements(doc, objIdent);
+			writeBibStatements(doc, objIdent, metaData);
+		} finally {
+			turtleWriter.endRDF();
+		}
+	}
+
+	/**
+	 * Parses the file; returns null (after reporting) when parsing fails.
+	 * The input stream is always closed.
+	 */
+	private Document parse(String metaData) throws IOException{
+		FileInputStream is = new FileInputStream(metaData);
+		try {
+			return new SAXBuilder().build(is);
+		} catch (Exception e1) {
+			// Deliberately broad: any parser failure only skips this file.
+			System.err.println("Cannot parse:"+metaData);
+			error.write("cannotparse:"+metaData+"\n");
+			return null;
+		} finally {
+			is.close();
+		}
+	}
+
+	/** Writes the type statement and one literal per non-empty child of the root element. */
+	private void writeObjectStatements(Document doc, String objIdent) throws RDFHandlerException{
+		writeResource(objIdent, RDF_TYPE, indexMetaType);
+
+		for (Object n: doc.getRootElement().getChildren()){
+			if (n instanceof Element){
+				Element e = (Element)n;
+				String txt=e.getTextTrim();
+				if (!txt.equals("")){
+					writeLiteral(objIdent, ONT_BASE_URL+e.getName(), txt);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Writes the statements for the first //meta/bib element: its type, the link
+	 * from the object, the value of its type attribute and one literal per child.
+	 * Stops early (after reporting) when the element or its type attribute is missing.
+	 */
+	private void writeBibStatements(Document doc, String objIdent, String metaData)
+			throws IOException, JDOMException, RDFHandlerException{
+		XPath bib = XPath.newInstance("//meta/bib");
+		Element bibElement = (Element)bib.selectSingleNode(doc);
+		if (bibElement==null){
+			System.err.println("No bibelement in:"+metaData);
+			error.write("No bibelement in:"+metaData+"\n");
+			return;
+		}
+		String bibIdent=objIdent+":bib";
+
+		writeResource(bibIdent, RDF_TYPE, bibObjType);
+		writeResource(objIdent, ONT_BASE_URL+"has_bibl_metaData", bibIdent);
+
+		Attribute bibType = bibElement.getAttribute("type");
+		if (bibType==null){
+			System.err.println("No bibtype in:"+metaData);
+			error.write("No bibtype in:"+metaData+"\n");
+			return;
+		}
+		writeLiteral(bibIdent, ONT_BASE_URL+"is_of_type", bibType.getValue());
+
+		// Unlike the root-level children, bib children are written even when
+		// their text is empty.
+		for (Object n: bibElement.getChildren()){
+			if (n instanceof Element){
+				Element e = (Element)n;
+				writeLiteral(bibIdent, ONT_BASE_URL+e.getName(), e.getTextTrim());
+			}
+		}
+	}
+
+	/** Writes one statement whose object is a URI. */
+	private void writeResource(String subject, String predicate, String object) throws RDFHandlerException{
+		Statement smt = new StatementImpl(new URIImpl(subject), new URIImpl(predicate), new URIImpl(object));
+		turtleWriter.handleStatement(smt);
+	}
+
+	/** Writes one statement whose object is a plain literal. */
+	private void writeLiteral(String subject, String predicate, String text) throws RDFHandlerException{
+		Statement smt = new StatementImpl(new URIImpl(subject), new URIImpl(predicate), new LiteralImpl(text));
+		turtleWriter.handleStatement(smt);
+	}
+
+	/**
+	 * Transforms every path delivered by an IndexMetaIterator over the given
+	 * root directory. Output goes to /tmp/out.rdf, the error report to
+	 * /tmp/errors_transform.txt.
+	 *
+	 * @param args exactly one argument: the root path
+	 * @throws IOException if a file cannot be opened or written
+	 * @throws RDFHandlerException if the Turtle writer fails
+	 */
+	static public void main(String[] args) throws IOException, RDFHandlerException{
+		if (args.length!=1){
+			System.out.println("Usage: transfom path");
+			System.exit(1);
+		}
+		String root = args[0];
+		FileWriter fw = new FileWriter("/tmp/out.rdf");
+		try {
+			FileWriter ew = new FileWriter("/tmp/errors_transform.txt");
+			try {
+				TransformIndexMeta tim = new TransformIndexMeta(fw,ew);
+
+				Iterator<String> it = new IndexMetaIterator(new File(root));
+				while (it.hasNext()){
+					String nx = it.next();
+					if (nx==null){
+						continue; // not yet known why the iterator yields null
+					}
+					try {
+						tim.transform(nx);
+					} catch (JDOMException e) {
+						System.out.println("JDOM exception:"+nx);
+					}
+					fw.flush();
+					ew.flush();
+				}
+			} finally {
+				// Close both writers even when a file aborts the run.
+				ew.close();
+			}
+		} finally {
+			fw.close();
+		}
+	}
+}