view src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java @ 4:e93de4e99b52 default tip

indexMeta2rdf in dieses Projekt verschoben
author dwinter
date Thu, 21 Jun 2012 14:37:55 +0200
parents
children
line wrap: on
line source

package de.mpiwg.itgroup.indexMeta2RDF;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.jdom.Attribute;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
import org.jdom.xpath.XPath;
import org.openrdf.model.Statement;
import org.openrdf.model.impl.LiteralImpl;
import org.openrdf.model.impl.StatementImpl;
import org.openrdf.model.impl.URIImpl;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.trig.TriGWriter;
import org.openrdf.rio.turtle.TurtleWriter;
import org.openrdf.model.Value;




	
/**
 * Converts index.meta XML files into RDF statements and serialises them as
 * Turtle through one shared {@link TurtleWriter}.
 *
 * <p>For every input file the following statements are produced:
 * <ul>
 *   <li>an rdf:type statement for the object identified by the
 *       {@code //meta/dri[@type="escidoc-test"]} value,</li>
 *   <li>one literal statement per non-empty child of the root element,</li>
 *   <li>an rdf:type statement for the bibliographic object, a link from the
 *       object to it, its {@code is_of_type} literal and one literal
 *       statement per child of {@code //meta/bib}.</li>
 * </ul>
 * Files that cannot be processed are reported on {@code System.err} and in
 * the error writer handed to the constructor.
 */
public class TransformIndexMeta {
	/** Prefix prepended to the dri value to build the subject URI of an object. */
	String OBJ_BASE_URL="http://echo.mpiwg-berlin.mpg.de/indexMeta/";
	/** Prefix prepended to element names and fixed property names to build predicate URIs. */
	String ONT_BASE_URL="http://ontologies.mpiwg-berlin.mpg.de/general/MetaData/";
	/** rdf:type object used for every index.meta object. */
	private String indexMetaType ="http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta";
	/** rdf:type object used for every bibliographic object. */
	private String bibObjType ="http://ontologies.mpiwg-berlin.mpg.de/general/BibData";
	/** Predicate URI of rdf:type. */
	private static final String RDF_TYPE = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
	/** Output writer; only stored here, all RDF output goes through {@link #turtleWriter}. */
	private FileWriter out;
	/** Receives one line per file that could not be fully transformed. */
	private FileWriter error;
	/** Turtle serialiser wrapping the output writer. */
	private TurtleWriter turtleWriter;


	/**
	 * Creates a transformer.
	 *
	 * @param fw writer receiving the Turtle output
	 * @param ew writer receiving one line per problematic input file
	 */
	public  TransformIndexMeta(FileWriter fw, FileWriter ew){
		out=fw;
		error=ew;
		turtleWriter = new TurtleWriter(fw);
	}

	/**
	 * Transforms one index.meta file and writes the resulting statements.
	 *
	 * <p>Problems with the content of the file (not parseable, no dri, no bib
	 * element, no bib type) are reported and end the processing of this file
	 * without an exception; statements written up to that point are kept.
	 *
	 * @param metaData path of the index.meta file
	 * @throws IOException if the file cannot be opened or closed, or the error writer fails
	 * @throws JDOMException if an XPath expression cannot be created or evaluated
	 * @throws RDFHandlerException if the Turtle writer fails
	 */
	public void transform(String metaData) throws IOException, JDOMException, RDFHandlerException{
		//URL url = new URL(metaData);
		//InputStream is = url.openStream();

		turtleWriter.startRDF();
		try {
			writeStatements(metaData);
		} finally {
			// Every startRDF() has to be balanced by endRDF(), also on the early
			// exits: the writer is shared between all files, and endRDF() is what
			// terminates the last pending statement and flushes the output.
			turtleWriter.endRDF();
		}
	}

	/** Emits all statements for one file; expects the RDF document to be started already. */
	private void writeStatements(String metaData) throws IOException, JDOMException, RDFHandlerException{
		Document doc = parseDocument(metaData);
		if (doc==null){
			return;
		}

		XPath xpDri = XPath.newInstance("//meta/dri[@type=\"escidoc-test\"]");
		Element driElement = (Element)xpDri.selectSingleNode(doc);
		if (driElement==null){
			report("No dri for:"+metaData, "Non dri for:"+metaData);
			return;
		}

		String objIdent=OBJ_BASE_URL+driElement.getTextTrim();
		writeStatement(objIdent, RDF_TYPE, new URIImpl(indexMetaType));
		// Children of the root element without text are skipped.
		writeChildLiterals(objIdent, doc.getRootElement(), true);

		XPath bib = XPath.newInstance("//meta/bib");
		Element bibElement = (Element)bib.selectSingleNode(doc);
		if (bibElement==null){
			report("No bibelement in:"+metaData, "No bibelement in:"+metaData);
			return;
		}

		String bibIdent=objIdent+":bib";
		writeStatement(bibIdent, RDF_TYPE, new URIImpl(bibObjType));
		writeStatement(objIdent, ONT_BASE_URL+"has_bibl_metaData", new URIImpl(bibIdent));

		Attribute bibType = bibElement.getAttribute("type");
		if (bibType==null){
			report("No bibtype in:"+metaData, "No bibtype in:"+metaData);
			return;
		}
		writeStatement(bibIdent, ONT_BASE_URL+"is_of_type", new LiteralImpl(bibType.getValue()));

		// Unlike the root children, bib children are written even when empty.
		writeChildLiterals(bibIdent, bibElement, false);
	}

	/**
	 * Parses the file into a JDOM document and always closes the input stream.
	 *
	 * @return the document, or {@code null} if parsing failed (already reported)
	 * @throws IOException if the file cannot be opened or closed, or the error writer fails
	 */
	private Document parseDocument(String metaData) throws IOException{
		FileInputStream is = new FileInputStream(metaData);
		try {
			return new SAXBuilder().build(is);
		} catch (Exception e1) {
			report("Cannot parse:"+metaData, "cannotparse:"+metaData);
			return null;
		} finally {
			is.close();
		}
	}

	/** Prints a problem on System.err and appends a line to the error writer. */
	private void report(String consoleMessage, String fileMessage) throws IOException{
		System.err.println(consoleMessage);
		error.write(fileMessage+"\n");
	}

	/** Writes a single statement with the given subject and predicate URIs. */
	private void writeStatement(String subject, String predicate, Value object) throws RDFHandlerException{
		Statement smt = new StatementImpl(new URIImpl(subject), new URIImpl(predicate), object);
		turtleWriter.handleStatement(smt);
	}

	/**
	 * Writes one literal statement per child element of {@code parent}; the
	 * predicate is ONT_BASE_URL plus the element name, the object the trimmed text.
	 *
	 * @param skipEmpty if true, children whose trimmed text is empty are left out
	 */
	private void writeChildLiterals(String subject, Element parent, boolean skipEmpty) throws RDFHandlerException{
		for (Object n: parent.getChildren()){
			if (!(n instanceof Element)){
				continue;
			}
			Element e = (Element)n;
			String txt=e.getTextTrim();
			if (skipEmpty && txt.equals("")){
				continue;
			}
			writeStatement(subject, ONT_BASE_URL+e.getName(), new LiteralImpl(txt));
		}
	}

	/**
	 * Transforms every file delivered by an IndexMetaIterator over the given
	 * directory, writing Turtle to /tmp/out.rdf and problems to
	 * /tmp/errors_transform.txt.
	 *
	 * @param args exactly one argument: the root path to iterate over
	 * @throws IOException if reading or writing fails
	 * @throws RDFHandlerException if the Turtle writer fails
	 */
	static public void main(String[] args) throws IOException, RDFHandlerException{
		if (args.length!=1){
			System.out.println("Usage: transfom path");
			System.exit(1);
		}
		String root = args[0];
		FileWriter fw = new FileWriter("/tmp/out.rdf");
		try {
			FileWriter ew = new FileWriter("/tmp/errors_transform.txt");
			try {
				TransformIndexMeta tim = new TransformIndexMeta(fw,ew);

				Iterator<String> it = new IndexMetaIterator(new File(root));
				while (it.hasNext()){
					String nx = it.next();
					if(nx==null){
						continue; // not yet known why the iterator delivers null
					}
					try {
						tim.transform(nx);
					} catch (JDOMException e) {
						System.out.println("JDOM exception:"+nx);
						//e.printStackTrace();
					}
					fw.flush();
					ew.flush();
				}
			} finally {
				// Close on every path, so output is not lost if a file aborts the run.
				ew.close();
			}
		} finally {
			fw.close();
		}
	}
}