comparison src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java @ 4:e93de4e99b52 default tip

indexMeta2rdf in dieses Projekt verschoben
author dwinter
date Thu, 21 Jun 2012 14:37:55 +0200
parents
children
comparison
equal deleted inserted replaced
3:6c8dac2c5214 4:e93de4e99b52
1 package de.mpiwg.itgroup.indexMeta2RDF;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.FileWriter;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.net.MalformedURLException;
9 import java.net.URL;
10 import java.util.Iterator;
11
12 import org.jdom.Attribute;
13 import org.jdom.Document;
14 import org.jdom.Element;
15 import org.jdom.JDOMException;
16 import org.jdom.input.SAXBuilder;
17 import org.jdom.xpath.XPath;
18 import org.openrdf.model.Statement;
19 import org.openrdf.model.impl.LiteralImpl;
20 import org.openrdf.model.impl.StatementImpl;
21 import org.openrdf.model.impl.URIImpl;
22 import org.openrdf.rio.RDFHandlerException;
23 import org.openrdf.rio.trig.TriGWriter;
24 import org.openrdf.rio.turtle.TurtleWriter;
25 import org.openrdf.model.Value;
26
27
28
29
30
31 public class TransformIndexMeta {
32 String OBJ_BASE_URL="http://echo.mpiwg-berlin.mpg.de/indexMeta/";
33 String ONT_BASE_URL="http://ontologies.mpiwg-berlin.mpg.de/general/MetaData/";
34 private String indexMetaType ="http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta";
35 private String bibObjType ="http://ontologies.mpiwg-berlin.mpg.de/general/BibData";
36 private FileWriter out;
37 private FileWriter error;
38 private TurtleWriter turtleWriter;
39
40
41 public TransformIndexMeta(FileWriter fw, FileWriter ew){
42 out=fw;
43 error=ew;
44 turtleWriter = new TurtleWriter(fw);
45 }
46 public void transform(String metaData) throws IOException, JDOMException, RDFHandlerException{
47 //URL url = new URL(metaData);
48 //InputStream is = url.openStream();
49
50 turtleWriter.startRDF();
51 FileInputStream is = new FileInputStream(metaData);
52 Document doc;
53 try {
54 doc = new SAXBuilder().build(is);
55 } catch (Exception e1) {
56 // TODO Auto-generated catch block
57 System.err.println("Cannot parse:"+metaData);
58 error.write("cannotparse:"+metaData+"\n");
59 return;
60 }
61
62 XPath xpDri = XPath.newInstance("//meta/dri[@type=\"escidoc-test\"]");
63
64 Element result = (Element)xpDri.selectSingleNode(doc);
65
66 if (result==null){
67 System.err.println("No dri for:"+metaData);
68 error.write("Non dri for:"+metaData+"\n");
69 return;
70 }
71
72 String dri=result.getTextTrim();
73
74 String objIdent=OBJ_BASE_URL+dri;
75
76
77 //out.write(String.format("<%s> rdf:type <%s>.\n", objIdent,indexMetaType));
78
79 Statement smt = new StatementImpl(new URIImpl(objIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(indexMetaType));
80 turtleWriter.handleStatement(smt);
81
82 Element resElement = doc.getRootElement();
83
84 for (Object n: resElement.getChildren()){
85 if (Element.class.isInstance(n)) {
86 Element e = (Element)n;
87
88 if (!e.getTextTrim().equals("")){
89 String txt=e.getTextTrim();//.replace("\"","\\\"");
90
91 smt = new StatementImpl
92 (new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt)));
93 turtleWriter.handleStatement(smt);
94
95
96 //out.write(String.format("<%s> <%s> \"%s\".\n", objIdent,ONT_BASE_URL+e.getName(),txt));
97 }
98 }
99
100
101 }
102
103 XPath bib = XPath.newInstance("//meta/bib");
104
105 Element bibElement = (Element)bib.selectSingleNode(doc);
106 if (bibElement==null){
107 System.err.println("No bibelement in:"+metaData);
108 error.write("No bibelement in:"+metaData+"\n");
109 return;
110 }
111 String bibIdent=objIdent+":bib";
112
113 smt = new StatementImpl(new URIImpl(bibIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(bibObjType));
114 turtleWriter.handleStatement(smt);
115
116 //out.write(String.format("<%s> rdf:type <%s>.\n",bibIdent,bibObjType));
117
118 smt = new StatementImpl(new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+"has_bibl_metaData"), new URIImpl(bibIdent));
119 turtleWriter.handleStatement(smt);
120
121 //out.write(String.format("<%s> <%s> <%s>.\n", objIdent,ONT_BASE_URL+"has_bibl_metaData",bibIdent));
122
123 Attribute bibType = bibElement.getAttribute("type");
124 if (bibType==null){
125 System.err.println("No bibtype in:"+metaData);
126 error.write("No bibtype in:"+metaData+"\n");
127 return;
128 }
129 String type=bibType.getValue();
130
131 smt = new StatementImpl
132 (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+"is_of_type"), (Value)(new LiteralImpl(type)));
133 turtleWriter.handleStatement(smt);
134
135 //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+"is_of_type",type));
136
137 for (Object n: bibElement.getChildren()){
138 if (Element.class.isInstance(n)) {
139 Element e = (Element)n;
140 String txt=e.getTextTrim();//.replace("\"","\\\"");
141 smt = new StatementImpl
142 (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt)));
143 turtleWriter.handleStatement(smt);
144
145 //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+e.getName(),txt));
146 }
147
148
149 }
150 turtleWriter.endRDF();
151
152 }
153
154 static public void main(String[] args) throws IOException, RDFHandlerException{
155 if (args.length!=1){
156 System.out.println("Usage: transfom path");
157 System.exit(1);
158 }
159 String root = args[0];
160 FileWriter fw = new FileWriter("/tmp/out.rdf");
161 FileWriter ew = new FileWriter("/tmp/errors_transform.txt");
162 TransformIndexMeta tim = new TransformIndexMeta(fw,ew);
163
164 Iterator<String> it = new IndexMetaIterator(new File(root));
165 while (it.hasNext()){
166 String nx = it.next();
167 try {
168 if(nx==null){
169 continue; //weiss noch nicht warum das passiert.
170 }
171 tim.transform(nx);
172 } catch (JDOMException e) {
173 System.out.println("JDOM exception:"+nx);
174 //e.printStackTrace();
175 }
176 fw.flush();
177 ew.flush();
178 }
179 fw.close();
180 ew.close();
181 }
182 }