4
|
1 package de.mpiwg.itgroup.indexMeta2RDF;
|
|
2
|
|
3 import java.io.File;
|
|
4 import java.io.FileInputStream;
|
|
5 import java.io.FileWriter;
|
|
6 import java.io.IOException;
|
|
7 import java.io.InputStream;
|
|
8 import java.net.MalformedURLException;
|
|
9 import java.net.URL;
|
|
10 import java.util.Iterator;
|
|
11
|
|
12 import org.jdom.Attribute;
|
|
13 import org.jdom.Document;
|
|
14 import org.jdom.Element;
|
|
15 import org.jdom.JDOMException;
|
|
16 import org.jdom.input.SAXBuilder;
|
|
17 import org.jdom.xpath.XPath;
|
|
18 import org.openrdf.model.Statement;
|
|
19 import org.openrdf.model.impl.LiteralImpl;
|
|
20 import org.openrdf.model.impl.StatementImpl;
|
|
21 import org.openrdf.model.impl.URIImpl;
|
|
22 import org.openrdf.rio.RDFHandlerException;
|
|
23 import org.openrdf.rio.trig.TriGWriter;
|
|
24 import org.openrdf.rio.turtle.TurtleWriter;
|
|
25 import org.openrdf.model.Value;
|
|
26
|
|
27
|
|
28
|
|
29
|
|
30
|
|
31 public class TransformIndexMeta {
|
|
32 String OBJ_BASE_URL="http://echo.mpiwg-berlin.mpg.de/indexMeta/";
|
|
33 String ONT_BASE_URL="http://ontologies.mpiwg-berlin.mpg.de/general/MetaData/";
|
|
34 private String indexMetaType ="http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta";
|
|
35 private String bibObjType ="http://ontologies.mpiwg-berlin.mpg.de/general/BibData";
|
|
36 private FileWriter out;
|
|
37 private FileWriter error;
|
|
38 private TurtleWriter turtleWriter;
|
|
39
|
|
40
|
|
41 public TransformIndexMeta(FileWriter fw, FileWriter ew){
|
|
42 out=fw;
|
|
43 error=ew;
|
|
44 turtleWriter = new TurtleWriter(fw);
|
|
45 }
|
|
46 public void transform(String metaData) throws IOException, JDOMException, RDFHandlerException{
|
|
47 //URL url = new URL(metaData);
|
|
48 //InputStream is = url.openStream();
|
|
49
|
|
50 turtleWriter.startRDF();
|
|
51 FileInputStream is = new FileInputStream(metaData);
|
|
52 Document doc;
|
|
53 try {
|
|
54 doc = new SAXBuilder().build(is);
|
|
55 } catch (Exception e1) {
|
|
56 // TODO Auto-generated catch block
|
|
57 System.err.println("Cannot parse:"+metaData);
|
|
58 error.write("cannotparse:"+metaData+"\n");
|
|
59 return;
|
|
60 }
|
|
61
|
|
62 XPath xpDri = XPath.newInstance("//meta/dri[@type=\"escidoc-test\"]");
|
|
63
|
|
64 Element result = (Element)xpDri.selectSingleNode(doc);
|
|
65
|
|
66 if (result==null){
|
|
67 System.err.println("No dri for:"+metaData);
|
|
68 error.write("Non dri for:"+metaData+"\n");
|
|
69 return;
|
|
70 }
|
|
71
|
|
72 String dri=result.getTextTrim();
|
|
73
|
|
74 String objIdent=OBJ_BASE_URL+dri;
|
|
75
|
|
76
|
|
77 //out.write(String.format("<%s> rdf:type <%s>.\n", objIdent,indexMetaType));
|
|
78
|
|
79 Statement smt = new StatementImpl(new URIImpl(objIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(indexMetaType));
|
|
80 turtleWriter.handleStatement(smt);
|
|
81
|
|
82 Element resElement = doc.getRootElement();
|
|
83
|
|
84 for (Object n: resElement.getChildren()){
|
|
85 if (Element.class.isInstance(n)) {
|
|
86 Element e = (Element)n;
|
|
87
|
|
88 if (!e.getTextTrim().equals("")){
|
|
89 String txt=e.getTextTrim();//.replace("\"","\\\"");
|
|
90
|
|
91 smt = new StatementImpl
|
|
92 (new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt)));
|
|
93 turtleWriter.handleStatement(smt);
|
|
94
|
|
95
|
|
96 //out.write(String.format("<%s> <%s> \"%s\".\n", objIdent,ONT_BASE_URL+e.getName(),txt));
|
|
97 }
|
|
98 }
|
|
99
|
|
100
|
|
101 }
|
|
102
|
|
103 XPath bib = XPath.newInstance("//meta/bib");
|
|
104
|
|
105 Element bibElement = (Element)bib.selectSingleNode(doc);
|
|
106 if (bibElement==null){
|
|
107 System.err.println("No bibelement in:"+metaData);
|
|
108 error.write("No bibelement in:"+metaData+"\n");
|
|
109 return;
|
|
110 }
|
|
111 String bibIdent=objIdent+":bib";
|
|
112
|
|
113 smt = new StatementImpl(new URIImpl(bibIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(bibObjType));
|
|
114 turtleWriter.handleStatement(smt);
|
|
115
|
|
116 //out.write(String.format("<%s> rdf:type <%s>.\n",bibIdent,bibObjType));
|
|
117
|
|
118 smt = new StatementImpl(new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+"has_bibl_metaData"), new URIImpl(bibIdent));
|
|
119 turtleWriter.handleStatement(smt);
|
|
120
|
|
121 //out.write(String.format("<%s> <%s> <%s>.\n", objIdent,ONT_BASE_URL+"has_bibl_metaData",bibIdent));
|
|
122
|
|
123 Attribute bibType = bibElement.getAttribute("type");
|
|
124 if (bibType==null){
|
|
125 System.err.println("No bibtype in:"+metaData);
|
|
126 error.write("No bibtype in:"+metaData+"\n");
|
|
127 return;
|
|
128 }
|
|
129 String type=bibType.getValue();
|
|
130
|
|
131 smt = new StatementImpl
|
|
132 (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+"is_of_type"), (Value)(new LiteralImpl(type)));
|
|
133 turtleWriter.handleStatement(smt);
|
|
134
|
|
135 //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+"is_of_type",type));
|
|
136
|
|
137 for (Object n: bibElement.getChildren()){
|
|
138 if (Element.class.isInstance(n)) {
|
|
139 Element e = (Element)n;
|
|
140 String txt=e.getTextTrim();//.replace("\"","\\\"");
|
|
141 smt = new StatementImpl
|
|
142 (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt)));
|
|
143 turtleWriter.handleStatement(smt);
|
|
144
|
|
145 //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+e.getName(),txt));
|
|
146 }
|
|
147
|
|
148
|
|
149 }
|
|
150 turtleWriter.endRDF();
|
|
151
|
|
152 }
|
|
153
|
|
154 static public void main(String[] args) throws IOException, RDFHandlerException{
|
|
155 if (args.length!=1){
|
|
156 System.out.println("Usage: transfom path");
|
|
157 System.exit(1);
|
|
158 }
|
|
159 String root = args[0];
|
|
160 FileWriter fw = new FileWriter("/tmp/out.rdf");
|
|
161 FileWriter ew = new FileWriter("/tmp/errors_transform.txt");
|
|
162 TransformIndexMeta tim = new TransformIndexMeta(fw,ew);
|
|
163
|
|
164 Iterator<String> it = new IndexMetaIterator(new File(root));
|
|
165 while (it.hasNext()){
|
|
166 String nx = it.next();
|
|
167 try {
|
|
168 if(nx==null){
|
|
169 continue; //weiss noch nicht warum das passiert.
|
|
170 }
|
|
171 tim.transform(nx);
|
|
172 } catch (JDOMException e) {
|
|
173 System.out.println("JDOM exception:"+nx);
|
|
174 //e.printStackTrace();
|
|
175 }
|
|
176 fw.flush();
|
|
177 ew.flush();
|
|
178 }
|
|
179 fw.close();
|
|
180 ew.close();
|
|
181 }
|
|
182 }
|