Mercurial > hg > TripleStoreManager
comparison src/de/mpiwg/itgroup/indexMeta2RDF/TransformIndexMeta.java @ 4:e93de4e99b52 default tip
indexMeta2rdf in dieses Projekt verschoben
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 14:37:55 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
3:6c8dac2c5214 | 4:e93de4e99b52 |
---|---|
1 package de.mpiwg.itgroup.indexMeta2RDF; | |
2 | |
3 import java.io.File; | |
4 import java.io.FileInputStream; | |
5 import java.io.FileWriter; | |
6 import java.io.IOException; | |
7 import java.io.InputStream; | |
8 import java.net.MalformedURLException; | |
9 import java.net.URL; | |
10 import java.util.Iterator; | |
11 | |
12 import org.jdom.Attribute; | |
13 import org.jdom.Document; | |
14 import org.jdom.Element; | |
15 import org.jdom.JDOMException; | |
16 import org.jdom.input.SAXBuilder; | |
17 import org.jdom.xpath.XPath; | |
18 import org.openrdf.model.Statement; | |
19 import org.openrdf.model.impl.LiteralImpl; | |
20 import org.openrdf.model.impl.StatementImpl; | |
21 import org.openrdf.model.impl.URIImpl; | |
22 import org.openrdf.rio.RDFHandlerException; | |
23 import org.openrdf.rio.trig.TriGWriter; | |
24 import org.openrdf.rio.turtle.TurtleWriter; | |
25 import org.openrdf.model.Value; | |
26 | |
27 | |
28 | |
29 | |
30 | |
31 public class TransformIndexMeta { | |
32 String OBJ_BASE_URL="http://echo.mpiwg-berlin.mpg.de/indexMeta/"; | |
33 String ONT_BASE_URL="http://ontologies.mpiwg-berlin.mpg.de/general/MetaData/"; | |
34 private String indexMetaType ="http://ontologies.mpiwg-berlin.mpg.de/general/IndexMeta"; | |
35 private String bibObjType ="http://ontologies.mpiwg-berlin.mpg.de/general/BibData"; | |
36 private FileWriter out; | |
37 private FileWriter error; | |
38 private TurtleWriter turtleWriter; | |
39 | |
40 | |
41 public TransformIndexMeta(FileWriter fw, FileWriter ew){ | |
42 out=fw; | |
43 error=ew; | |
44 turtleWriter = new TurtleWriter(fw); | |
45 } | |
46 public void transform(String metaData) throws IOException, JDOMException, RDFHandlerException{ | |
47 //URL url = new URL(metaData); | |
48 //InputStream is = url.openStream(); | |
49 | |
50 turtleWriter.startRDF(); | |
51 FileInputStream is = new FileInputStream(metaData); | |
52 Document doc; | |
53 try { | |
54 doc = new SAXBuilder().build(is); | |
55 } catch (Exception e1) { | |
56 // TODO Auto-generated catch block | |
57 System.err.println("Cannot parse:"+metaData); | |
58 error.write("cannotparse:"+metaData+"\n"); | |
59 return; | |
60 } | |
61 | |
62 XPath xpDri = XPath.newInstance("//meta/dri[@type=\"escidoc-test\"]"); | |
63 | |
64 Element result = (Element)xpDri.selectSingleNode(doc); | |
65 | |
66 if (result==null){ | |
67 System.err.println("No dri for:"+metaData); | |
68 error.write("Non dri for:"+metaData+"\n"); | |
69 return; | |
70 } | |
71 | |
72 String dri=result.getTextTrim(); | |
73 | |
74 String objIdent=OBJ_BASE_URL+dri; | |
75 | |
76 | |
77 //out.write(String.format("<%s> rdf:type <%s>.\n", objIdent,indexMetaType)); | |
78 | |
79 Statement smt = new StatementImpl(new URIImpl(objIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(indexMetaType)); | |
80 turtleWriter.handleStatement(smt); | |
81 | |
82 Element resElement = doc.getRootElement(); | |
83 | |
84 for (Object n: resElement.getChildren()){ | |
85 if (Element.class.isInstance(n)) { | |
86 Element e = (Element)n; | |
87 | |
88 if (!e.getTextTrim().equals("")){ | |
89 String txt=e.getTextTrim();//.replace("\"","\\\""); | |
90 | |
91 smt = new StatementImpl | |
92 (new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt))); | |
93 turtleWriter.handleStatement(smt); | |
94 | |
95 | |
96 //out.write(String.format("<%s> <%s> \"%s\".\n", objIdent,ONT_BASE_URL+e.getName(),txt)); | |
97 } | |
98 } | |
99 | |
100 | |
101 } | |
102 | |
103 XPath bib = XPath.newInstance("//meta/bib"); | |
104 | |
105 Element bibElement = (Element)bib.selectSingleNode(doc); | |
106 if (bibElement==null){ | |
107 System.err.println("No bibelement in:"+metaData); | |
108 error.write("No bibelement in:"+metaData+"\n"); | |
109 return; | |
110 } | |
111 String bibIdent=objIdent+":bib"; | |
112 | |
113 smt = new StatementImpl(new URIImpl(bibIdent), new URIImpl("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"), new URIImpl(bibObjType)); | |
114 turtleWriter.handleStatement(smt); | |
115 | |
116 //out.write(String.format("<%s> rdf:type <%s>.\n",bibIdent,bibObjType)); | |
117 | |
118 smt = new StatementImpl(new URIImpl(objIdent), new URIImpl(ONT_BASE_URL+"has_bibl_metaData"), new URIImpl(bibIdent)); | |
119 turtleWriter.handleStatement(smt); | |
120 | |
121 //out.write(String.format("<%s> <%s> <%s>.\n", objIdent,ONT_BASE_URL+"has_bibl_metaData",bibIdent)); | |
122 | |
123 Attribute bibType = bibElement.getAttribute("type"); | |
124 if (bibType==null){ | |
125 System.err.println("No bibtype in:"+metaData); | |
126 error.write("No bibtype in:"+metaData+"\n"); | |
127 return; | |
128 } | |
129 String type=bibType.getValue(); | |
130 | |
131 smt = new StatementImpl | |
132 (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+"is_of_type"), (Value)(new LiteralImpl(type))); | |
133 turtleWriter.handleStatement(smt); | |
134 | |
135 //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+"is_of_type",type)); | |
136 | |
137 for (Object n: bibElement.getChildren()){ | |
138 if (Element.class.isInstance(n)) { | |
139 Element e = (Element)n; | |
140 String txt=e.getTextTrim();//.replace("\"","\\\""); | |
141 smt = new StatementImpl | |
142 (new URIImpl(bibIdent), new URIImpl(ONT_BASE_URL+e.getName()), (Value)(new LiteralImpl(txt))); | |
143 turtleWriter.handleStatement(smt); | |
144 | |
145 //out.write(String.format("<%s> <%s> \"%s\".\n", bibIdent,ONT_BASE_URL+e.getName(),txt)); | |
146 } | |
147 | |
148 | |
149 } | |
150 turtleWriter.endRDF(); | |
151 | |
152 } | |
153 | |
154 static public void main(String[] args) throws IOException, RDFHandlerException{ | |
155 if (args.length!=1){ | |
156 System.out.println("Usage: transfom path"); | |
157 System.exit(1); | |
158 } | |
159 String root = args[0]; | |
160 FileWriter fw = new FileWriter("/tmp/out.rdf"); | |
161 FileWriter ew = new FileWriter("/tmp/errors_transform.txt"); | |
162 TransformIndexMeta tim = new TransformIndexMeta(fw,ew); | |
163 | |
164 Iterator<String> it = new IndexMetaIterator(new File(root)); | |
165 while (it.hasNext()){ | |
166 String nx = it.next(); | |
167 try { | |
168 if(nx==null){ | |
169 continue; //weiss noch nicht warum das passiert. | |
170 } | |
171 tim.transform(nx); | |
172 } catch (JDOMException e) { | |
173 System.out.println("JDOM exception:"+nx); | |
174 //e.printStackTrace(); | |
175 } | |
176 fw.flush(); | |
177 ew.flush(); | |
178 } | |
179 fw.close(); | |
180 ew.close(); | |
181 } | |
182 } |