Mercurial > hg > duomoOWLProject
comparison src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java @ 8:919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 17:08:22 +0200 |
parents | 19e40abb3e8a |
children | 4392a6adf85a |
comparison
equal
deleted
inserted
replaced
7:19e40abb3e8a | 8:919e9f3b5efd |
---|---|
1 package de.mpiwg.dwinter.duomo.lexdump; | 1 package de.mpiwg.dwinter.duomo.lexdump; |
2 | 2 |
3 import java.io.File; | 3 import java.io.File; |
4 import java.io.FileOutputStream; | |
4 import java.io.FileWriter; | 5 import java.io.FileWriter; |
5 import java.io.IOException; | 6 import java.io.IOException; |
7 import java.io.OutputStreamWriter; | |
6 import java.net.URI; | 8 import java.net.URI; |
7 import java.net.URISyntaxException; | 9 import java.net.URISyntaxException; |
8 import java.util.ArrayList; | 10 import java.util.ArrayList; |
9 import java.util.Collection; | 11 import java.util.Collection; |
10 import java.util.HashMap; | 12 import java.util.HashMap; |
11 import java.util.Iterator; | 13 import java.util.Iterator; |
12 import java.util.List; | 14 import java.util.List; |
15 | |
16 import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction; | |
13 | 17 |
14 import org.apache.log4j.Logger; | 18 import org.apache.log4j.Logger; |
15 | 19 |
16 import edu.stanford.smi.protege.exception.OntologyLoadException; | 20 import edu.stanford.smi.protege.exception.OntologyLoadException; |
17 import edu.stanford.smi.protegex.owl.ProtegeOWL; | 21 import edu.stanford.smi.protegex.owl.ProtegeOWL; |
22 import edu.stanford.smi.protegex.owl.model.RDFProperty; | 26 import edu.stanford.smi.protegex.owl.model.RDFProperty; |
23 import edu.stanford.smi.protegex.owl.model.RDFSLiteral; | 27 import edu.stanford.smi.protegex.owl.model.RDFSLiteral; |
24 import edu.stanford.smi.protegex.owl.repository.RepositoryManager; | 28 import edu.stanford.smi.protegex.owl.repository.RepositoryManager; |
25 import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository; | 29 import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository; |
26 | 30 |
31 /** | |
32 * Erzeugt und verwaltet Entities gemäß eines OWL-Modells. | |
33 * Alle erzeugten Entities werden direkt als nturtle in eine File geschrieben. | |
34 * | |
35 * TODO: Dieses File ist immer /tmp/out.rdf | |
36 * TODO: zur Ausgabe gibt es eine Methode @see{org.openrdf.rio.trig.TriGWriter} bzw @see{de.mpiwg.itgroup.triplestoremanager.tools.Exporter} | |
37 * die benutzt werden sollte anstelle der handgestrickten bei denen direkt in outrdf geschrieben wird. | |
38 * | |
39 * TODO: Einzelne Methoden hier sind noch DUOMO und CRM spezifisch, insbesondere | |
40 * @author dwinter | |
41 * | |
42 * | |
43 */ | |
27 public class OWLImporter { | 44 public class OWLImporter { |
28 | 45 |
29 JenaOWLModel owlModel; // contains the model | 46 JenaOWLModel owlModel; // contains the model |
30 Logger logger = Logger.getRootLogger(); | 47 Logger logger = Logger.getRootLogger(); |
31 FileWriter fh; | 48 FileWriter fh; |
32 private HashMap<String, String> typeId2className=null; | 49 private HashMap<String, String> typeId2className=null; |
33 private HashMap<String, OWLNamedClass> typeId2class=null; | 50 private HashMap<String, OWLNamedClass> typeId2class=null; |
34 private String ontFolder; | 51 private String ontFolder; |
35 private URI uri; | 52 private URI uri; |
36 | 53 //private FileWriter outRDF; |
54 private OutputStreamWriter outRDF; | |
55 | |
56 /** | |
57 * Initialisiert die Klasse und | |
58 * lädt die Ontologien ein | |
59 * @param folder Ordner mit der zu bearbeitenden Ontologie | |
60 * @param uri URI der Ontologie selbst | |
61 * @throws OntologyLoadException | |
62 */ | |
37 public OWLImporter(String folder, URI uri) throws OntologyLoadException { | 63 public OWLImporter(String folder, URI uri) throws OntologyLoadException { |
38 // owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri); | 64 // owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri); |
39 | 65 |
40 this.ontFolder=folder; | 66 this.ontFolder=folder; |
41 this.uri=uri; | 67 this.uri=uri; |
42 try { | 68 try { |
43 this.fh= new FileWriter(new File("/tmp/identifier")); | 69 this.fh= new FileWriter(new File("/tmp/identifier")); |
70 this.outRDF= new OutputStreamWriter(new FileOutputStream("/tmp/out.rdf"),"UTF-8"); | |
44 } catch (IOException e) { | 71 } catch (IOException e) { |
45 // TODO Auto-generated catch block | 72 // TODO Auto-generated catch block |
46 e.printStackTrace(); | 73 e.printStackTrace(); |
47 } | 74 } |
48 loadOWL(folder, uri); | 75 loadOWL(folder, uri); |
49 } | 76 } |
50 | 77 |
78 /** | |
79 * Lädt die Ontologie erneut ein. | |
80 * @throws OntologyLoadException | |
81 */ | |
51 public void reloadOWL() throws OntologyLoadException{ | 82 public void reloadOWL() throws OntologyLoadException{ |
52 loadOWL(ontFolder, uri); | 83 loadOWL(ontFolder, uri); |
53 } | 84 } |
54 | 85 |
86 /** | |
87 * | |
88 * Lädt die Ontologien ein | |
89 * @param folder Ordner mit der zu bearbeitenden Ontologie | |
90 * @param uri URI der Ontologie selbst | |
91 * @throws OntologyLoadException | |
92 */ | |
55 public void loadOWL(String folder, URI uri) throws OntologyLoadException { | 93 public void loadOWL(String folder, URI uri) throws OntologyLoadException { |
56 owlModel = ProtegeOWL.createJenaOWLModel(); | 94 owlModel = ProtegeOWL.createJenaOWLModel(); |
57 // Load repository | 95 // Load repository |
58 RepositoryManager rman = owlModel.getRepositoryManager(); | 96 RepositoryManager rman = owlModel.getRepositoryManager(); |
59 | 97 |
82 } | 120 } |
83 | 121 |
84 } | 122 } |
85 | 123 |
86 public static void main(String args[]) throws URISyntaxException { | 124 public static void main(String args[]) throws URISyntaxException { |
87 String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput"; | 125 String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version"; |
88 URI ontologieUri = new URI( | 126 URI ontologieUri = new URI( |
89 "file:///Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput/duomoAnalysis.owl"); | 127 "file:///Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version/protege_version_duomo/duomoAnalysis.owl"); |
90 try { | 128 try { |
91 | 129 |
92 OWLImporter im = new OWLImporter(base, ontologieUri); | 130 OWLImporter im = new OWLImporter(base, ontologieUri); |
93 im.printModel(); | 131 im.printModel(); |
94 } catch (OntologyLoadException e) { | 132 } catch (OntologyLoadException e) { |
95 // TODO Auto-generated catch block | 133 // TODO Auto-generated catch block |
96 e.printStackTrace(); | 134 e.printStackTrace(); |
97 } | 135 } |
98 } | 136 } |
99 | 137 |
100 public OWLIndividual createInstance(String string) { | 138 /** Erzeuge Instanz und schreibe in das Exportfile die entsprechende <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> Relation. |
139 * @param fullClassName Name der Klasse | |
140 * @return null, wenn das Objekt nicht angelegt werden kann. | |
141 */ | |
142 public OWLIndividual createInstance(String fullClassName) { | |
101 OWLNamedClass owlclass = (OWLNamedClass) owlModel | 143 OWLNamedClass owlclass = (OWLNamedClass) owlModel |
102 .getOWLNamedClass(string); | 144 .getOWLNamedClass(fullClassName); |
103 | 145 |
104 if (owlclass == null) { | 146 if (owlclass == null) { |
105 logger.debug("Cannot find OWLClass:" + string); | 147 logger.debug("Cannot find OWLClass:" + fullClassName); |
106 return null; | 148 return null; |
107 } | 149 } |
108 //logger.debug("Create new individual of type:"+string); | 150 //logger.debug("Create new individual of type:"+string); |
109 return owlclass.createOWLIndividual(null); | 151 |
110 | 152 |
111 } | 153 OWLIndividual ind = owlclass.createOWLIndividual(null); |
112 | 154 |
113 public void setProperty(OWLIndividual identifier, String propertyName, | 155 //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} |
156 String triple = String.format("<%s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <%s>.\n", ind.getName(),owlclass.getName()); | |
157 try { | |
158 outRDF.write(triple); | |
159 outRDF.flush(); | |
160 } catch (IOException e) { | |
161 // TODO Auto-generated catch block | |
162 e.printStackTrace(); | |
163 } | |
164 return ind; | |
165 | |
166 } | |
167 | |
168 | |
169 /** | |
170 * Erzeuge eine Property und schreibe die entsprechenden Informationen in das out-file. | |
171 * @param individual | |
172 * @param propertyName | |
173 * @param value | |
174 */ | |
175 public void setProperty(OWLIndividual individual, String propertyName, | |
114 Object value) { | 176 Object value) { |
115 RDFProperty prop = owlModel.getRDFProperty(propertyName); | 177 RDFProperty prop = owlModel.getRDFProperty(propertyName); |
116 identifier.setPropertyValue(prop, value); | 178 individual.setPropertyValue(prop, value); |
117 | 179 |
118 } | 180 |
119 | 181 |
120 public void setDataTypePropery(OWLIndividual eventInstance, | 182 //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} |
183 String valName=""; | |
184 | |
185 | |
186 if (OWLIndividual.class.isInstance(value)) | |
187 valName="<"+((OWLIndividual)value).getName()+">"; | |
188 else | |
189 valName="\""+escapeRDFLit((String)value)+"\""; | |
190 | |
191 | |
192 String triple = String.format("<%s> <%s> %s.\n", individual.getName(),prop.getName(),valName); | |
193 try { | |
194 outRDF.write(triple); | |
195 outRDF.flush(); | |
196 } catch (IOException e) { | |
197 // TODO Auto-generated catch block | |
198 e.printStackTrace(); | |
199 } | |
200 } | |
201 | |
202 private String escapeRDFLit(String string){ | |
203 return string.replace("\"", ""); | |
204 } | |
205 | |
206 /** Erzeuge DatatypeProperty und schreibe die entsprechenden Informationen in das out-file. | |
207 * @param individual | |
208 * @param propertyName | |
209 * @param value | |
210 * @param lang Sprach-tag | |
211 */ | |
212 public void setDataTypePropery(OWLIndividual individual, | |
121 String propertyName, String value, String lang) { | 213 String propertyName, String value, String lang) { |
122 RDFProperty prop = owlModel.getRDFProperty(propertyName); | 214 RDFProperty prop = owlModel.getRDFProperty(propertyName); |
123 // if(OWLDatatypeProperty.class.isInstance(prop)){ | 215 // if(OWLDatatypeProperty.class.isInstance(prop)){ |
124 // OWLDatatypeProperty dp = (OWLDatatypeProperty)prop; | 216 // OWLDatatypeProperty dp = (OWLDatatypeProperty)prop; |
125 // prop.set | 217 // prop.set |
127 // } else { | 219 // } else { |
128 // logger.error("Is not a datatypeprop:"+propertyName); | 220 // logger.error("Is not a datatypeprop:"+propertyName); |
129 // } | 221 // } |
130 | 222 |
131 | 223 |
224 | |
132 RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang); | 225 RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang); |
133 eventInstance.setPropertyValue(prop, langLiteral); | 226 individual.setPropertyValue(prop, langLiteral); |
134 | 227 |
135 | 228 //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} |
136 } | 229 String triple = String.format("<%s> <%s> \"%s\"@%s.\n", individual.getName(),prop.getName(),escapeRDFLit(langLiteral.getString()),lang); |
137 | 230 try { |
138 public Object getRelatedIndividual(OWLIndividual cardInd, | 231 outRDF.write(triple); |
232 outRDF.flush(); | |
233 } catch (IOException e) { | |
234 // TODO Auto-generated catch block | |
235 e.printStackTrace(); | |
236 } | |
237 } | |
238 | |
239 /** | |
240 * Gibt Individual zurück, das über die Property mit der Quelle verbunden ist. | |
241 * @param sourceIndividual | |
242 * @param propertyName | |
243 * @return | |
244 */ | |
245 public Object getRelatedIndividual(OWLIndividual sourceIndividual, | |
139 String propertyName) { | 246 String propertyName) { |
140 RDFProperty prop = owlModel.getRDFProperty(propertyName); | 247 RDFProperty prop = owlModel.getRDFProperty(propertyName); |
141 if (prop == null) { | 248 if (prop == null) { |
142 logger.debug("Property does not exist:" + propertyName); | 249 logger.debug("Property does not exist:" + propertyName); |
143 } | 250 } |
144 | 251 |
145 Object value = cardInd.getPropertyValue(prop); | 252 Object value = sourceIndividual.getPropertyValue(prop); |
146 return value; | 253 return value; |
147 } | 254 } |
148 | 255 |
149 | 256 |
150 public OWLIndividual getIndividual(String classNameFrom, | 257 public OWLIndividual getIndividual(String classNameFrom, |
261 | 368 |
262 return returnList; | 369 return returnList; |
263 } | 370 } |
264 | 371 |
265 public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) { | 372 public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) { |
266 OWLIndividual timeSpan = createInstance("DatesDocument"); | 373 // A date is described by a timespan which is described by an appellation |
267 | 374 |
268 | 375 //TODO: die lesbaren daten gehören in time span mit den angaben wie ungefähr und so weiter |
269 | 376 // d.h. die klammern um ein Datum die ungefaehr signalisieren, gehoeren in die Beschreibung der |
270 setProperty(timeSpan, "has_readable_date", dateDcStart); | 377 //Qualifier in Time span |
378 // in DuomoDate_Appellation kommen die genauen teile | |
379 // zunaechst schreibe ich in "has_readable" hier genau rein was im Text steht | |
380 // dieses muss noch analysiert werden | |
381 // so sollte 1432 gennaio 9 --> in ein Computer lesbares modell umgesetzt werden | |
382 // und 1432 gennaio (9) muss in 1432 gennaio 9 in DuomoDate_Appellation eingetragen werden | |
383 // und dann mit P79 und P80 in DuomoDate_TimeSpan genauer beschrieben werden, d.h in P79 kommt dann | |
384 // ungefaehr | |
385 // ausserdem muessen "POST" und "ANTE" aus dem Text in das Modell eingepflegt werden. | |
386 | |
387 | |
388 OWLIndividual timeSpan = createInstance("DuomoDate_TimeSpan"); | |
389 | |
390 OWLIndividual date= createInstance("DuomoDate_Appellation"); | |
391 | |
392 setProperty(date, "rdfs:label", dateDcStart); | |
393 //setProperty(timeSpan, "has_readable_date", dateDcStart); | |
394 | |
395 setProperty(timeSpan,"is_identified_by_Date",date); | |
271 | 396 |
272 if(!(dateDcEnd==null || dateDcEnd.equals(""))){ | 397 if(!(dateDcEnd==null || dateDcEnd.equals(""))){ |
273 setProperty(timeSpan, "has_readable_to_date", dateDcEnd); | 398 OWLIndividual toDate= createInstance("DuomoDate_Appellation"); |
399 setProperty(toDate, "rdfs:label", dateDcEnd); | |
400 //setProperty(timeSpan, "has_readable_toDate", dateDcEnd); | |
401 setProperty(timeSpan,"is_identified_by_toDate",toDate); | |
274 } else { | 402 } else { |
275 dateDcEnd=dateDcStart; | 403 dateDcEnd=dateDcStart; |
276 } | 404 } |
277 | 405 |
278 OWLIndividual timeSpanIdentifier = createInstance("Identifier"); | 406 OWLIndividual timeSpanIdentifier = createInstance("Identifier"); |
279 setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd); | 407 setProperty(timeSpanIdentifier,"rdfs:label",dateDcStart+"-"+dateDcEnd); |
280 | 408 |
281 setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); | 409 setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); |
282 | 410 |
283 return timeSpan; | 411 return timeSpan; |
284 } | 412 } |
413 | |
414 // public OWLIndividual createTimeSpan2(String dateDcStart, String dateDcEnd) { | |
415 // OWLIndividual timeSpan = createInstance("DatesDocument"); | |
416 // | |
417 // | |
418 // | |
419 // setProperty(timeSpan, "has_readable_date", dateDcStart); | |
420 // | |
421 // if(!(dateDcEnd==null || dateDcEnd.equals(""))){ | |
422 // setProperty(timeSpan, "has_readable_to_date", dateDcEnd); | |
423 // } else { | |
424 // dateDcEnd=dateDcStart; | |
425 // } | |
426 // | |
427 // OWLIndividual timeSpanIdentifier = createInstance("Identifier"); | |
428 // setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd); | |
429 // | |
430 // setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); | |
431 // | |
432 // return timeSpan; | |
433 // } | |
434 | |
285 | 435 |
286 public OWLIndividual getIndividualByReadableId(String className,String identifier){ | 436 public OWLIndividual getIndividualByReadableId(String className,String identifier){ |
287 return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "has_readable_id", identifier, true); | 437 return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "rdfs:label", identifier, true); |
288 } | 438 } |
289 | 439 |
290 public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){ | 440 public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){ |
291 return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "has_readable_id", identifier,subclassedIdentifier); | 441 return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "rdfs:label", identifier,subclassedIdentifier); |
292 } | 442 } |
293 | 443 |
294 public String getClassNameFromTypeId(String typeId) { | 444 public String getClassNameFromTypeId(String typeId) { |
295 if (typeId2className==null){ // hash nicht angelegt | 445 if (typeId2className==null){ // hash nicht angelegt |
296 createTypeId2classHashes(); | 446 createTypeId2classHashes(); |
331 | 481 |
332 } | 482 } |
333 | 483 |
334 public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance, | 484 public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance, |
335 String classNameIdentifier, String identifier,boolean followSubclasses) { | 485 String classNameIdentifier, String identifier,boolean followSubclasses) { |
486 | |
487 identifier=org.apache.commons.lang.StringUtils.strip(identifier); | |
488 | |
336 OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses); | 489 OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses); |
337 if(ind==null){ | 490 if(ind==null){ |
338 ind = createInstance(classNameInstance); | 491 ind = createInstance(classNameInstance); |
339 OWLIndividual identifierInd = createInstance(classNameIdentifier); | 492 OWLIndividual identifierInd = createInstance(classNameIdentifier); |
340 setProperty(identifierInd, "has_readable_id", identifier); | 493 setProperty(identifierInd, "rdfs:label", identifier); |
341 try { | 494 try { |
342 fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n"); | 495 fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n"); |
343 fh.flush(); | 496 fh.flush(); |
344 } catch (IOException e) { | 497 } catch (IOException e) { |
345 // TODO Auto-generated catch block | 498 // TODO Auto-generated catch block |