comparison src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java @ 8:919e9f3b5efd

neue klassen zur textanalyse (stanford parser eingebaut) alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents 19e40abb3e8a
children 4392a6adf85a
comparison
equal deleted inserted replaced
7:19e40abb3e8a 8:919e9f3b5efd
1 package de.mpiwg.dwinter.duomo.lexdump; 1 package de.mpiwg.dwinter.duomo.lexdump;
2 2
3 import java.io.File; 3 import java.io.File;
4 import java.io.FileOutputStream;
4 import java.io.FileWriter; 5 import java.io.FileWriter;
5 import java.io.IOException; 6 import java.io.IOException;
7 import java.io.OutputStreamWriter;
6 import java.net.URI; 8 import java.net.URI;
7 import java.net.URISyntaxException; 9 import java.net.URISyntaxException;
8 import java.util.ArrayList; 10 import java.util.ArrayList;
9 import java.util.Collection; 11 import java.util.Collection;
10 import java.util.HashMap; 12 import java.util.HashMap;
11 import java.util.Iterator; 13 import java.util.Iterator;
12 import java.util.List; 14 import java.util.List;
15
16 import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction;
13 17
14 import org.apache.log4j.Logger; 18 import org.apache.log4j.Logger;
15 19
16 import edu.stanford.smi.protege.exception.OntologyLoadException; 20 import edu.stanford.smi.protege.exception.OntologyLoadException;
17 import edu.stanford.smi.protegex.owl.ProtegeOWL; 21 import edu.stanford.smi.protegex.owl.ProtegeOWL;
22 import edu.stanford.smi.protegex.owl.model.RDFProperty; 26 import edu.stanford.smi.protegex.owl.model.RDFProperty;
23 import edu.stanford.smi.protegex.owl.model.RDFSLiteral; 27 import edu.stanford.smi.protegex.owl.model.RDFSLiteral;
24 import edu.stanford.smi.protegex.owl.repository.RepositoryManager; 28 import edu.stanford.smi.protegex.owl.repository.RepositoryManager;
25 import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository; 29 import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository;
26 30
31 /**
32 * Erzeugt und verwaltet Entities gemäß einem OWL-Modell.
33 * Alle erzeugten Entities werden direkt als nturtle in eine File geschrieben.
34 *
35 * TODO: Dieses File ist immer /tmp/out.rdf
36 * TODO: zur Ausgabe gibt es eine Methode @see{org.openrdf.rio.trig.TriGWriter} bzw @see{de.mpiwg.itgroup.triplestoremanager.tools.Exporter}
37 * die benutzt werden sollte anstelle der handgestrickten, bei denen direkt in outRDF geschrieben wird.
38 *
39 * TODO: Einzelne Methoden hier sind noch DUOMO und CRM spezifisch, insbesondere
40 * @author dwinter
41 *
42 *
43 */
27 public class OWLImporter { 44 public class OWLImporter {
28 45
29 JenaOWLModel owlModel; // contains the model 46 JenaOWLModel owlModel; // contains the model
30 Logger logger = Logger.getRootLogger(); 47 Logger logger = Logger.getRootLogger();
31 FileWriter fh; 48 FileWriter fh;
32 private HashMap<String, String> typeId2className=null; 49 private HashMap<String, String> typeId2className=null;
33 private HashMap<String, OWLNamedClass> typeId2class=null; 50 private HashMap<String, OWLNamedClass> typeId2class=null;
34 private String ontFolder; 51 private String ontFolder;
35 private URI uri; 52 private URI uri;
36 53 //private FileWriter outRDF;
54 private OutputStreamWriter outRDF;
55
56 /**
57 * Initialisiert die Klasse und
58 * lädt die Ontologien ein
59 * @param folder Ordner mit der zu bearbeitenden Ontologie
60 * @param uri URI der Ontologie selbst
61 * @throws OntologyLoadException
62 */
37 public OWLImporter(String folder, URI uri) throws OntologyLoadException { 63 public OWLImporter(String folder, URI uri) throws OntologyLoadException {
38 // owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri); 64 // owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri);
39 65
40 this.ontFolder=folder; 66 this.ontFolder=folder;
41 this.uri=uri; 67 this.uri=uri;
42 try { 68 try {
43 this.fh= new FileWriter(new File("/tmp/identifier")); 69 this.fh= new FileWriter(new File("/tmp/identifier"));
70 this.outRDF= new OutputStreamWriter(new FileOutputStream("/tmp/out.rdf"),"UTF-8");
44 } catch (IOException e) { 71 } catch (IOException e) {
45 // TODO Auto-generated catch block 72 // TODO Auto-generated catch block
46 e.printStackTrace(); 73 e.printStackTrace();
47 } 74 }
48 loadOWL(folder, uri); 75 loadOWL(folder, uri);
49 } 76 }
50 77
78 /**
79 * Lädt die Ontologie erneut ein.
80 * @throws OntologyLoadException
81 */
51 public void reloadOWL() throws OntologyLoadException{ 82 public void reloadOWL() throws OntologyLoadException{
52 loadOWL(ontFolder, uri); 83 loadOWL(ontFolder, uri);
53 } 84 }
54 85
86 /**
87 *
88 * Lädt die Ontologien ein
89 * @param folder Ordner mit der zu bearbeitenden Ontologie
90 * @param uri URI der Ontologie selbst
91 * @throws OntologyLoadException
92 */
55 public void loadOWL(String folder, URI uri) throws OntologyLoadException { 93 public void loadOWL(String folder, URI uri) throws OntologyLoadException {
56 owlModel = ProtegeOWL.createJenaOWLModel(); 94 owlModel = ProtegeOWL.createJenaOWLModel();
57 // Load repository 95 // Load repository
58 RepositoryManager rman = owlModel.getRepositoryManager(); 96 RepositoryManager rman = owlModel.getRepositoryManager();
59 97
82 } 120 }
83 121
84 } 122 }
85 123
86 public static void main(String args[]) throws URISyntaxException { 124 public static void main(String args[]) throws URISyntaxException {
87 String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput"; 125 String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version";
88 URI ontologieUri = new URI( 126 URI ontologieUri = new URI(
89 "file:///Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput/duomoAnalysis.owl"); 127 "file:///Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version/protege_version_duomo/duomoAnalysis.owl");
90 try { 128 try {
91 129
92 OWLImporter im = new OWLImporter(base, ontologieUri); 130 OWLImporter im = new OWLImporter(base, ontologieUri);
93 im.printModel(); 131 im.printModel();
94 } catch (OntologyLoadException e) { 132 } catch (OntologyLoadException e) {
95 // TODO Auto-generated catch block 133 // TODO Auto-generated catch block
96 e.printStackTrace(); 134 e.printStackTrace();
97 } 135 }
98 } 136 }
99 137
100 public OWLIndividual createInstance(String string) { 138 /** Erzeuge Instanz und schreibe in das Exportfile die entsprechende <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> Relation.
139 * @param fullClassName Name der Klasse
140 * @return null, wenn das Objekt nicht angelegt werden kann.
141 */
142 public OWLIndividual createInstance(String fullClassName) {
101 OWLNamedClass owlclass = (OWLNamedClass) owlModel 143 OWLNamedClass owlclass = (OWLNamedClass) owlModel
102 .getOWLNamedClass(string); 144 .getOWLNamedClass(fullClassName);
103 145
104 if (owlclass == null) { 146 if (owlclass == null) {
105 logger.debug("Cannot find OWLClass:" + string); 147 logger.debug("Cannot find OWLClass:" + fullClassName);
106 return null; 148 return null;
107 } 149 }
108 //logger.debug("Create new individual of type:"+string); 150 //logger.debug("Create new individual of type:"+string);
109 return owlclass.createOWLIndividual(null); 151
110 152
111 } 153 OWLIndividual ind = owlclass.createOWLIndividual(null);
112 154
113 public void setProperty(OWLIndividual identifier, String propertyName, 155 //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter}
156 String triple = String.format("<%s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <%s>.\n", ind.getName(),owlclass.getName());
157 try {
158 outRDF.write(triple);
159 outRDF.flush();
160 } catch (IOException e) {
161 // TODO Auto-generated catch block
162 e.printStackTrace();
163 }
164 return ind;
165
166 }
167
168
169 /**
170 * Erzeuge eine Property und schreibe die entsprechenden Informationen in das out-file.
171 * @param individual
172 * @param propertyName
173 * @param value
174 */
175 public void setProperty(OWLIndividual individual, String propertyName,
114 Object value) { 176 Object value) {
115 RDFProperty prop = owlModel.getRDFProperty(propertyName); 177 RDFProperty prop = owlModel.getRDFProperty(propertyName);
116 identifier.setPropertyValue(prop, value); 178 individual.setPropertyValue(prop, value);
117 179
118 } 180
119 181
120 public void setDataTypePropery(OWLIndividual eventInstance, 182 //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter}
183 String valName="";
184
185
186 if (OWLIndividual.class.isInstance(value))
187 valName="<"+((OWLIndividual)value).getName()+">";
188 else
189 valName="\""+escapeRDFLit((String)value)+"\"";
190
191
192 String triple = String.format("<%s> <%s> %s.\n", individual.getName(),prop.getName(),valName);
193 try {
194 outRDF.write(triple);
195 outRDF.flush();
196 } catch (IOException e) {
197 // TODO Auto-generated catch block
198 e.printStackTrace();
199 }
200 }
201
202 private String escapeRDFLit(String string){
203 return string.replace("\"", "");
204 }
205
206 /** Erzeuge DatatypeProperty und schreibe die entsprechenden Informationen in das out-file.
207 * @param individual
208 * @param propertyName
209 * @param value
210 * @param lang Sprach-tag
211 */
212 public void setDataTypePropery(OWLIndividual individual,
121 String propertyName, String value, String lang) { 213 String propertyName, String value, String lang) {
122 RDFProperty prop = owlModel.getRDFProperty(propertyName); 214 RDFProperty prop = owlModel.getRDFProperty(propertyName);
123 // if(OWLDatatypeProperty.class.isInstance(prop)){ 215 // if(OWLDatatypeProperty.class.isInstance(prop)){
124 // OWLDatatypeProperty dp = (OWLDatatypeProperty)prop; 216 // OWLDatatypeProperty dp = (OWLDatatypeProperty)prop;
125 // prop.set 217 // prop.set
127 // } else { 219 // } else {
128 // logger.error("Is not a datatypeprop:"+propertyName); 220 // logger.error("Is not a datatypeprop:"+propertyName);
129 // } 221 // }
130 222
131 223
224
132 RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang); 225 RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang);
133 eventInstance.setPropertyValue(prop, langLiteral); 226 individual.setPropertyValue(prop, langLiteral);
134 227
135 228 //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter}
136 } 229 String triple = String.format("<%s> <%s> \"%s\"@%s.\n", individual.getName(),prop.getName(),escapeRDFLit(langLiteral.getString()),lang);
137 230 try {
138 public Object getRelatedIndividual(OWLIndividual cardInd, 231 outRDF.write(triple);
232 outRDF.flush();
233 } catch (IOException e) {
234 // TODO Auto-generated catch block
235 e.printStackTrace();
236 }
237 }
238
239 /**
240 * Gibt das Individual zurück, das über die Property mit der Quelle verbunden ist.
241 * @param sourceIndividual
242 * @param propertyName
243 * @return
244 */
245 public Object getRelatedIndividual(OWLIndividual sourceIndividual,
139 String propertyName) { 246 String propertyName) {
140 RDFProperty prop = owlModel.getRDFProperty(propertyName); 247 RDFProperty prop = owlModel.getRDFProperty(propertyName);
141 if (prop == null) { 248 if (prop == null) {
142 logger.debug("Property does not exist:" + propertyName); 249 logger.debug("Property does not exist:" + propertyName);
143 } 250 }
144 251
145 Object value = cardInd.getPropertyValue(prop); 252 Object value = sourceIndividual.getPropertyValue(prop);
146 return value; 253 return value;
147 } 254 }
148 255
149 256
150 public OWLIndividual getIndividual(String classNameFrom, 257 public OWLIndividual getIndividual(String classNameFrom,
261 368
262 return returnList; 369 return returnList;
263 } 370 }
264 371
265 public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) { 372 public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) {
266 OWLIndividual timeSpan = createInstance("DatesDocument"); 373 // A date is described by a timespan which is described by an appellation
267 374
268 375 //TODO: die lesbaren daten gehören in time span mit den angaben wie ungefähr und so weiter
269 376 // d.h. die klammern um ein Datum die ungefaehr signalisieren, gehoeren in die Beschreibung der
270 setProperty(timeSpan, "has_readable_date", dateDcStart); 377 //Qualifier in Time span
378 // in DuomoDate_Appellation kommen die genauen teile
379 // zunaechst schreibe ich in "has_readable" hier genau rein was im Text steht
380 // dieses muss noch analysiert werden
381 // so sollte 1432 gennaio 9 --> in ein computerlesbares Modell umgesetzt werden
382 // und 1432 gennaio (9) muss in 1432 gennaio 9 in DuomoDate_Appellation eingetragen werden
383 // und dann mit P79 und P80 in DuomoDate_TimeSpan genauer beschrieben werden, d.h in P79 kommt dann
384 // ungefaehr
385 // ausserdem muessen "POST" und "ANTE" aus dem Text in das Modell eingepflegt werden.
386
387
388 OWLIndividual timeSpan = createInstance("DuomoDate_TimeSpan");
389
390 OWLIndividual date= createInstance("DuomoDate_Appellation");
391
392 setProperty(date, "rdfs:label", dateDcStart);
393 //setProperty(timeSpan, "has_readable_date", dateDcStart);
394
395 setProperty(timeSpan,"is_identified_by_Date",date);
271 396
272 if(!(dateDcEnd==null || dateDcEnd.equals(""))){ 397 if(!(dateDcEnd==null || dateDcEnd.equals(""))){
273 setProperty(timeSpan, "has_readable_to_date", dateDcEnd); 398 OWLIndividual toDate= createInstance("DuomoDate_Appellation");
399 setProperty(toDate, "rdfs:label", dateDcEnd);
400 //setProperty(timeSpan, "has_readable_toDate", dateDcEnd);
401 setProperty(timeSpan,"is_identified_by_toDate",toDate);
274 } else { 402 } else {
275 dateDcEnd=dateDcStart; 403 dateDcEnd=dateDcStart;
276 } 404 }
277 405
278 OWLIndividual timeSpanIdentifier = createInstance("Identifier"); 406 OWLIndividual timeSpanIdentifier = createInstance("Identifier");
279 setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd); 407 setProperty(timeSpanIdentifier,"rdfs:label",dateDcStart+"-"+dateDcEnd);
280 408
281 setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); 409 setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier);
282 410
283 return timeSpan; 411 return timeSpan;
284 } 412 }
413
414 // public OWLIndividual createTimeSpan2(String dateDcStart, String dateDcEnd) {
415 // OWLIndividual timeSpan = createInstance("DatesDocument");
416 //
417 //
418 //
419 // setProperty(timeSpan, "has_readable_date", dateDcStart);
420 //
421 // if(!(dateDcEnd==null || dateDcEnd.equals(""))){
422 // setProperty(timeSpan, "has_readable_to_date", dateDcEnd);
423 // } else {
424 // dateDcEnd=dateDcStart;
425 // }
426 //
427 // OWLIndividual timeSpanIdentifier = createInstance("Identifier");
428 // setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd);
429 //
430 // setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier);
431 //
432 // return timeSpan;
433 // }
434
285 435
286 public OWLIndividual getIndividualByReadableId(String className,String identifier){ 436 public OWLIndividual getIndividualByReadableId(String className,String identifier){
287 return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "has_readable_id", identifier, true); 437 return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "rdfs:label", identifier, true);
288 } 438 }
289 439
290 public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){ 440 public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){
291 return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "has_readable_id", identifier,subclassedIdentifier); 441 return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "rdfs:label", identifier,subclassedIdentifier);
292 } 442 }
293 443
294 public String getClassNameFromTypeId(String typeId) { 444 public String getClassNameFromTypeId(String typeId) {
295 if (typeId2className==null){ // hash nicht angelegt 445 if (typeId2className==null){ // hash nicht angelegt
296 createTypeId2classHashes(); 446 createTypeId2classHashes();
331 481
332 } 482 }
333 483
334 public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance, 484 public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance,
335 String classNameIdentifier, String identifier,boolean followSubclasses) { 485 String classNameIdentifier, String identifier,boolean followSubclasses) {
486
487 identifier=org.apache.commons.lang.StringUtils.strip(identifier);
488
336 OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses); 489 OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses);
337 if(ind==null){ 490 if(ind==null){
338 ind = createInstance(classNameInstance); 491 ind = createInstance(classNameInstance);
339 OWLIndividual identifierInd = createInstance(classNameIdentifier); 492 OWLIndividual identifierInd = createInstance(classNameIdentifier);
340 setProperty(identifierInd, "has_readable_id", identifier); 493 setProperty(identifierInd, "rdfs:label", identifier);
341 try { 494 try {
342 fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n"); 495 fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n");
343 fh.flush(); 496 fh.flush();
344 } catch (IOException e) { 497 } catch (IOException e) {
345 // TODO Auto-generated catch block 498 // TODO Auto-generated catch block