comparison src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java @ 8:919e9f3b5efd

neue klassen zur textanalyse (stanford parser eingebaut) alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents 19e40abb3e8a
children 4392a6adf85a
comparison
equal deleted inserted replaced
7:19e40abb3e8a 8:919e9f3b5efd
24 import org.jdom.xpath.XPath; 24 import org.jdom.xpath.XPath;
25 25
26 import com.sun.xml.internal.ws.developer.MemberSubmissionEndpointReference.Elements; 26 import com.sun.xml.internal.ws.developer.MemberSubmissionEndpointReference.Elements;
27 27
28 import edu.stanford.smi.protege.exception.OntologyLoadException; 28 import edu.stanford.smi.protege.exception.OntologyLoadException;
29 import edu.stanford.smi.protege.model.Facet;
29 import edu.stanford.smi.protege.model.Instance; 30 import edu.stanford.smi.protege.model.Instance;
31 import edu.stanford.smi.protege.model.Slot;
30 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel; 32 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel;
31 import edu.stanford.smi.protegex.owl.model.OWLAllValuesFrom; 33 import edu.stanford.smi.protegex.owl.model.OWLAllValuesFrom;
32 import edu.stanford.smi.protegex.owl.model.OWLClass; 34 import edu.stanford.smi.protegex.owl.model.OWLClass;
33 import edu.stanford.smi.protegex.owl.model.OWLDataRange; 35 import edu.stanford.smi.protegex.owl.model.OWLDataRange;
34 import edu.stanford.smi.protegex.owl.model.OWLIndividual; 36 import edu.stanford.smi.protegex.owl.model.OWLIndividual;
36 import edu.stanford.smi.protegex.owl.model.OWLProperty; 38 import edu.stanford.smi.protegex.owl.model.OWLProperty;
37 import edu.stanford.smi.protegex.owl.model.OWLUnionClass; 39 import edu.stanford.smi.protegex.owl.model.OWLUnionClass;
38 import edu.stanford.smi.protegex.owl.model.RDFList; 40 import edu.stanford.smi.protegex.owl.model.RDFList;
39 import edu.stanford.smi.protegex.owl.model.RDFProperty; 41 import edu.stanford.smi.protegex.owl.model.RDFProperty;
40 import edu.stanford.smi.protegex.owl.model.RDFResource; 42 import edu.stanford.smi.protegex.owl.model.RDFResource;
43 import edu.stanford.smi.protegex.owl.model.RDFSClass;
41 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLQuantifierRestriction; 44 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLQuantifierRestriction;
45 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLRestriction;
46 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLAllValuesFrom;
42 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLUnionClass; 47 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLUnionClass;
43 48
49 /**
50 * @author dwinter
51 *
52 *Transformiert die Lex Files in OWL
53 */
44 public class LexOWLTransformer { 54 public class LexOWLTransformer {
45 private Logger logger = Logger.getRootLogger(); 55 private Logger logger = Logger.getRootLogger();
46 private FileWriter missing; 56 private FileWriter missing;
47 private OWLImporter owlDoc; 57 private OWLImporter owlDoc;
48 private LexDumpImporter lexDoc; 58 private LexDumpImporter lexDoc;
104 // List<Element> cartas = lexDoc.getCartas(); 114 // List<Element> cartas = lexDoc.getCartas();
105 // System.out.println(cartas.size()); 115 // System.out.println(cartas.size());
106 116
107 LexOWLTransformer tf = new LexOWLTransformer(owlDoc, lexDoc); 117 LexOWLTransformer tf = new LexOWLTransformer(owlDoc, lexDoc);
108 tf.transform(); 118 tf.transform();
109 owlDoc.save("file:///tmp/out.owl"); 119 //owlDoc.save("file:///tmp/out.owl");
110 } 120 }
111 121
122 /**
123 * Hauptmethode zur Transformation
124 * @throws URISyntaxException
125 * @throws Exception
126 */
112 private void transform() throws URISyntaxException, Exception { 127 private void transform() throws URISyntaxException, Exception {
113 List<Element> signatures = lexDoc.getSignatures(); 128 List<Element> signatures = lexDoc.getSignatures();
114 129
115 // Element signature = signatures.get(0); 130 // Element signature = signatures.get(0);
116 int signatureCount = 0; 131 int signatureCount = 0;
167 owlDoc.setProperty(recordInd, "crm:P70_documents", eventInstance); 182 owlDoc.setProperty(recordInd, "crm:P70_documents", eventInstance);
168 183
169 // timespan 184 // timespan
170 185
171 try { 186 try {
172 String dateDcStart = lexDoc.getValue(record, ".//datdf/startdate"); 187 String dateDcStart = lexDoc.getValue(record, ".//datrf/startdate");
173 188
174 String dateDcEnd = lexDoc.getValue(record, ".//datdf/startdate"); 189 String dateDcEnd = lexDoc.getValue(record, ".//datrf/enddate");
175 190
176 if (!dateDcStart.equals("")) { 191 if (!dateDcStart.equals("")) {
177 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, 192 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart,
178 dateDcEnd); 193 dateDcEnd);
179 owlDoc.setProperty(eventInstance, "crm:P4_has_time-span", 194 owlDoc.setProperty(eventInstance, "crm:P4_has_time-span",
215 OWLIndividual recordNamesRoles = null; 230 OWLIndividual recordNamesRoles = null;
216 for (Element nomiq : nomiqs) { 231 for (Element nomiq : nomiqs) {
217 String name = lexDoc.getValue(nomiq, "./name"); 232 String name = lexDoc.getValue(nomiq, "./name");
218 String role = lexDoc.getValue(nomiq, "./role"); 233 String role = lexDoc.getValue(nomiq, "./role");
219 234
235 String provenance = lexDoc.getValue(nomiq, "./name/provenance");
236
237
238
220 if (!name.equals("") && !role.equals("")) { 239 if (!name.equals("") && !role.equals("")) {
221 recordNamesRoles = handleNameWithRole(recordInd, name, role); 240 recordNamesRoles = handleNameWithRole(recordInd, name, role);
222 } else if (!role.equals("")) { 241 } else if (!role.equals("")) {
223 recordNamesRoles = createOrGetRole(role); 242 recordNamesRoles = createOrGetRole(role);
224 } else if (!name.equals("")) { 243 } else if (!name.equals("")) {
225 recordNamesRoles = createOrGetName(name); 244 recordNamesRoles = createOrGetName(name);
245 if (provenance!=""){
246 owlDoc.setDataTypePropery(recordNamesRoles, "has_provenance_as_string", provenance, "it");
247 }
226 } 248 }
227 249
228 if (recordNamesRoles != null) { 250 if (recordNamesRoles != null) {
229 owlDoc.setProperty(eventInstance, 251 owlDoc.setProperty(eventInstance,
230 "recordsDuomoObjectNameRoles", recordNamesRoles); 252 "recordsDuomoObjectNameRoles", recordNamesRoles);
272 294
273 return eventInstance; 295 return eventInstance;
274 } 296 }
275 297
276 private void createType(OWLIndividual eventInstance, Element type) { 298 private void createType(OWLIndividual eventInstance, Element type) {
277 299
300
278 String typeId; 301 String typeId;
279 try { 302 try {
280 typeId = lexDoc.getValue(type, "./ptr/@target"); 303 typeId = lexDoc.getValue(type, "./ptr/@target");
281 String clsName = owlDoc.getClassNameFromTypeId(typeId); 304 String clsName = owlDoc.getClassNameFromTypeId(typeId);
282 OWLNamedClass cls = owlDoc.getClassFromTypeId(typeId);
283 OWLIndividual typeInd = owlDoc.createInstance(clsName); 305 OWLIndividual typeInd = owlDoc.createInstance(clsName);
284 306
285 OWLNamedClass subjectClass = getPreferredTargetClass(cls, 307 owlDoc.setProperty(eventInstance, "has_topic", typeInd);
286 "has_subject");
287 OWLNamedClass predicateClass = getPreferredTargetClass(cls,
288 "has_predicate");
289
290 List<Element> freeTexts = XPath.selectNodes(type, "./freetext"); 308 List<Element> freeTexts = XPath.selectNodes(type, "./freetext");
291 for (Element freeText : freeTexts) { 309 for (Element freeText : freeTexts) {
310 OWLNamedClass cls = owlDoc.getClassFromTypeId(typeId);
311
312 OWLNamedClass subjectClass = getPreferredTargetClass(cls,
313 "has_subject");
314 OWLNamedClass predicateClass = getPreferredTargetClass(cls,
315 "has_predicate");
292 316
293 String subjPointer = lexDoc.getValue(freeText, 317 String subjPointer = lexDoc.getValue(freeText,
294 "./sub/ptrtoperson/@target"); 318 "./sub/ptrtoperson/@target");
295 String subjText = lexDoc.getValue(freeText, "./sub"); 319 String subjText = lexDoc.getValue(freeText, "./sub");
296 320
297 OWLIndividual subjInd = createSubjectOrPredicate(subjectClass, 321 OWLIndividual subjInd = createSubjectOrPredicate(subjectClass,
298 subjPointer, subjText); 322 subjPointer, subjText);
323
324
325 //suche ob eine subproperty von materialInvolved fuer die zum Type (type) gehoerige Klasse (clsName) existiert
326 // und wenn ja welche, TODO: zur Zeit wird dann aus dem String "subjText" das entsprechende Material erzeugt.
327
328
329 //Collection<RDFProperty> props = cls.getPossibleRDFProperties();
330
331
332
333 RDFProperty superproperty= owlDoc.owlModel.getRDFProperty("http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/materialInvolved");
334
335 Collection<RDFSClass> sc = cls.getSuperclasses(true);
336
337 OWLNamedClass mat = owlDoc.owlModel.getOWLNamedClass("http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/DuomoActivities");
338
339 if (sc.contains(mat)){ // cls is subclass of DuomoActivities
340 Collection<RDFProperty> props = superproperty.getSubproperties(false);
341
342 //jetzt suche zu welcher subproperty die classe cls gehört
343
344 //dazu gehe durch alle subproperties von materialInvolved-
345 for (RDFProperty prop:props){
346
347 @SuppressWarnings("unchecked")
348 Collection<RDFSClass> domains = prop.getDomains(true);
349
350 for(RDFSClass domain: domains){
351 //if (domain.getName().equals(cls.getName()))
352
353 //suche jetzt die domaene zu dieser property und schaue ob die cls eine subklasse davon ist
354 if (cls.isSubclassOf(domain)) // cls ist in der domaene der property
355 {
356
357 //die propery muss genau einen wert aus einer festen klasse haben, diese wird jetzt gesucht und dann eine individual erzeugt.
358 Collection<AbstractOWLRestriction> restrictions = cls.getRestrictions(prop, false); // suche die restriction und erzeuge dann ein object dieses type
359 for (AbstractOWLRestriction restriction: restrictions){
360 if (DefaultOWLAllValuesFrom.class.isInstance(restriction)){
361 DefaultOWLAllValuesFrom rest = (DefaultOWLAllValuesFrom)restriction;
362 RDFResource restClass = rest.getAllValuesFrom();
363 OWLIndividual inst = owlDoc.createOrGetInstanceWithIdentifier(restClass.getLocalName(), "Identifier", subjText, false);
364 owlDoc.setProperty(typeInd, prop.getLocalName(), inst);
365 //materialInd = owlDoc.createInstance(res.getName());
366 }
367 }
368 }
369 }
370 }
371 }
299 372
300 String predPointer = lexDoc.getValue(freeText, 373 String predPointer = lexDoc.getValue(freeText,
301 "./pred/ptrtoperson/@target"); 374 "./pred/ptrtoperson/@target");
302 String predText = lexDoc.getValue(freeText, "./pred"); 375 String predText = lexDoc.getValue(freeText, "./pred");
303 376
326 private OWLIndividual createSubjectOrPredicate(OWLNamedClass toClass, 399 private OWLIndividual createSubjectOrPredicate(OWLNamedClass toClass,
327 String subjPointer, String subjText) { 400 String subjPointer, String subjText) {
328 OWLIndividual subjInd = null; 401 OWLIndividual subjInd = null;
329 402
330 if (!subjPointer.equals("")) { 403 if (!subjPointer.equals("")) {
331 subjInd = toClass.createOWLIndividual(null); 404 subjInd = owlDoc.createInstance(toClass.getName());
405 //subjInd = toClass.createOWLIndividual(null);
332 OWLIndividual ind = individualIds.get(subjPointer); 406 OWLIndividual ind = individualIds.get(subjPointer);
333 if (ind == null) { 407 if (ind == null) {
334 logger.debug("target ID does not exist:" + subjPointer); 408 logger.debug("target ID does not exist:" + subjPointer);
335 try { 409 try {
336 missing.write("target ID does not exist:" + subjPointer+"\n"); 410 missing.write("target ID does not exist:" + subjPointer+"\n");
342 } else { 416 } else {
343 owlDoc.setProperty(subjInd, "has_NameOrRoleFromIndex", ind); 417 owlDoc.setProperty(subjInd, "has_NameOrRoleFromIndex", ind);
344 } 418 }
345 } 419 }
346 420
347 if (!subjText.equals("")) { 421 if (!subjText.equals("") & !subjText.equals(" ")) {
348 if (subjInd == null) 422 if (subjInd == null)
349 subjInd = toClass.createOWLIndividual(null); 423 subjInd = owlDoc.createInstance(toClass.getName());
424 //subjInd = toClass.createOWLIndividual(null);
350 425
351 OWLNamedClass idcls = owlDoc.owlModel 426 OWLNamedClass idcls = owlDoc.owlModel
352 .getOWLNamedClass("Identifier"); // is die klasse selbst 427 .getOWLNamedClass("Identifier"); // is die klasse selbst
353 // schon ein identifiert 428 // schon ein identifiert
354 if (toClass.getNamedSuperclasses(true).contains(idcls)) { // to 429 if (toClass.getNamedSuperclasses(true).contains(idcls)) { // to
355 owlDoc.setProperty(subjInd, "has_readable_id", subjText); 430 owlDoc.setProperty(subjInd, "rdfs:label", subjText);
356 } else { 431 } else {
357 432
358 OWLIndividual ident = owlDoc 433 OWLIndividual ident = owlDoc
359 .createInstance("IdentifierPredicateOrSubject"); 434 .createInstance("IdentifierPredicateOrSubject");
360 owlDoc.setProperty(ident, "has_readable_id", subjText); 435 owlDoc.setProperty(ident, "rdfs:label", subjText);
361 owlDoc.setProperty(subjInd, "crm:P48_has_preferred_identifier", 436 owlDoc.setProperty(subjInd, "crm:P48_has_preferred_identifier",
362 ident); 437 ident);
363 } 438 }
364 } 439 }
365 return subjInd; 440 return subjInd;
511 private OWLIndividual createRecord(Element record, OWLIndividual cardInd) 586 private OWLIndividual createRecord(Element record, OWLIndividual cardInd)
512 throws JDOMException { 587 throws JDOMException {
513 OWLIndividual recordInstance = owlDoc.createInstance("Record"); 588 OWLIndividual recordInstance = owlDoc.createInstance("Record");
514 owlDoc.setProperty(recordInstance, "is_on_card", cardInd); 589 owlDoc.setProperty(recordInstance, "is_on_card", cardInd);
515 createNewDependingInstanceFromXpath(record, recordInstance, "./@id", 590 createNewDependingInstanceFromXpath(record, recordInstance, "./@id",
516 new String[] { "has_readable_id", "rdfs:label" }, 591 new String[] { "rdfs:label" },
517 "IdentifierCurrent", "crm:P48_has_preferred_identifier"); 592 "IdentifierCurrent", "crm:P48_has_preferred_identifier");
518 593
519 String value = lexDoc.getValue(record, ".//textblockid"); 594 String value = lexDoc.getValue(record, ".//textblockid");
520 if (!value.equals("")) 595 if (!value.equals(""))
521 owlDoc.setProperty(recordInstance, "has_textblockid", value); 596 owlDoc.setProperty(recordInstance, "has_textblockid", value);
522 597
523 String endOnCarta = lexDoc.getValue(record, "./@end_on_carta"); 598 String endOnCarta = lexDoc.getValue(record, "./@end_on_carta");
599
600 //FIXME: addRecordToCarta ist buggy. siehe dort! ausserdem wird nicht berücksichtigt, dass zwischen
601 // card und end_on_carta mehr als eine liegen kann, zur Zeit wird nur die carta die in end_on_carta beschrieben wird zu
602 // record mittels is_on_card hinzugefügt.
524 if (!endOnCarta.equals("")) { 603 if (!endOnCarta.equals("")) {
525 OWLIndividual signature = (OWLIndividual) owlDoc 604 OWLIndividual signature = (OWLIndividual) owlDoc
526 .getRelatedIndividual(cardInd, "has_signature"); 605 .getRelatedIndividual(cardInd, "has_signature");
527 addRecordToCarta(recordInstance, value, signature); 606 addRecordToCarta(recordInstance, endOnCarta, signature);
528 } 607 }
529 608
530 String dateDcStart = lexDoc.getValue(record, ".//datdc/startdate"); 609 String dateDcStart = lexDoc.getValue(record, ".//datdc/startdate");
531 String dateDcEnd = lexDoc.getValue(record, ".//datdc/startdate"); 610 String dateDcEnd = lexDoc.getValue(record, ".//datdc/enddate");
532 611
533 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, dateDcEnd); 612 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, dateDcEnd);
534 613
535 owlDoc.setProperty(recordInstance, "crm:P4_has_time-span", timeSpan); 614 owlDoc.setProperty(recordInstance, "crm:P4_has_time-span", timeSpan);
536 615
538 } 617 }
539 618
540 private void addRecordToCarta(OWLIndividual recordInstance, String cardID, 619 private void addRecordToCarta(OWLIndividual recordInstance, String cardID,
541 OWLIndividual signature) { 620 OWLIndividual signature) {
542 621
622 //FIXME: cardID ist nur innerhalb einer Signatur eindeutig, d.h., es muss die cardID gefunden werden die in der
623 // selben signatur lebt wie "signature"
543 OWLIndividual card = owlDoc.getIndividualByReadableId("Card", cardID); 624 OWLIndividual card = owlDoc.getIndividualByReadableId("Card", cardID);
544 625
545 if (card == null) { 626 if (card == null) {
546 card = createCard(cardID, signature); 627 card = createCard(cardID, signature);
547 } 628 }
565 OWLIndividual cardInstance = owlDoc.createInstance("Card"); 646 OWLIndividual cardInstance = owlDoc.createInstance("Card");
566 647
567 owlDoc.setProperty(cardInstance, "has_signature", signature); 648 owlDoc.setProperty(cardInstance, "has_signature", signature);
568 649
569 OWLIndividual preferredId = owlDoc.createInstance("IdentifierCurrent"); 650 OWLIndividual preferredId = owlDoc.createInstance("IdentifierCurrent");
570 owlDoc.setProperty(preferredId, "has_readable_id", cardId);
571 owlDoc.setProperty(preferredId, "rdfs:label", cardId); 651 owlDoc.setProperty(preferredId, "rdfs:label", cardId);
572 652
573 owlDoc.setProperty(cardInstance, "crm:P48_has_preferred_identifier", 653 owlDoc.setProperty(cardInstance, "crm:P48_has_preferred_identifier",
574 preferredId); 654 preferredId);
575 655
581 OWLIndividual cardInstance = owlDoc.createInstance("Card"); 661 OWLIndividual cardInstance = owlDoc.createInstance("Card");
582 662
583 try { 663 try {
584 664
585 createNewDependingInstanceFromXpath(card, cardInstance, 665 createNewDependingInstanceFromXpath(card, cardInstance,
586 ".//cartanr", new String[] { "has_readable_id", 666 ".//cartanr", new String[] { "rdfs:label" }, "IdentifierCurrent",
587 "rdfs:label" }, "IdentifierCurrent",
588 "crm:P48_has_preferred_identifier"); 667 "crm:P48_has_preferred_identifier");
589 668
590 createNewDependingInstanceFromXpath(card, cardInstance, 669 createNewDependingInstanceFromXpath(card, cardInstance,
591 ".//cartaant", new String[] { "has_readable_id", 670 ".//cartaant", new String[] { "rdfs:label" }, "IdentifierCurrent",
592 "rdfs:label" }, "IdentifierCurrent",
593 "crm:P1_is_identified_by"); 671 "crm:P1_is_identified_by");
594 672
595 owlDoc.setProperty(cardInstance, "has_signature", signatureInd); 673 owlDoc.setProperty(cardInstance, "has_signature", signatureInd);
596 674
597 } catch (JDOMException e) { 675 } catch (JDOMException e) {