Mercurial > hg > duomoOWLProject
comparison src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java @ 8:919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 17:08:22 +0200 |
parents | 19e40abb3e8a |
children | 4392a6adf85a |
comparison
equal
deleted
inserted
replaced
7:19e40abb3e8a | 8:919e9f3b5efd |
---|---|
24 import org.jdom.xpath.XPath; | 24 import org.jdom.xpath.XPath; |
25 | 25 |
26 import com.sun.xml.internal.ws.developer.MemberSubmissionEndpointReference.Elements; | 26 import com.sun.xml.internal.ws.developer.MemberSubmissionEndpointReference.Elements; |
27 | 27 |
28 import edu.stanford.smi.protege.exception.OntologyLoadException; | 28 import edu.stanford.smi.protege.exception.OntologyLoadException; |
29 import edu.stanford.smi.protege.model.Facet; | |
29 import edu.stanford.smi.protege.model.Instance; | 30 import edu.stanford.smi.protege.model.Instance; |
31 import edu.stanford.smi.protege.model.Slot; | |
30 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel; | 32 import edu.stanford.smi.protegex.owl.jena.JenaOWLModel; |
31 import edu.stanford.smi.protegex.owl.model.OWLAllValuesFrom; | 33 import edu.stanford.smi.protegex.owl.model.OWLAllValuesFrom; |
32 import edu.stanford.smi.protegex.owl.model.OWLClass; | 34 import edu.stanford.smi.protegex.owl.model.OWLClass; |
33 import edu.stanford.smi.protegex.owl.model.OWLDataRange; | 35 import edu.stanford.smi.protegex.owl.model.OWLDataRange; |
34 import edu.stanford.smi.protegex.owl.model.OWLIndividual; | 36 import edu.stanford.smi.protegex.owl.model.OWLIndividual; |
36 import edu.stanford.smi.protegex.owl.model.OWLProperty; | 38 import edu.stanford.smi.protegex.owl.model.OWLProperty; |
37 import edu.stanford.smi.protegex.owl.model.OWLUnionClass; | 39 import edu.stanford.smi.protegex.owl.model.OWLUnionClass; |
38 import edu.stanford.smi.protegex.owl.model.RDFList; | 40 import edu.stanford.smi.protegex.owl.model.RDFList; |
39 import edu.stanford.smi.protegex.owl.model.RDFProperty; | 41 import edu.stanford.smi.protegex.owl.model.RDFProperty; |
40 import edu.stanford.smi.protegex.owl.model.RDFResource; | 42 import edu.stanford.smi.protegex.owl.model.RDFResource; |
43 import edu.stanford.smi.protegex.owl.model.RDFSClass; | |
41 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLQuantifierRestriction; | 44 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLQuantifierRestriction; |
45 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLRestriction; | |
46 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLAllValuesFrom; | |
42 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLUnionClass; | 47 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLUnionClass; |
43 | 48 |
49 /** | |
50 * @author dwinter | |
51 * | |
52 *Transformiert die Lex Files in OWL | |
53 */ | |
44 public class LexOWLTransformer { | 54 public class LexOWLTransformer { |
45 private Logger logger = Logger.getRootLogger(); | 55 private Logger logger = Logger.getRootLogger(); |
46 private FileWriter missing; | 56 private FileWriter missing; |
47 private OWLImporter owlDoc; | 57 private OWLImporter owlDoc; |
48 private LexDumpImporter lexDoc; | 58 private LexDumpImporter lexDoc; |
104 // List<Element> cartas = lexDoc.getCartas(); | 114 // List<Element> cartas = lexDoc.getCartas(); |
105 // System.out.println(cartas.size()); | 115 // System.out.println(cartas.size()); |
106 | 116 |
107 LexOWLTransformer tf = new LexOWLTransformer(owlDoc, lexDoc); | 117 LexOWLTransformer tf = new LexOWLTransformer(owlDoc, lexDoc); |
108 tf.transform(); | 118 tf.transform(); |
109 owlDoc.save("file:///tmp/out.owl"); | 119 //owlDoc.save("file:///tmp/out.owl"); |
110 } | 120 } |
111 | 121 |
122 /** | |
123 * Hauptmethode zur Transformation | |
124 * @throws URISyntaxException | |
125 * @throws Exception | |
126 */ | |
112 private void transform() throws URISyntaxException, Exception { | 127 private void transform() throws URISyntaxException, Exception { |
113 List<Element> signatures = lexDoc.getSignatures(); | 128 List<Element> signatures = lexDoc.getSignatures(); |
114 | 129 |
115 // Element signature = signatures.get(0); | 130 // Element signature = signatures.get(0); |
116 int signatureCount = 0; | 131 int signatureCount = 0; |
167 owlDoc.setProperty(recordInd, "crm:P70_documents", eventInstance); | 182 owlDoc.setProperty(recordInd, "crm:P70_documents", eventInstance); |
168 | 183 |
169 // timespan | 184 // timespan |
170 | 185 |
171 try { | 186 try { |
172 String dateDcStart = lexDoc.getValue(record, ".//datdf/startdate"); | 187 String dateDcStart = lexDoc.getValue(record, ".//datrf/startdate"); |
173 | 188 |
174 String dateDcEnd = lexDoc.getValue(record, ".//datdf/startdate"); | 189 String dateDcEnd = lexDoc.getValue(record, ".//datrf/enddate"); |
175 | 190 |
176 if (!dateDcStart.equals("")) { | 191 if (!dateDcStart.equals("")) { |
177 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, | 192 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, |
178 dateDcEnd); | 193 dateDcEnd); |
179 owlDoc.setProperty(eventInstance, "crm:P4_has_time-span", | 194 owlDoc.setProperty(eventInstance, "crm:P4_has_time-span", |
215 OWLIndividual recordNamesRoles = null; | 230 OWLIndividual recordNamesRoles = null; |
216 for (Element nomiq : nomiqs) { | 231 for (Element nomiq : nomiqs) { |
217 String name = lexDoc.getValue(nomiq, "./name"); | 232 String name = lexDoc.getValue(nomiq, "./name"); |
218 String role = lexDoc.getValue(nomiq, "./role"); | 233 String role = lexDoc.getValue(nomiq, "./role"); |
219 | 234 |
235 String provenance = lexDoc.getValue(nomiq, "./name/provenance"); | |
236 | |
237 | |
238 | |
220 if (!name.equals("") && !role.equals("")) { | 239 if (!name.equals("") && !role.equals("")) { |
221 recordNamesRoles = handleNameWithRole(recordInd, name, role); | 240 recordNamesRoles = handleNameWithRole(recordInd, name, role); |
222 } else if (!role.equals("")) { | 241 } else if (!role.equals("")) { |
223 recordNamesRoles = createOrGetRole(role); | 242 recordNamesRoles = createOrGetRole(role); |
224 } else if (!name.equals("")) { | 243 } else if (!name.equals("")) { |
225 recordNamesRoles = createOrGetName(name); | 244 recordNamesRoles = createOrGetName(name); |
245 if (provenance!=""){ | |
246 owlDoc.setDataTypePropery(recordNamesRoles, "has_provenance_as_string", provenance, "it"); | |
247 } | |
226 } | 248 } |
227 | 249 |
228 if (recordNamesRoles != null) { | 250 if (recordNamesRoles != null) { |
229 owlDoc.setProperty(eventInstance, | 251 owlDoc.setProperty(eventInstance, |
230 "recordsDuomoObjectNameRoles", recordNamesRoles); | 252 "recordsDuomoObjectNameRoles", recordNamesRoles); |
272 | 294 |
273 return eventInstance; | 295 return eventInstance; |
274 } | 296 } |
275 | 297 |
276 private void createType(OWLIndividual eventInstance, Element type) { | 298 private void createType(OWLIndividual eventInstance, Element type) { |
277 | 299 |
300 | |
278 String typeId; | 301 String typeId; |
279 try { | 302 try { |
280 typeId = lexDoc.getValue(type, "./ptr/@target"); | 303 typeId = lexDoc.getValue(type, "./ptr/@target"); |
281 String clsName = owlDoc.getClassNameFromTypeId(typeId); | 304 String clsName = owlDoc.getClassNameFromTypeId(typeId); |
282 OWLNamedClass cls = owlDoc.getClassFromTypeId(typeId); | |
283 OWLIndividual typeInd = owlDoc.createInstance(clsName); | 305 OWLIndividual typeInd = owlDoc.createInstance(clsName); |
284 | 306 |
285 OWLNamedClass subjectClass = getPreferredTargetClass(cls, | 307 owlDoc.setProperty(eventInstance, "has_topic", typeInd); |
286 "has_subject"); | |
287 OWLNamedClass predicateClass = getPreferredTargetClass(cls, | |
288 "has_predicate"); | |
289 | |
290 List<Element> freeTexts = XPath.selectNodes(type, "./freetext"); | 308 List<Element> freeTexts = XPath.selectNodes(type, "./freetext"); |
291 for (Element freeText : freeTexts) { | 309 for (Element freeText : freeTexts) { |
310 OWLNamedClass cls = owlDoc.getClassFromTypeId(typeId); | |
311 | |
312 OWLNamedClass subjectClass = getPreferredTargetClass(cls, | |
313 "has_subject"); | |
314 OWLNamedClass predicateClass = getPreferredTargetClass(cls, | |
315 "has_predicate"); | |
292 | 316 |
293 String subjPointer = lexDoc.getValue(freeText, | 317 String subjPointer = lexDoc.getValue(freeText, |
294 "./sub/ptrtoperson/@target"); | 318 "./sub/ptrtoperson/@target"); |
295 String subjText = lexDoc.getValue(freeText, "./sub"); | 319 String subjText = lexDoc.getValue(freeText, "./sub"); |
296 | 320 |
297 OWLIndividual subjInd = createSubjectOrPredicate(subjectClass, | 321 OWLIndividual subjInd = createSubjectOrPredicate(subjectClass, |
298 subjPointer, subjText); | 322 subjPointer, subjText); |
323 | |
324 | |
325 //suche ob eine subproperty von materialInvolved für die zum Type (type) gehörige Klasse (clsName) existiert | |
326 // und wenn ja welche, TODO: zur Zeit wird dann aus dem String "subjText" das entsprechende Material erzeugt. | |
327 | |
328 | |
329 //Collection<RDFProperty> props = cls.getPossibleRDFProperties(); | |
330 | |
331 | |
332 | |
333 RDFProperty superproperty= owlDoc.owlModel.getRDFProperty("http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/materialInvolved"); | |
334 | |
335 Collection<RDFSClass> sc = cls.getSuperclasses(true); | |
336 | |
337 OWLNamedClass mat = owlDoc.owlModel.getOWLNamedClass("http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/DuomoActivities"); | |
338 | |
339 if (sc.contains(mat)){ // cls is subclass of DuomoActivities | |
340 Collection<RDFProperty> props = superproperty.getSubproperties(false); | |
341 | |
342 //jetzt suche zu welcher subproperty die classe cls gehört | |
343 | |
344 //dazu gehe durch alle subproperties von materialInvolved- | |
345 for (RDFProperty prop:props){ | |
346 | |
347 @SuppressWarnings("unchecked") | |
348 Collection<RDFSClass> domains = prop.getDomains(true); | |
349 | |
350 for(RDFSClass domain: domains){ | |
351 //if (domain.getName().equals(cls.getName())) | |
352 | |
353 //suche jetzt die domaene zu diesen property in schaue ob die cls eine subklasse davon ist | |
354 if (cls.isSubclassOf(domain)) // cls ist in der domaene der property | |
355 { | |
356 | |
357 //die property muss genau einen wert aus einer festen klasse haben, diese wird jetzt gesucht und dann eine individual erzeugt. | |
358 Collection<AbstractOWLRestriction> restrictions = cls.getRestrictions(prop, false); // suche die restriction und erzeuge dann ein object dieses type | |
359 for (AbstractOWLRestriction restriction: restrictions){ | |
360 if (DefaultOWLAllValuesFrom.class.isInstance(restriction)){ | |
361 DefaultOWLAllValuesFrom rest = (DefaultOWLAllValuesFrom)restriction; | |
362 RDFResource restClass = rest.getAllValuesFrom(); | |
363 OWLIndividual inst = owlDoc.createOrGetInstanceWithIdentifier(restClass.getLocalName(), "Identifier", subjText, false); | |
364 owlDoc.setProperty(typeInd, prop.getLocalName(), inst); | |
365 //materialInd = owlDoc.createInstance(res.getName()); | |
366 } | |
367 } | |
368 } | |
369 } | |
370 } | |
371 } | |
299 | 372 |
300 String predPointer = lexDoc.getValue(freeText, | 373 String predPointer = lexDoc.getValue(freeText, |
301 "./pred/ptrtoperson/@target"); | 374 "./pred/ptrtoperson/@target"); |
302 String predText = lexDoc.getValue(freeText, "./pred"); | 375 String predText = lexDoc.getValue(freeText, "./pred"); |
303 | 376 |
326 private OWLIndividual createSubjectOrPredicate(OWLNamedClass toClass, | 399 private OWLIndividual createSubjectOrPredicate(OWLNamedClass toClass, |
327 String subjPointer, String subjText) { | 400 String subjPointer, String subjText) { |
328 OWLIndividual subjInd = null; | 401 OWLIndividual subjInd = null; |
329 | 402 |
330 if (!subjPointer.equals("")) { | 403 if (!subjPointer.equals("")) { |
331 subjInd = toClass.createOWLIndividual(null); | 404 subjInd = owlDoc.createInstance(toClass.getName()); |
405 //subjInd = toClass.createOWLIndividual(null); | |
332 OWLIndividual ind = individualIds.get(subjPointer); | 406 OWLIndividual ind = individualIds.get(subjPointer); |
333 if (ind == null) { | 407 if (ind == null) { |
334 logger.debug("target ID does not exist:" + subjPointer); | 408 logger.debug("target ID does not exist:" + subjPointer); |
335 try { | 409 try { |
336 missing.write("target ID does not exist:" + subjPointer+"\n"); | 410 missing.write("target ID does not exist:" + subjPointer+"\n"); |
342 } else { | 416 } else { |
343 owlDoc.setProperty(subjInd, "has_NameOrRoleFromIndex", ind); | 417 owlDoc.setProperty(subjInd, "has_NameOrRoleFromIndex", ind); |
344 } | 418 } |
345 } | 419 } |
346 | 420 |
347 if (!subjText.equals("")) { | 421 if (!subjText.equals("") & !subjText.equals(" ")) { |
348 if (subjInd == null) | 422 if (subjInd == null) |
349 subjInd = toClass.createOWLIndividual(null); | 423 subjInd = owlDoc.createInstance(toClass.getName()); |
424 //subjInd = toClass.createOWLIndividual(null); | |
350 | 425 |
351 OWLNamedClass idcls = owlDoc.owlModel | 426 OWLNamedClass idcls = owlDoc.owlModel |
352 .getOWLNamedClass("Identifier"); // is die klasse selbst | 427 .getOWLNamedClass("Identifier"); // is die klasse selbst |
353 // schon ein identifiert | 428 // schon ein identifiert |
354 if (toClass.getNamedSuperclasses(true).contains(idcls)) { // to | 429 if (toClass.getNamedSuperclasses(true).contains(idcls)) { // to |
355 owlDoc.setProperty(subjInd, "has_readable_id", subjText); | 430 owlDoc.setProperty(subjInd, "rdfs:label", subjText); |
356 } else { | 431 } else { |
357 | 432 |
358 OWLIndividual ident = owlDoc | 433 OWLIndividual ident = owlDoc |
359 .createInstance("IdentifierPredicateOrSubject"); | 434 .createInstance("IdentifierPredicateOrSubject"); |
360 owlDoc.setProperty(ident, "has_readable_id", subjText); | 435 owlDoc.setProperty(ident, "rdfs:label", subjText); |
361 owlDoc.setProperty(subjInd, "crm:P48_has_preferred_identifier", | 436 owlDoc.setProperty(subjInd, "crm:P48_has_preferred_identifier", |
362 ident); | 437 ident); |
363 } | 438 } |
364 } | 439 } |
365 return subjInd; | 440 return subjInd; |
511 private OWLIndividual createRecord(Element record, OWLIndividual cardInd) | 586 private OWLIndividual createRecord(Element record, OWLIndividual cardInd) |
512 throws JDOMException { | 587 throws JDOMException { |
513 OWLIndividual recordInstance = owlDoc.createInstance("Record"); | 588 OWLIndividual recordInstance = owlDoc.createInstance("Record"); |
514 owlDoc.setProperty(recordInstance, "is_on_card", cardInd); | 589 owlDoc.setProperty(recordInstance, "is_on_card", cardInd); |
515 createNewDependingInstanceFromXpath(record, recordInstance, "./@id", | 590 createNewDependingInstanceFromXpath(record, recordInstance, "./@id", |
516 new String[] { "has_readable_id", "rdfs:label" }, | 591 new String[] { "rdfs:label" }, |
517 "IdentifierCurrent", "crm:P48_has_preferred_identifier"); | 592 "IdentifierCurrent", "crm:P48_has_preferred_identifier"); |
518 | 593 |
519 String value = lexDoc.getValue(record, ".//textblockid"); | 594 String value = lexDoc.getValue(record, ".//textblockid"); |
520 if (!value.equals("")) | 595 if (!value.equals("")) |
521 owlDoc.setProperty(recordInstance, "has_textblockid", value); | 596 owlDoc.setProperty(recordInstance, "has_textblockid", value); |
522 | 597 |
523 String endOnCarta = lexDoc.getValue(record, "./@end_on_carta"); | 598 String endOnCarta = lexDoc.getValue(record, "./@end_on_carta"); |
599 | |
600 //FIXME: addRecordToCarta ist buggy. siehe dort! ausserdem wird nicht berücksichtigt, dass zwischen | |
601 // card und end_on_carta mehr als eine liegen kann, zur Zeit wird nur die carta die in end_on_carta beschrieben wird zu | |
602 // record mittels is_on_card hinzugefügt. | |
524 if (!endOnCarta.equals("")) { | 603 if (!endOnCarta.equals("")) { |
525 OWLIndividual signature = (OWLIndividual) owlDoc | 604 OWLIndividual signature = (OWLIndividual) owlDoc |
526 .getRelatedIndividual(cardInd, "has_signature"); | 605 .getRelatedIndividual(cardInd, "has_signature"); |
527 addRecordToCarta(recordInstance, value, signature); | 606 addRecordToCarta(recordInstance, endOnCarta, signature); |
528 } | 607 } |
529 | 608 |
530 String dateDcStart = lexDoc.getValue(record, ".//datdc/startdate"); | 609 String dateDcStart = lexDoc.getValue(record, ".//datdc/startdate"); |
531 String dateDcEnd = lexDoc.getValue(record, ".//datdc/startdate"); | 610 String dateDcEnd = lexDoc.getValue(record, ".//datdc/enddate"); |
532 | 611 |
533 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, dateDcEnd); | 612 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, dateDcEnd); |
534 | 613 |
535 owlDoc.setProperty(recordInstance, "crm:P4_has_time-span", timeSpan); | 614 owlDoc.setProperty(recordInstance, "crm:P4_has_time-span", timeSpan); |
536 | 615 |
538 } | 617 } |
539 | 618 |
540 private void addRecordToCarta(OWLIndividual recordInstance, String cardID, | 619 private void addRecordToCarta(OWLIndividual recordInstance, String cardID, |
541 OWLIndividual signature) { | 620 OWLIndividual signature) { |
542 | 621 |
622 //FIXME: cardID ist nur innerhalb einer Signatur eindeutig, d.h., es muss die cardID gefunden werden die in der | |
623 // selben signatur lebt wie "signature" | |
543 OWLIndividual card = owlDoc.getIndividualByReadableId("Card", cardID); | 624 OWLIndividual card = owlDoc.getIndividualByReadableId("Card", cardID); |
544 | 625 |
545 if (card == null) { | 626 if (card == null) { |
546 card = createCard(cardID, signature); | 627 card = createCard(cardID, signature); |
547 } | 628 } |
565 OWLIndividual cardInstance = owlDoc.createInstance("Card"); | 646 OWLIndividual cardInstance = owlDoc.createInstance("Card"); |
566 | 647 |
567 owlDoc.setProperty(cardInstance, "has_signature", signature); | 648 owlDoc.setProperty(cardInstance, "has_signature", signature); |
568 | 649 |
569 OWLIndividual preferredId = owlDoc.createInstance("IdentifierCurrent"); | 650 OWLIndividual preferredId = owlDoc.createInstance("IdentifierCurrent"); |
570 owlDoc.setProperty(preferredId, "has_readable_id", cardId); | |
571 owlDoc.setProperty(preferredId, "rdfs:label", cardId); | 651 owlDoc.setProperty(preferredId, "rdfs:label", cardId); |
572 | 652 |
573 owlDoc.setProperty(cardInstance, "crm:P48_has_preferred_identifier", | 653 owlDoc.setProperty(cardInstance, "crm:P48_has_preferred_identifier", |
574 preferredId); | 654 preferredId); |
575 | 655 |
581 OWLIndividual cardInstance = owlDoc.createInstance("Card"); | 661 OWLIndividual cardInstance = owlDoc.createInstance("Card"); |
582 | 662 |
583 try { | 663 try { |
584 | 664 |
585 createNewDependingInstanceFromXpath(card, cardInstance, | 665 createNewDependingInstanceFromXpath(card, cardInstance, |
586 ".//cartanr", new String[] { "has_readable_id", | 666 ".//cartanr", new String[] { "rdfs:label" }, "IdentifierCurrent", |
587 "rdfs:label" }, "IdentifierCurrent", | |
588 "crm:P48_has_preferred_identifier"); | 667 "crm:P48_has_preferred_identifier"); |
589 | 668 |
590 createNewDependingInstanceFromXpath(card, cardInstance, | 669 createNewDependingInstanceFromXpath(card, cardInstance, |
591 ".//cartaant", new String[] { "has_readable_id", | 670 ".//cartaant", new String[] { "rdfs:label" }, "IdentifierCurrent", |
592 "rdfs:label" }, "IdentifierCurrent", | |
593 "crm:P1_is_identified_by"); | 671 "crm:P1_is_identified_by"); |
594 | 672 |
595 owlDoc.setProperty(cardInstance, "has_signature", signatureInd); | 673 owlDoc.setProperty(cardInstance, "has_signature", signatureInd); |
596 | 674 |
597 } catch (JDOMException e) { | 675 } catch (JDOMException e) { |