comparison src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java @ 0:fb3f3df002df

first release
author dwinter
date Thu, 03 Feb 2011 10:15:16 +0100
parents
children 0fa29ab5e5e0
comparison
equal deleted inserted replaced
-1:000000000000 0:fb3f3df002df
1 package de.mpiwg.dwinter.duomo.lexdump;
2
3 import java.io.IOException;
4 import java.net.URI;
5 import java.net.URISyntaxException;
6 import java.net.URLEncoder;
7 import java.util.Collection;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.Set;
14
15 import javax.print.URIException;
16
17 import org.apache.log4j.Level;
18 import org.apache.log4j.Logger;
19 import org.jdom.Attribute;
20 import org.jdom.Element;
21 import org.jdom.JDOMException;
22 import org.jdom.xpath.XPath;
23
24 import com.sun.xml.internal.ws.developer.MemberSubmissionEndpointReference.Elements;
25
26 import edu.stanford.smi.protege.exception.OntologyLoadException;
27 import edu.stanford.smi.protege.model.Instance;
28 import edu.stanford.smi.protegex.owl.model.OWLAllValuesFrom;
29 import edu.stanford.smi.protegex.owl.model.OWLClass;
30 import edu.stanford.smi.protegex.owl.model.OWLDataRange;
31 import edu.stanford.smi.protegex.owl.model.OWLIndividual;
32 import edu.stanford.smi.protegex.owl.model.OWLNamedClass;
33 import edu.stanford.smi.protegex.owl.model.OWLProperty;
34 import edu.stanford.smi.protegex.owl.model.OWLUnionClass;
35 import edu.stanford.smi.protegex.owl.model.RDFList;
36 import edu.stanford.smi.protegex.owl.model.RDFProperty;
37 import edu.stanford.smi.protegex.owl.model.RDFResource;
38 import edu.stanford.smi.protegex.owl.model.impl.AbstractOWLQuantifierRestriction;
39 import edu.stanford.smi.protegex.owl.model.impl.DefaultOWLUnionClass;
40
41 public class LexOWLTransformer {
42 private Logger logger = Logger.getRootLogger();
43 private OWLImporter owlDoc;
44 private LexDumpImporter lexDoc;
45 private Map<String, OWLIndividual> individualIds = new HashMap<String, OWLIndividual>(); // speichere
46 // ids
47 // fuer
48 // weitere
49 // Verwendung
50
/**
 * Creates a transformer that reads from a lex dump and writes into an ontology.
 *
 * @param owlDoc ontology wrapper in which the individuals are created
 * @param lexDoc parsed lex dump to read from
 */
public LexOWLTransformer(OWLImporter owlDoc, LexDumpImporter lexDoc) {
    this.lexDoc = lexDoc;
    this.owlDoc = owlDoc;
}
55
56 /**
57 * @param args
58 * @throws Exception
59 */
60 public static void main(String[] args) throws Exception {
61 Logger.getRootLogger().setLevel(Level.DEBUG);
62 // Import OWL
63 //String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput";
64 String base=args[0];
65 String baseUri= base.replace(" ", "%20");
66 URI ontologieUri = new URI("file://"+baseUri+"/duomoAnalysis.owl");
67 //URI ontologieUri = new URI(
68 // "file:///Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput/duomoAnalysis.owl");
69 OWLImporter owlDoc = null;
70 try {
71
72 owlDoc = new OWLImporter(base, ontologieUri);
73 // owlDoc.printModel();
74 } catch (OntologyLoadException e) {
75 e.printStackTrace();
76 System.exit(-1);
77 }
78 // read and parse lexfile
79 //String lexFile = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/LEXXDUMP.xml";
80 String lexFile=args[1];
81 LexDumpImporter lexDoc = null;
82 try {
83 lexDoc = new LexDumpImporter(lexFile);
84 } catch (JDOMException e) {
85 e.printStackTrace();
86 System.exit(-1);
87 } catch (IOException e) {
88 System.exit(-1);
89 e.printStackTrace();
90 }
91
92 // List<Element> cartas = lexDoc.getCartas();
93 // System.out.println(cartas.size());
94
95 LexOWLTransformer tf = new LexOWLTransformer(owlDoc, lexDoc);
96 tf.transform();
97 owlDoc.save("file:///tmp/out.owl");
98 }
99
100 private void transform() throws JDOMException {
101 List<Element> signatures = lexDoc.getSignatures();
102
103 // Element signature = signatures.get(0);
104 int signatureCount = 1;
105 int maxsign = signatures.size();
106 for (Element signature : signatures) {
107 logger.debug(String.format("Signature: %s (%s)", signatureCount,
108 maxsign));
109 OWLIndividual signatureInd = createSignature(signature);
110
111 @SuppressWarnings("unchecked")
112 List<Element> cards = XPath.selectNodes(signature, ".//carta");
113 int cardsCount = 1;
114 int maxcards = cards.size();
115
116 // Element card=cards.get(0);
117 for (Element card : cards) {
118 logger.debug(String.format("Cards: %s (%s)", cardsCount++,
119 maxcards));
120 logger.debug(String.format("Signature: %s (%s)",
121 signatureCount, maxsign));
122 OWLIndividual cardInd = createCard(card, signatureInd);
123
124 @SuppressWarnings("unchecked")
125 List<Element> records = XPath.selectNodes(card, ".//record");
126 for (Element record : records) {
127 // Element record = records.get(0);
128 OWLIndividual recordInd = createRecord(record, cardInd);
129 OWLIndividual recordEvent = createEvent(record, recordInd); // Event
130 // und
131 // Records
132 // sind
133 // im
134 // Original
135 // im
136 // record-tag
137 }
138 }
139 signatureCount++;
140 }
141 owlDoc.printModel();
142 }
143
144 private OWLIndividual createEvent(Element record, OWLIndividual recordInd) {
145
146 logger.debug("Create Event");
147 OWLIndividual eventInstance = owlDoc.createInstance("RecordedEvent");
148
149 owlDoc.setProperty(recordInd, "crm:P70_documents", eventInstance);
150
151 // timespan
152
153 try {
154 String dateDcStart = lexDoc.getValue(record, ".//datdf/startdate");
155
156 String dateDcEnd = lexDoc.getValue(record, ".//datdf/startdate");
157
158 if (!dateDcStart.equals("")) {
159 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart,
160 dateDcEnd);
161 owlDoc.setProperty(eventInstance, "crm:P4_has_time-span",
162 timeSpan);
163 }
164 } catch (JDOMException e) {
165 // TODO Auto-generated catch block
166 e.printStackTrace();
167 }
168
169 // tipol
170 String typology;
171 try {
172 typology = lexDoc.getValue(record, ".//tipol");
173 OWLIndividual typolInd = createOrGetTipol(typology);
174 owlDoc.setProperty(eventInstance, "has_tipol", typolInd);
175
176 } catch (JDOMException e) {
177 // TODO Auto-generated catch block
178 e.printStackTrace();
179 }
180
181 // reges
182 try {
183 createNewDependingDataTypePropertyFromXpath(record, eventInstance,
184 ".//reges/italian", "has_reges", "it");
185 createNewDependingDataTypePropertyFromXpath(record, eventInstance,
186 ".//reges/english", "has_reges", "en");
187 } catch (JDOMException e) {
188 // TODO Auto-generated catch block
189 e.printStackTrace();
190 }
191
192 // nomiq
193
194 try {
195 @SuppressWarnings("unchecked")
196 List<Element> nomiqs = XPath.selectNodes(record, ".//nomiq");
197 OWLIndividual recordNamesRoles = null;
198 for (Element nomiq : nomiqs) {
199 String name = lexDoc.getValue(nomiq, "./name");
200 String role = lexDoc.getValue(nomiq, "./role");
201
202 if (!name.equals("") && !role.equals("")) {
203 recordNamesRoles = handleNameWithRole(recordInd, name, role);
204 } else if (!role.equals("")) {
205 recordNamesRoles = handleRole(recordInd, role);
206 } else if (!name.equals("")) {
207 recordNamesRoles = handleName(recordInd, name);
208 }
209
210 if (recordNamesRoles != null) {
211 owlDoc.setProperty(eventInstance,
212 "recordsDuomoObjectNameRoles", recordNamesRoles);
213 String id = lexDoc.getValue(nomiq, "./@id");
214 individualIds.put(id, recordNamesRoles);
215 }
216 }
217
218 } catch (JDOMException e) {
219 // TODO Auto-generated catch block
220 e.printStackTrace();
221 }
222
223 // istit
224 try {
225 String istit = lexDoc.getValue(record, "./istit");
226 OWLIndividual istitInd = owlDoc.getIndividualByReadableId(
227 "IndicesInstitutions", istit);
228 if (istitInd == null) {
229 istitInd = createOrGetInstitution(istit);
230 }
231
232 owlDoc.setProperty(eventInstance, "recordsDuomoObjectInstitution",
233 istitInd);
234
235 } catch (JDOMException e) {
236 // TODO Auto-generated catch block
237 e.printStackTrace();
238 }
239
240 // types
241
242 try {
243 List<Element> types = XPath.selectNodes(record, ".//type");
244 for (Element type : types) {
245 createType(eventInstance, type);
246 }
247 } catch (JDOMException e) {
248 // TODO Auto-generated catch block
249 e.printStackTrace();
250 }
251
252 return eventInstance;
253 }
254
255 private void createType(OWLIndividual eventInstance, Element type) {
256
257 String typeId;
258 try {
259 typeId = lexDoc.getValue(type, "./ptr/@target");
260 String clsName = owlDoc.getClassNameFromTypeId(typeId);
261 OWLNamedClass cls = owlDoc.getClassFromTypeId(typeId);
262 OWLIndividual typeInd = owlDoc.createInstance(clsName);
263
264 OWLNamedClass subjectClass = getPreferredTargetClass(cls,
265 "has_subject");
266 OWLNamedClass predicateClass = getPreferredTargetClass(cls,
267 "has_predicate");
268
269 List<Element> freeTexts = XPath.selectNodes(type, "./freetext");
270 for (Element freeText : freeTexts) {
271
272 String subjPointer = lexDoc.getValue(freeText,
273 "./sub/ptrtoperson/@target");
274 String subjText = lexDoc.getValue(freeText, "./sub");
275
276 OWLIndividual subjInd = createSubjectOrPredicate(subjectClass,
277 subjPointer, subjText);
278
279 String predPointer = lexDoc.getValue(freeText,
280 "./pred/ptrtoperson/@target");
281 String predText = lexDoc.getValue(freeText, "./pred");
282
283 OWLIndividual predInd = createSubjectOrPredicate(
284 predicateClass, predPointer, predText);
285
286 if (subjInd != null) {
287 owlDoc.setProperty(typeInd, "has_subject", subjInd);
288 }
289
290 if (predInd != null) {
291 owlDoc.setProperty(typeInd, "has_predicate", predInd);
292 }
293
294 owlDoc.setProperty(eventInstance, "recordsDuomoObjectConcept",
295 typeInd);
296 }
297
298 } catch (JDOMException e) {
299 // TODO Auto-generated catch block
300 e.printStackTrace();
301 }
302
303 }
304
305 private OWLIndividual createSubjectOrPredicate(OWLNamedClass toClass,
306 String subjPointer, String subjText) {
307 OWLIndividual subjInd = null;
308
309 if (!subjPointer.equals("")) {
310 subjInd = toClass.createOWLIndividual(null);
311 OWLIndividual ind = individualIds.get(subjPointer);
312 if (ind == null) {
313 logger.debug("target ID does not exist:" + subjPointer);
314 } else {
315 owlDoc.setProperty(subjInd, "has_NameOrRoleFromIndex", ind);
316 }
317 }
318
319 if (!subjText.equals("")) {
320 if (subjInd == null)
321 subjInd = toClass.createOWLIndividual(null);
322
323 OWLNamedClass idcls = owlDoc.owlModel
324 .getOWLNamedClass("Identifier"); // is die klasse selbst
325 // schon ein identifiert
326 if (toClass.getNamedSuperclasses(true).contains(idcls)) { // to
327 owlDoc.setProperty(subjInd, "has_readable_id", subjText);
328 } else {
329
330 OWLIndividual ident = owlDoc
331 .createInstance("IdentifierPredicateOrSubject");
332 owlDoc.setProperty(ident, "has_readable_id", subjText);
333 owlDoc.setProperty(subjInd, "crm:P48_has_preferred_identifier",
334 ident);
335 }
336 }
337 return subjInd;
338 }
339
340 private OWLNamedClass getPreferredTargetClass(OWLNamedClass cls,
341 String propertyName) {
342 RDFProperty prop = owlDoc.owlModel.getRDFProperty(propertyName);
343
344 // finde welche klasse als subject erlaubt ist
345 Collection<?> restrictions = cls.getRestrictions(prop, true);
346
347 RDFResource restrictionValues = prop.getRange(); // nimm erstmal den
348 // gesammten Range
349
350 // schaue jetzt nach ob eb es einschraenkungen gibt.
351 for (Iterator<?> it = restrictions.iterator(); it.hasNext();) {
352 Object restriction = it.next();
353 if (OWLAllValuesFrom.class.isInstance(restriction)) {
354 OWLAllValuesFrom ar = (OWLAllValuesFrom) restriction;
355 restrictionValues = ar.getAllValuesFrom();
356 break;
357 }
358
359 }
360 OWLNamedClass toClass = null;
361 if (OWLNamedClass.class.isInstance(restrictionValues)) {
362 toClass = (OWLNamedClass) restrictionValues;
363 } else if (OWLDataRange.class.isInstance(restrictionValues)) {
364 RDFList dr = ((OWLDataRange) restrictionValues).getOneOf();
365 for (Object d : dr.getValues()) {
366 System.out.println(d);
367 toClass = (OWLNamedClass) d;
368 // FIXME: geht das??
369 }
370 } else if (DefaultOWLUnionClass.class.isInstance(restrictionValues)) { // mehr
371 // als
372 // eine
373 // moeglich
374 DefaultOWLUnionClass ou = (DefaultOWLUnionClass) restrictionValues;
375 Set set = new HashSet();
376 ou.getNestedNamedClasses(set);
377
378 for (Iterator<?> it = set.iterator(); it.hasNext();) {
379 OWLNamedClass cl = (OWLNamedClass) it.next();
380 OWLNamedClass idcls = owlDoc.owlModel
381 .getOWLNamedClass("Identifier"); // nimm die Klasse die
382 // ein Identifier
383 // ist.
384 if (cl.getNamedSuperclasses(true).contains(idcls)) {
385 toClass = cl;
386 break;
387 }
388
389 }
390 }
391 return toClass;
392 }
393
394 private OWLIndividual handleRole(OWLIndividual recordInd, String role) {
395 OWLIndividual roleInd = owlDoc.getIndividualByReadableId(
396 "IndicesRoles", role);
397 if (roleInd == null) {
398 roleInd = createRole(role);
399 }
400
401 return roleInd;
402 }
403
404 private OWLIndividual handleName(OWLIndividual recordInd, String name) {
405 OWLIndividual nameInd = owlDoc.getIndividualByReadableId(
406 "IndicesNames", name);
407 if (nameInd == null) {
408 nameInd = createRole(name);
409 }
410
411 return nameInd;
412 }
413
414 private OWLIndividual handleNameWithRole(OWLIndividual recordInd,
415 String name, String role) {
416 // teste ob schon ein solcer Eintrag existiert
417 OWLIndividual nameInd = owlDoc.getIndividualByReadableId(
418 "IndicesNames", name);
419 Boolean createNewNameWithRole = false;
420 if (nameInd == null) {
421 nameInd = createOrGetName(name);
422 createNewNameWithRole = true; // name existierte nicht dann kann
423 // auch NameWithRole nicht
424 // existierten.
425 }
426 OWLIndividual roleInd = owlDoc.getIndividualByReadableId(
427 "IndicesRoles", role);
428 if (roleInd == null) {
429 roleInd = createRole(role);
430 createNewNameWithRole = true; // role existierte nicht dann kann
431 // auch NameWithRole nicht
432 // existierten.
433 }
434 OWLIndividual nameWithRoleInd = null;
435 if (!createNewNameWithRole) { // schon klar, dass er nicht existiert
436 nameWithRoleInd = getNameWithRole(nameInd, roleInd);
437 }
438
439 if (nameWithRoleInd == null) { // existiert nicht
440 nameWithRoleInd = createNameWithRole(nameInd, roleInd);
441 }
442
443 return nameWithRoleInd;
444 }
445
446 private OWLIndividual getNameWithRole(OWLIndividual nameInd,
447 OWLIndividual roleInd) {
448 List<OWLIndividual> indicesWithNames = owlDoc.getIndividuals(
449 "IndicesNameWithRole", "refers_to_name", nameInd); // suche alle
450 // infrage
451 // kommenden
452 // nameen
453
454 if (indicesWithNames == null) { // kein Treffer
455 return null;
456 }
457
458 for (OWLIndividual name : indicesWithNames) {
459 Object role = owlDoc.getRelatedIndividual(name, "refers_to_role");
460 if (roleInd.equals(role)) {
461 return name;
462 }
463 }
464 return null;
465 }
466
467 private OWLIndividual createOrGetTipol(String typology) {
468 OWLIndividual tipol = owlDoc.createOrGetInstanceWithIdentifier("Typology","Identifier",typology);
469 return tipol;
470 }
471
472 private OWLIndividual createNameWithRole(OWLIndividual nameInd,
473 OWLIndividual roleInd) {
474
475 OWLIndividual nameWithRoleInd = owlDoc
476 .createInstance("IndicesNameWithRole");
477
478 owlDoc.setProperty(nameWithRoleInd, "refers_to_name", nameInd);
479 owlDoc.setProperty(nameWithRoleInd, "refers_to_role", roleInd);
480
481 return nameWithRoleInd;
482
483 }
484
485 private OWLIndividual createOrGetInstitution(String name) {
486 OWLIndividual nameInd = owlDoc.createOrGetInstanceWithIdentifier("IndicesInstitutions","IdentifierInstitutions",name);
487
488 return nameInd;
489 }
490
491 private OWLIndividual createOrGetName(String name) {
492 OWLIndividual nameInd = owlDoc.createOrGetInstanceWithIdentifier("IndicesNames","IdentifierNames",name);
493 return nameInd;
494 }
495
496 private OWLIndividual createRole(String name) {
497 OWLIndividual roleInd = owlDoc.createOrGetInstanceWithIdentifier("IndicesRoles","IdentifierRoles",name);
498 return roleInd;
499 }
500
501 private OWLIndividual createRecord(Element record, OWLIndividual cardInd)
502 throws JDOMException {
503 OWLIndividual recordInstance = owlDoc.createInstance("Record");
504 owlDoc.setProperty(recordInstance, "is_on_card", cardInd);
505 createNewDependingInstanceFromXpath(record, recordInstance, "./@id",
506 new String[] { "has_readable_id", "rdfs:label" },
507 "IdentifierCurrent", "crm:P48_has_preferred_identifier");
508
509 String value = lexDoc.getValue(record, ".//textblockid");
510 if (!value.equals(""))
511 owlDoc.setProperty(recordInstance, "has_textblockid", value);
512
513 String endOnCarta = lexDoc.getValue(record, "./@end_on_carta");
514 if (!endOnCarta.equals("")) {
515 OWLIndividual signature = (OWLIndividual) owlDoc
516 .getRelatedIndividual(cardInd, "has_signature");
517 addRecordToCarta(recordInstance, value, signature);
518 }
519
520 String dateDcStart = lexDoc.getValue(record, ".//datdc/startdate");
521 String dateDcEnd = lexDoc.getValue(record, ".//datdc/startdate");
522
523 OWLIndividual timeSpan = owlDoc.createTimeSpan(dateDcStart, dateDcEnd);
524
525 owlDoc.setProperty(recordInstance, "crm:P4_has_time-span", timeSpan);
526
527 return recordInstance;
528 }
529
530 private void addRecordToCarta(OWLIndividual recordInstance, String cardID,
531 OWLIndividual signature) {
532
533 OWLIndividual card = owlDoc.getIndividualByReadableId("Card", cardID);
534
535 if (card == null) {
536 card = createCard(cardID, signature);
537 }
538
539 owlDoc.setProperty(recordInstance, "is_on_card", card);
540 }
541
542 private OWLIndividual createSignature(Element signature)
543 throws JDOMException {
544 Element segHeaderElement = (Element) XPath.selectSingleNode(signature,
545 ".//segheader");
546 String segheader = segHeaderElement.getTextTrim();
547
548 OWLIndividual signatureInstance = owlDoc.createInstance("Signatur");
549 owlDoc.setProperty(signatureInstance, "rdfs:label", segheader);
550 return signatureInstance;
551 }
552
553 private OWLIndividual createCard(String cardId, OWLIndividual signature) {
554
555 OWLIndividual cardInstance = owlDoc.createInstance("Card");
556
557 owlDoc.setProperty(cardInstance, "has_signature", signature);
558
559 OWLIndividual preferredId = owlDoc.createInstance("IdentifierCurrent");
560 owlDoc.setProperty(preferredId, "has_readable_id", cardId);
561 owlDoc.setProperty(preferredId, "rdfs:label", cardId);
562
563 owlDoc.setProperty(cardInstance, "crm:P48_has_preferred_identifier",
564 preferredId);
565
566 return cardInstance;
567 }
568
569 private OWLIndividual createCard(Element card, OWLIndividual signatureInd) {
570
571 OWLIndividual cardInstance = owlDoc.createInstance("Card");
572
573 try {
574
575 createNewDependingInstanceFromXpath(card, cardInstance,
576 ".//cartanr", new String[] { "has_readable_id",
577 "rdfs:label" }, "IdentifierCurrent",
578 "crm:P48_has_preferred_identifier");
579
580 createNewDependingInstanceFromXpath(card, cardInstance,
581 ".//cartaant", new String[] { "has_readable_id",
582 "rdfs:label" }, "IdentifierCurrent",
583 "crm:P1_is_identified_by");
584
585 owlDoc.setProperty(cardInstance, "has_signature", signatureInd);
586
587 } catch (JDOMException e) {
588 e.printStackTrace();
589 return null;
590 }
591
592 return cardInstance;
593 }
594
595 private void createNewDependingInstanceFromXpath(Element card,
596 OWLIndividual cardInstance, String xpath, String[] propertyNames,
597 String newInstanceClassName, String relationNameToNewInstance)
598 throws JDOMException {
599
600 List<?> identifierIdEls = (List<?>) XPath.selectNodes(card, xpath);
601 for (Object identifierIdEl : identifierIdEls) {
602 String identifierId = "";
603 if (Element.class.isInstance(identifierIdEl)) {
604 identifierId = ((Element) identifierIdEl).getTextTrim();
605 } else if (Attribute.class.isInstance(identifierIdEl)) {
606 identifierId = ((Attribute) identifierIdEl).getValue();
607 }
608
609 OWLIndividual identifier = owlDoc
610 .createInstance(newInstanceClassName);
611 for (int i = 0; i < propertyNames.length; i++) {
612 owlDoc.setProperty(identifier, propertyNames[i], identifierId);
613 }
614
615 owlDoc.setProperty(cardInstance, relationNameToNewInstance,
616 identifier);
617 }
618
619 }
620
621 // createNewDependingDataTypePropertyFromXpath(record, eventInstance,
622 // ".//resges/italian", "has_reges",
623 // "italian");
624 private void createNewDependingDataTypePropertyFromXpath(Element record,
625 OWLIndividual eventInstance, String xpath, String propertyName,
626 String lang) throws JDOMException {
627
628 List<?> identifierIdEls = (List<?>) XPath.selectNodes(record, xpath);
629 for (Object identifierIdEl : identifierIdEls) {
630 String identifierId = "";
631 if (Element.class.isInstance(identifierIdEl)) {
632 identifierId = ((Element) identifierIdEl).getTextTrim();
633 } else if (Attribute.class.isInstance(identifierIdEl)) {
634 identifierId = ((Attribute) identifierIdEl).getValue();
635 }
636
637 owlDoc.setDataTypePropery(eventInstance, propertyName,
638 identifierId, lang);
639 }
640 }
641 }