diff src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java @ 8:919e9f3b5efd

neue klassen zur textanalyse (stanford parser eingebaut) alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents 19e40abb3e8a
children 4392a6adf85a
line wrap: on
line diff
--- a/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java	Wed Feb 09 16:36:36 2011 +0100
+++ b/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java	Thu Jun 21 17:08:22 2012 +0200
@@ -1,8 +1,10 @@
 package de.mpiwg.dwinter.duomo.lexdump;
 
 import java.io.File;
+import java.io.FileOutputStream;
 import java.io.FileWriter;
 import java.io.IOException;
+import java.io.OutputStreamWriter;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -11,6 +13,8 @@
 import java.util.Iterator;
 import java.util.List;
 
+import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction;
+
 import org.apache.log4j.Logger;
 
 import edu.stanford.smi.protege.exception.OntologyLoadException;
@@ -24,6 +28,19 @@
 import edu.stanford.smi.protegex.owl.repository.RepositoryManager;
 import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository;
 
+/**
+ * Erzeugt und verwaltet Entities gemäß einem OWL-Modell.
+ * Alle erzeugten Entities werden direkt als nturtle in ein File geschrieben.
+ * 
+ * TODO: Dieses File ist immer /tmp/out.rdf
+ * TODO: zur Ausgabe gibt es eine Methode @see{org.openrdf.rio.trig.TriGWriter} bzw @see{de.mpiwg.itgroup.triplestoremanager.tools.Exporter}
+ * die benutzt werden sollte anstelle der handgestrickten bei denen direkt in outrdf geschrieben wird.
+ * 
+ * TODO: Einzelne Methoden hier sind noch DUOMO und CRM spezifisch, insbesondere
+ * @author dwinter
+ *
+ *
+ */
 public class OWLImporter {
 
 	JenaOWLModel owlModel; // contains the model
@@ -33,7 +50,16 @@
 	private HashMap<String, OWLNamedClass> typeId2class=null;
 	private String ontFolder;
 	private URI uri;
+	//private FileWriter outRDF;
+	private OutputStreamWriter outRDF;
 
+	/**
+	 * Initialisiert die Klasse und
+	 * lädt die Ontologien ein
+	 * @param folder Ordner mit der zu bearbeitenden Ontologie
+	 * @param uri URI der Ontologie selbst
+	 * @throws OntologyLoadException
+	 */
 	public OWLImporter(String folder, URI uri) throws OntologyLoadException {
 		// owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri);
 
@@ -41,6 +67,7 @@
 		this.uri=uri;
 		try {
 			this.fh= new FileWriter(new File("/tmp/identifier"));
+			this.outRDF= new OutputStreamWriter(new FileOutputStream("/tmp/out.rdf"),"UTF-8");
 		} catch (IOException e) {
 			// TODO Auto-generated catch block
 			e.printStackTrace();
@@ -48,10 +75,21 @@
 		loadOWL(folder, uri);
 	}
 
+	/**
+	 * Reloads the ontology by calling loadOWL again with the stored ontFolder and uri fields.
+	 * @throws OntologyLoadException propagated from loadOWL
+	 */
 	public void reloadOWL() throws OntologyLoadException{
 		loadOWL(ontFolder, uri);
 	}
 	
+	/**
+	 * 
+	 * Lädt die Ontologien ein
+	 * @param folder Ordner mit der zu bearbeitenden Ontologie
+	 * @param uri URI der Ontologie selbst
+	 * @throws OntologyLoadException
+	 */
 	public void loadOWL(String folder, URI uri) throws OntologyLoadException {
 		owlModel = ProtegeOWL.createJenaOWLModel();
 		// Load repository
@@ -84,9 +122,9 @@
 	}
 
 	public static void main(String args[]) throws URISyntaxException {
-		String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput";
+		String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version";
 		URI ontologieUri = new URI(
-				"file:///Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput/duomoAnalysis.owl");
+				"file:///Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version/protege_version_duomo/duomoAnalysis.owl");
 		try {
 
 			OWLImporter im = new OWLImporter(base, ontologieUri);
@@ -97,27 +135,81 @@
 		}
 	}
 
-	public OWLIndividual createInstance(String string) {
+	/** Creates an individual of the named class and writes its <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> triple to outRDF.
+	 * @param fullClassName name of the OWL class, as looked up with getOWLNamedClass
+	 * @return the new individual, or null if the class is not found in the model
+	 */
+	public OWLIndividual createInstance(String fullClassName) {
 		OWLNamedClass owlclass = (OWLNamedClass) owlModel
-				.getOWLNamedClass(string);
+				.getOWLNamedClass(fullClassName);
 
 		if (owlclass == null) {
-			logger.debug("Cannot find OWLClass:" + string);
+			logger.debug("Cannot find OWLClass:" + fullClassName);
 			return null;
 		}
 		//logger.debug("Create new individual of type:"+string);
-		return owlclass.createOWLIndividual(null);
+		
+		
+		OWLIndividual ind = owlclass.createOWLIndividual(null);
+		
+		//TODO: replace the following by  @see{org.openrdf.rio.trig.TriGWriter}
+		String triple = String.format("<%s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <%s>.\n", ind.getName(),owlclass.getName());
+		try {
+			outRDF.write(triple);
+			outRDF.flush();
+		} catch (IOException e) {
+			// a write failure is only printed; the individual is still returned
+			e.printStackTrace();
+		}
+		return ind;
 
 	}
 
-	public void setProperty(OWLIndividual identifier, String propertyName,
+	
+	/**
+	 * Sets the property on the individual in the model and writes the matching triple to the out-file.
+	 * @param individual subject of the triple
+	 * @param propertyName name passed to getRDFProperty to resolve the property
+	 * @param value an OWLIndividual (written as a resource in angle brackets) or a String (written as a quoted literal); any other type fails the String cast
+	 */
+	public void setProperty(OWLIndividual individual, String propertyName,
 			Object value) {
 		RDFProperty prop = owlModel.getRDFProperty(propertyName);
-		identifier.setPropertyValue(prop, value);
-
+		individual.setPropertyValue(prop, value);
+		
+		
+		
+		//TODO: replace the following by  @see{org.openrdf.rio.trig.TriGWriter}
+		String valName="";
+		
+		
+		if (OWLIndividual.class.isInstance(value))
+			valName="<"+((OWLIndividual)value).getName()+">";
+		else
+			valName="\""+escapeRDFLit((String)value)+"\"";
+		
+		
+		String triple = String.format("<%s> <%s> %s.\n", individual.getName(),prop.getName(),valName);
+		try {
+			outRDF.write(triple);
+			outRDF.flush();
+		} catch (IOException e) {
+			// a write failure is only printed; the value stays set in the model
+			e.printStackTrace();
+		}
 	}
 	
-	public void setDataTypePropery(OWLIndividual eventInstance,
+	private String escapeRDFLit(String string){ // despite the name nothing is escaped: every double quote is removed
+		return string.replace("\"", ""); // NOTE(review): backslashes and line breaks pass through unchanged
+	}
+	
+	/** Erzeuge eine Datatype-Property und schreibe die entsprechenden Informationen in das out-file.
+	 * @param individual
+	 * @param propertyName
+	 * @param value
+	 * @param lang Sprach-tag
+	 */
+	public void setDataTypePropery(OWLIndividual individual,
 			String propertyName, String value, String lang) {
 		RDFProperty prop = owlModel.getRDFProperty(propertyName);
 //		if(OWLDatatypeProperty.class.isInstance(prop)){
@@ -129,20 +221,35 @@
 //		}
 		
 		
+		
 		RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang);
-		eventInstance.setPropertyValue(prop, langLiteral);
-
+		individual.setPropertyValue(prop, langLiteral);
 		
+		//TODO: replace the following by  @see{org.openrdf.rio.trig.TriGWriter}
+		String triple = String.format("<%s> <%s> \"%s\"@%s.\n", individual.getName(),prop.getName(),escapeRDFLit(langLiteral.getString()),lang);
+		try {
+			outRDF.write(triple);
+			outRDF.flush();
+		} catch (IOException e) {
+			// TODO Auto-generated catch block
+			e.printStackTrace();
+		}
 	}
 
-	public Object getRelatedIndividual(OWLIndividual cardInd,
+	/**
+	 * Returns the value that the named property holds on the source individual.
+	 * @param sourceIndividual individual whose property value is read
+	 * @param propertyName name passed to getRDFProperty to resolve the property
+	 * @return result of getPropertyValue for that property
+	 */
+	public Object getRelatedIndividual(OWLIndividual sourceIndividual,
 			String propertyName) {
 		RDFProperty prop = owlModel.getRDFProperty(propertyName);
 		if (prop == null) {
 			logger.debug("Property does not exist:" + propertyName);
 		}
 
-		Object value = cardInd.getPropertyValue(prop);
+		Object value = sourceIndividual.getPropertyValue(prop); // prop can still be null here, the check above only logs
 		return value;
 	}
 
@@ -263,32 +370,75 @@
 	}
 	
 	public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) {
-		OWLIndividual timeSpan = createInstance("DatesDocument");
+		// A date is described by a time span, which in turn is described by an appellation
 		
-			
+		//TODO: the readable dates belong in the time span, together with qualifiers such as approximately and so on
+		// i.e. parentheses around a date, which signal approximately, belong in the description of the
+		// qualifier in the time span
+		// the exact parts go into DuomoDate_Appellation
+		// for now the readable label (formerly has_readable, now rdfs:label) holds exactly what the text says
+		// this still has to be analysed
+		// e.g. 1432 gennai 9 should be converted into a machine-readable model
+		// and 1432 gennaio (9) has to be entered as 1432 gennaio 9 in DuomoDate_Appellation
+		// and then be described more precisely with P79 and P80 in DuomoDate_TimeSpan, i.e. P79 then carries
+		// the approximately qualifier
+		// in addition, POST and ANTE from the text have to be carried over into the model.
 		
-		setProperty(timeSpan, "has_readable_date", dateDcStart);
+		
+		OWLIndividual timeSpan = createInstance("DuomoDate_TimeSpan");
+		
+		OWLIndividual date= createInstance("DuomoDate_Appellation");	
+		
+		setProperty(date, "rdfs:label", dateDcStart);
+		//setProperty(timeSpan, "has_readable_date", dateDcStart);
+		
+		setProperty(timeSpan,"is_identified_by_Date",date);
 		
 		if(!(dateDcEnd==null || dateDcEnd.equals(""))){
-			setProperty(timeSpan, "has_readable_to_date", dateDcEnd);
+			OWLIndividual toDate= createInstance("DuomoDate_Appellation");
+			setProperty(toDate, "rdfs:label", dateDcEnd);
+			//setProperty(timeSpan, "has_readable_toDate", dateDcEnd);
+			setProperty(timeSpan,"is_identified_by_toDate",toDate);
 		} else {
 			dateDcEnd=dateDcStart;
 		}
 		
 		OWLIndividual timeSpanIdentifier = createInstance("Identifier");
-		setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd);
+		setProperty(timeSpanIdentifier,"rdfs:label",dateDcStart+"-"+dateDcEnd);
 		
 		setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier);
 		
 		return timeSpan;
 	}
+	
+//	public OWLIndividual createTimeSpan2(String dateDcStart, String dateDcEnd) {
+//		OWLIndividual timeSpan = createInstance("DatesDocument");
+//		
+//			
+//		
+//		setProperty(timeSpan, "has_readable_date", dateDcStart);
+//		
+//		if(!(dateDcEnd==null || dateDcEnd.equals(""))){
+//			setProperty(timeSpan, "has_readable_to_date", dateDcEnd);
+//		} else {
+//			dateDcEnd=dateDcStart;
+//		}
+//		
+//		OWLIndividual timeSpanIdentifier = createInstance("Identifier");
+//		setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd);
+//		
+//		setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier);
+//		
+//		return timeSpan;
+//	}
+
 
 	public OWLIndividual getIndividualByReadableId(String className,String identifier){
-		return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "has_readable_id", identifier, true);
+		return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "rdfs:label", identifier, true); // same call as the four-argument overload with classNameIdentifier fixed to Identifier and subclassedIdentifier fixed to true
 	}
 
 	public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){
-		return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "has_readable_id", identifier,subclassedIdentifier);
+		return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "rdfs:label", identifier,subclassedIdentifier); // NOTE(review): presumably getIndividual matches identifier against the rdfs:label of the P48 identifier individual; confirm in getIndividual
 	}
 
 	public String getClassNameFromTypeId(String typeId) {
@@ -333,11 +483,14 @@
 
 	public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance,
 			String classNameIdentifier, String identifier,boolean followSubclasses) {
+		
+		identifier=org.apache.commons.lang.StringUtils.strip(identifier);
+		
 		OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses);
 		if(ind==null){
 			ind = createInstance(classNameInstance);
 			OWLIndividual identifierInd = createInstance(classNameIdentifier);
-			setProperty(identifierInd, "has_readable_id", identifier);
+			setProperty(identifierInd, "rdfs:label", identifier);
 			try {
 				fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n");
 				fh.flush();