Mercurial > hg > duomoOWLProject
diff src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java @ 8:919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author | dwinter |
---|---|
date | Thu, 21 Jun 2012 17:08:22 +0200 |
parents | 19e40abb3e8a |
children | 4392a6adf85a |
line wrap: on
line diff
--- a/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java Wed Feb 09 16:36:36 2011 +0100 +++ b/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java Thu Jun 21 17:08:22 2012 +0200 @@ -1,8 +1,10 @@ package de.mpiwg.dwinter.duomo.lexdump; import java.io.File; +import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; +import java.io.OutputStreamWriter; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; @@ -11,6 +13,8 @@ import java.util.Iterator; import java.util.List; +import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction; + import org.apache.log4j.Logger; import edu.stanford.smi.protege.exception.OntologyLoadException; @@ -24,6 +28,19 @@ import edu.stanford.smi.protegex.owl.repository.RepositoryManager; import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository; +/** + * Erzeugt Entities und Verwaltet gemäß eines OWL-Modells. + * Alle erzeugten Entities werden direkt als nturtle in eine File geschrieben. + * + * TODO: Dieses File ist immer /tmp/out.rdf + * TODO: zur Ausgabe gibt es eine Methode @see{org.openrdf.rio.trig.TriGWriter} bzw @see{de.mpiwg.itgroup.triplestoremanager.tools.Exporter} + * die benutzt werden sollte anstelle der handgestrickten bei denen direkt in outrdf geschrieben wird. 
+ * + * TODO: Einzelne Methoden hier sind noch DUOMO und CRM spezifisch, insbesondere + * @author dwinter + * + * + */ public class OWLImporter { JenaOWLModel owlModel; // contains the model @@ -33,7 +50,16 @@ private HashMap<String, OWLNamedClass> typeId2class=null; private String ontFolder; private URI uri; + //private FileWriter outRDF; + private OutputStreamWriter outRDF; + /** + * Initialisiert die Klasse und + * lädt die Ontologien ein + * @param folder Ordner mit der zu bearbeitenden Ontologie + * @param uri URI der Ontologie selbst + * @throws OntologyLoadException + */ public OWLImporter(String folder, URI uri) throws OntologyLoadException { // owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri); @@ -41,6 +67,7 @@ this.uri=uri; try { this.fh= new FileWriter(new File("/tmp/identifier")); + this.outRDF= new OutputStreamWriter(new FileOutputStream("/tmp/out.rdf"),"UTF-8"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); @@ -48,10 +75,21 @@ loadOWL(folder, uri); } + /** + * Lädt die Ontologie erneut ein. 
+ * @throws OntologyLoadException + */ public void reloadOWL() throws OntologyLoadException{ loadOWL(ontFolder, uri); } + /** + * + * Lädt die Ontologien ein + * @param folder Ordner mit der zu bearbeitenden Ontologie + * @param uri URI der Ontologie selbst + * @throws OntologyLoadException + */ public void loadOWL(String folder, URI uri) throws OntologyLoadException { owlModel = ProtegeOWL.createJenaOWLModel(); // Load repository @@ -84,9 +122,9 @@ } public static void main(String args[]) throws URISyntaxException { - String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput"; + String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version"; URI ontologieUri = new URI( - "file:///Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/owlInput/duomoAnalysis.owl"); + "file:///Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version/protege_version_duomo/duomoAnalysis.owl"); try { OWLImporter im = new OWLImporter(base, ontologieUri); @@ -97,27 +135,81 @@ } } - public OWLIndividual createInstance(String string) { + /** Erzeuge Instanz und schreibe in das Exportfile die entsprechende <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> Relation. + * @param fullClassName Name der Classe + * @return null, wenn das Objekt nicht angelegt werden kann. 
+ */ + public OWLIndividual createInstance(String fullClassName) { OWLNamedClass owlclass = (OWLNamedClass) owlModel - .getOWLNamedClass(string); + .getOWLNamedClass(fullClassName); if (owlclass == null) { - logger.debug("Cannot find OWLClass:" + string); + logger.debug("Cannot find OWLClass:" + fullClassName); return null; } //logger.debug("Create new individual of type:"+string); - return owlclass.createOWLIndividual(null); + + + OWLIndividual ind = owlclass.createOWLIndividual(null); + + //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} + String triple = String.format("<%s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <%s>.\n", ind.getName(),owlclass.getName()); + try { + outRDF.write(triple); + outRDF.flush(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + return ind; } - public void setProperty(OWLIndividual identifier, String propertyName, + + /** + * Erzeuge eine Prpoerty und schreibe die entsprechenden Informationen in das out-file. 
+ * @param individual + * @param propertyName + * @param value + */ + public void setProperty(OWLIndividual individual, String propertyName, Object value) { RDFProperty prop = owlModel.getRDFProperty(propertyName); - identifier.setPropertyValue(prop, value); - + individual.setPropertyValue(prop, value); + + + + //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} + String valName=""; + + + if (OWLIndividual.class.isInstance(value)) + valName="<"+((OWLIndividual)value).getName()+">"; + else + valName="\""+escapeRDFLit((String)value)+"\""; + + + String triple = String.format("<%s> <%s> %s.\n", individual.getName(),prop.getName(),valName); + try { + outRDF.write(triple); + outRDF.flush(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } - public void setDataTypePropery(OWLIndividual eventInstance, + private String escapeRDFLit(String string){ + return string.replace("\"", ""); + } + + /** Erzeuge DatatypePropery schreibe die entsprechenden Informationen in das out-file. 
+ * @param individual + * @param propertyName + * @param value + * @param lang Sprach-tag + */ + public void setDataTypePropery(OWLIndividual individual, String propertyName, String value, String lang) { RDFProperty prop = owlModel.getRDFProperty(propertyName); // if(OWLDatatypeProperty.class.isInstance(prop)){ @@ -129,20 +221,35 @@ // } + RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang); - eventInstance.setPropertyValue(prop, langLiteral); - + individual.setPropertyValue(prop, langLiteral); + //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} + String triple = String.format("<%s> <%s> \"%s\"@%s.\n", individual.getName(),prop.getName(),escapeRDFLit(langLiteral.getString()),lang); + try { + outRDF.write(triple); + outRDF.flush(); + } catch (IOException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } } - public Object getRelatedIndividual(OWLIndividual cardInd, + /** + * Gibt Individual zurück das über die Property mit der Quelle verbunden ist. + * @param sourceIndividual + * @param propertyName + * @return + */ + public Object getRelatedIndividual(OWLIndividual sourceIndividual, String propertyName) { RDFProperty prop = owlModel.getRDFProperty(propertyName); if (prop == null) { logger.debug("Property does not exist:" + propertyName); } - Object value = cardInd.getPropertyValue(prop); + Object value = sourceIndividual.getPropertyValue(prop); return value; } @@ -263,32 +370,75 @@ } public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) { - OWLIndividual timeSpan = createInstance("DatesDocument"); + // A date is described by an timespan which is described by an appellation - + //TODO: die lesbaren daten gehören in time span mit den angaben wie ungefähr und so weiter + // d.h. 
die klammern um ein Datum die ungefaehr signalisieren, gehoeren in die Beschreibung der + //Qualifier in Time span + // in DuomoDate_Appellation kommen die genauen teile + // zunaechst schreibe ich in "has_readable" hier genau rein was im Text steht + // dieses muss noch analysiert werden + // so sollte 1432 gennai 9 --> in ein Computer lesbares modell umgesetzt werden + // und 1432 gennaio (9) muss in 1432 gennaio 9 in DuomoDate_Appellation eingetragen werden + // und dann mit P79 und P80 in DuomoDate_TimeSpan genauer beschrieben werden, d.h in P79 kommt dann + // ungefaehr + // ausserdem mussesn "POST" und "ANTE aus dem Text in das Modell eingepflegt werden. - setProperty(timeSpan, "has_readable_date", dateDcStart); + + OWLIndividual timeSpan = createInstance("DuomoDate_TimeSpan"); + + OWLIndividual date= createInstance("DuomoDate_Appellation"); + + setProperty(date, "rdfs:label", dateDcStart); + //setProperty(timeSpan, "has_readable_date", dateDcStart); + + setProperty(timeSpan,"is_identified_by_Date",date); if(!(dateDcEnd==null || dateDcEnd.equals(""))){ - setProperty(timeSpan, "has_readable_to_date", dateDcEnd); + OWLIndividual toDate= createInstance("DuomoDate_Appellation"); + setProperty(toDate, "rdfs:label", dateDcEnd); + //setProperty(timeSpan, "has_readable_toDate", dateDcEnd); + setProperty(timeSpan,"is_identified_by_toDate",toDate); } else { dateDcEnd=dateDcStart; } OWLIndividual timeSpanIdentifier = createInstance("Identifier"); - setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd); + setProperty(timeSpanIdentifier,"rdfs:label",dateDcStart+"-"+dateDcEnd); setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); return timeSpan; } + +// public OWLIndividual createTimeSpan2(String dateDcStart, String dateDcEnd) { +// OWLIndividual timeSpan = createInstance("DatesDocument"); +// +// +// +// setProperty(timeSpan, "has_readable_date", dateDcStart); +// +// if(!(dateDcEnd==null || dateDcEnd.equals(""))){ +// 
setProperty(timeSpan, "has_readable_to_date", dateDcEnd); +// } else { +// dateDcEnd=dateDcStart; +// } +// +// OWLIndividual timeSpanIdentifier = createInstance("Identifier"); +// setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd); +// +// setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); +// +// return timeSpan; +// } + public OWLIndividual getIndividualByReadableId(String className,String identifier){ - return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "has_readable_id", identifier, true); + return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "rdfs:label", identifier, true); } public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){ - return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "has_readable_id", identifier,subclassedIdentifier); + return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "rdfs:label", identifier,subclassedIdentifier); } public String getClassNameFromTypeId(String typeId) { @@ -333,11 +483,14 @@ public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance, String classNameIdentifier, String identifier,boolean followSubclasses) { + + identifier=org.apache.commons.lang.StringUtils.strip(identifier); + OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses); if(ind==null){ ind = createInstance(classNameInstance); OWLIndividual identifierInd = createInstance(classNameIdentifier); - setProperty(identifierInd, "has_readable_id", identifier); + setProperty(identifierInd, "rdfs:label", identifier); try { fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n"); fh.flush();