Mercurial > hg > duomoOWLProject
view src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java @ 9:4392a6adf85a default tip
new version der label mit language tag
author | dwinter |
---|---|
date | Thu, 16 Aug 2012 11:40:17 +0200 |
parents | 919e9f3b5efd |
children |
line wrap: on
line source
package de.mpiwg.dwinter.duomo.lexdump; import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStreamWriter; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.List; import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction; import org.apache.log4j.Logger; import edu.stanford.smi.protege.exception.OntologyLoadException; import edu.stanford.smi.protegex.owl.ProtegeOWL; import edu.stanford.smi.protegex.owl.jena.JenaOWLModel; import edu.stanford.smi.protegex.owl.model.OWLClass; import edu.stanford.smi.protegex.owl.model.OWLIndividual; import edu.stanford.smi.protegex.owl.model.OWLNamedClass; import edu.stanford.smi.protegex.owl.model.RDFProperty; import edu.stanford.smi.protegex.owl.model.RDFSLiteral; import edu.stanford.smi.protegex.owl.repository.RepositoryManager; import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository; /** * Erzeugt Entities und Verwaltet gemŠ§ eines OWL-Modells. * Alle erzeugten Entities werden direkt als nturtle in eine File geschrieben. * * TODO: Dieses File ist immer /tmp/out.rdf * TODO: zur Ausgabe gibt es eine Methode @see{org.openrdf.rio.trig.TriGWriter} bzw @see{de.mpiwg.itgroup.triplestoremanager.tools.Exporter} *ie benutzt werden sollte anstelle der handgestrickten bei denen direkt in outrdf geschrieben wird. * * TODO: Einzelen Methoden hier sind noch DUOMO und CRM spezifisch, insbesondere * @author dwinter * * */ public class OWLImporter { JenaOWLModel owlModel; // contains the model Logger logger = Logger.getRootLogger(); FileWriter fh; private HashMap<String, String> typeId2className=null; private HashMap<String, OWLNamedClass> typeId2class=null; private String ontFolder; private URI uri; //private FileWriter outRDF; private OutputStreamWriter outRDF; /** * Initialisiert die Klasse und * lŠdt die Ontologien ein * @param folder Ordner mit der zu bearbeitenden Ontologie * @param uri URI der Ontologie selbst * @throws OntologyLoadException */ public OWLImporter(String folder, URI uri) throws OntologyLoadException { // owlModel = ProtegeOWL.createJenaOWLModelFromURI(uri); this.ontFolder=folder; this.uri=uri; try { this.fh= new FileWriter(new File("/tmp/identifier")); this.outRDF= new OutputStreamWriter(new FileOutputStream("/tmp/out.rdf"),"UTF-8"); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } loadOWL(folder, uri); } /** * LŠdt die Ontologie erneut ein. * @throws OntologyLoadException */ public void reloadOWL() throws OntologyLoadException{ loadOWL(ontFolder, uri); } /** * * LŠdt die Ontologien ein * @param folder Ordner mit der zu bearbeitenden Ontologie * @param uri URI der Ontologie selbst * @throws OntologyLoadException */ public void loadOWL(String folder, URI uri) throws OntologyLoadException { owlModel = ProtegeOWL.createJenaOWLModel(); // Load repository RepositoryManager rman = owlModel.getRepositoryManager(); // Add working directory to repository manager. File workingDirectory = new File(folder); rman.addGlobalRepository(new LocalFolderRepository(workingDirectory)); // Load ontology ((JenaOWLModel) owlModel).load(uri, "OWL-DL"); } /** * Drucke das Modell nach stdout */ public void printModel() { Collection<?> classes = owlModel.getUserDefinedOWLNamedClasses(); for (Iterator<?> it = classes.iterator(); it.hasNext();) { OWLNamedClass cls = (OWLNamedClass) it.next(); Collection<?> instances = cls.getInstances(false); System.out.println("Class " + cls.getBrowserText() + " (" + instances.size() + ")"); for (Iterator<?> jt = instances.iterator(); jt.hasNext();) { OWLIndividual individual = (OWLIndividual) jt.next(); System.out.println(" - " + individual.getBrowserText()); } } } public static void main(String args[]) throws URISyntaxException { String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version"; URI ontologieUri = new URI( "file:///Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version/protege_version_duomo/duomoAnalysis.owl"); try { OWLImporter im = new OWLImporter(base, ontologieUri); im.printModel(); } catch (OntologyLoadException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** Erzeuge Instanz uns schreibe in das Exportfile die enstprechende <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> Relation. * @param fullClassName Name der Classe * @return null, wenn das Objekt nicht angelegt werden kann. */ public OWLIndividual createInstance(String fullClassName) { OWLNamedClass owlclass = (OWLNamedClass) owlModel .getOWLNamedClass(fullClassName); if (owlclass == null) { logger.debug("Cannot find OWLClass:" + fullClassName); return null; } //logger.debug("Create new individual of type:"+string); OWLIndividual ind = owlclass.createOWLIndividual(null); //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} String triple = String.format("<%s> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <%s>.\n", ind.getName(),owlclass.getName()); try { outRDF.write(triple); outRDF.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return ind; } /** * Erzeuge eine Prpoerty und schreibe die entsprechenden Informationen in das out-file. * @param individual * @param propertyName * @param value * @param lang */ public void setProperty(OWLIndividual individual, String propertyName, Object value, String lang) { RDFProperty prop = owlModel.getRDFProperty(propertyName); individual.setPropertyValue(prop, value); //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} String valName=""; String triple; if (OWLIndividual.class.isInstance(value)){ valName="<"+((OWLIndividual)value).getName()+">"; triple = String.format("<%s> <%s> %s.\n", individual.getName(),prop.getName(),valName);} else{ valName=escapeRDFLit((String)value); triple = String.format("<%s> <%s> \"%s\"@%s.\n", individual.getName(),prop.getName(),valName,lang); } try { outRDF.write(triple); outRDF.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * Erzeuge eine Prpoerty und schreibe die entsprechenden Informationen in das out-file. * @param individual * @param propertyName * @param value */ public void setProperty(OWLIndividual individual, String propertyName, Object value) { RDFProperty prop = owlModel.getRDFProperty(propertyName); individual.setPropertyValue(prop, value); //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} String valName=""; if (OWLIndividual.class.isInstance(value)) valName="<"+((OWLIndividual)value).getName()+">"; else valName="\""+escapeRDFLit((String)value)+"\""; String triple = String.format("<%s> <%s> %s.\n", individual.getName(),prop.getName(),valName); try { outRDF.write(triple); outRDF.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } private String escapeRDFLit(String string){ return string.replace("\"", ""); } /** Erzeuge DatatypePropery schreibe die entsprechenden Informationen in das out-file. * @param individual * @param propertyName * @param value * @param lang Sprach-tag */ public void setDataTypePropery(OWLIndividual individual, String propertyName, String value, String lang) { RDFProperty prop = owlModel.getRDFProperty(propertyName); // if(OWLDatatypeProperty.class.isInstance(prop)){ // OWLDatatypeProperty dp = (OWLDatatypeProperty)prop; // prop.set // // } else { // logger.error("Is not a datatypeprop:"+propertyName); // } RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang); individual.setPropertyValue(prop, langLiteral); //TODO: replace the following by @see{org.openrdf.rio.trig.TriGWriter} String triple = String.format("<%s> <%s> \"%s\"@%s.\n", individual.getName(),prop.getName(),escapeRDFLit(langLiteral.getString()),lang); try { outRDF.write(triple); outRDF.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } /** * Gibt Individual zurŸck das Ÿber die Property mit der Quelle verbunden ist. * @param sourceIndividual * @param propertyName * @return */ public Object getRelatedIndividual(OWLIndividual sourceIndividual, String propertyName) { RDFProperty prop = owlModel.getRDFProperty(propertyName); if (prop == null) { logger.debug("Property does not exist:" + propertyName); } Object value = sourceIndividual.getPropertyValue(prop); return value; } public OWLIndividual getIndividual(String classNameFrom, String propertyNameToClass, String classNameTo, String propertyName2, Object value) { return getIndividual(classNameFrom, propertyNameToClass, classNameTo, propertyName2, value, true); } public OWLIndividual getIndividual(String classNameFrom, String propertyNameToClass, String classNameTo, String propertyName2, Object value,boolean subclasses) { RDFProperty prop = owlModel.getRDFProperty(propertyName2); OWLClass classTo = owlModel.getOWLNamedClass(classNameTo); Collection<?> instances = classTo.getInstances(subclasses); OWLIndividual toIv = null; // Such individiual mit propertyName2 und Wert value for (Iterator<?> jt = instances.iterator(); jt.hasNext();) { OWLIndividual iv = (OWLIndividual) jt.next(); Object propVal = iv.getPropertyValue(prop); if (propVal!=null && iv.getPropertyValue(prop).equals(value)) { toIv = iv; break; } } // kein treffer if (toIv == null) return null; // jetzt suche das Invidual, dass mit dieser Instance verbunden ist RDFProperty prop2 = owlModel.getRDFProperty(propertyNameToClass); OWLClass classFrom = owlModel.getOWLNamedClass(classNameFrom); Collection<?> instancesFrom = classFrom.getInstances(true); OWLIndividual fromIv = null; // Such individiual mit propertyName2 und Wert value for (Iterator<?> jt = instancesFrom.iterator(); jt.hasNext();) { OWLIndividual iv = (OWLIndividual) jt.next(); Object propValue = iv.getPropertyValue(prop2); if (propValue!=null && iv.getPropertyValue(prop2).equals(toIv)) { fromIv = iv; break; } } return fromIv; } public List<OWLIndividual> getIndividuals(String classNameFrom, String propertyNameToClass, String classNameTo, String propertyName2, Object value) { List<OWLIndividual> returnList= new ArrayList<OWLIndividual>(); RDFProperty prop = owlModel.getRDFProperty(propertyName2); OWLClass classTo = owlModel.getOWLNamedClass(classNameTo); Collection<?> instances = classTo.getInstances(true); OWLIndividual toIv = null; // Such individiual mit propertyName2 und Wert value for (Iterator<?> jt = instances.iterator(); jt.hasNext();) { OWLIndividual iv = (OWLIndividual) jt.next(); if (iv.getPropertyValue(prop).equals(value)) { toIv = iv; break; } } // kein treffer if (toIv == null) return null; // jetzt suche das Invidual, dass mit dieser Instance verbunden ist RDFProperty prop2 = owlModel.getRDFProperty(propertyNameToClass); OWLClass classFrom = owlModel.getOWLNamedClass(classNameFrom); Collection<?> instancesFrom = classFrom.getInstances(true); //OWLIndividual fromIv = null; // Such individiual mit propertyName2 und Wert value for (Iterator<?> jt = instancesFrom.iterator(); jt.hasNext();) { OWLIndividual iv = (OWLIndividual) jt.next(); if (iv.getPropertyValue(prop2).equals(toIv)) { returnList.add(iv); break; } } return returnList; } public List<OWLIndividual> getIndividuals(String className, String propertyName, OWLIndividual targetInd) { List<OWLIndividual> returnList= new ArrayList<OWLIndividual>(); RDFProperty prop = owlModel.getRDFProperty(propertyName); OWLClass cls = owlModel.getOWLNamedClass(className); Collection<?> instances = cls.getInstances(true); // Such individiual mit propertyName2 und Wert value for (Iterator<?> jt = instances.iterator(); jt.hasNext();) { OWLIndividual iv = (OWLIndividual) jt.next(); if (iv.getPropertyValue(prop).equals(targetInd)) { returnList.add(iv); } } return returnList; } public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) { // A date is described by an timespan which is described by an appellation //TODO: die lesbaren daten gehšren in time span mit den angaben wir ungefŠhr und soweiter // d.h. die klammern um ein Datum die ungefaehr signalisieren, gehoeren in die Beschreibung der //Qualifier in Time span // in DuomoDate_Appellation kommen die genauen teile // zunaechst schreibe ich in "has_readable" hier genau rein was im Text steht // dieses muss noch analysiert werden // so sollte 1432 gennai 9 --> in ein Computer lesbares modell umgesetzt werden // und 1432 gennaio (9) muss in 1432 gennaio 9 in DuomoDate_Appellation eingetragen werden // und dann mit P79 und P80 in DuomoDate_TimeSpan genauer beschrieben werden, d.h in P79 kommt dann // ungefaehr // ausserdem mussesn "POST" und "ANTE aus dem Text in das Modell eingepflegt werden. OWLIndividual timeSpan = createInstance("DuomoDate_TimeSpan"); OWLIndividual date= createInstance("DuomoDate_Appellation"); setProperty(date, "rdfs:label", dateDcStart); //setProperty(timeSpan, "has_readable_date", dateDcStart); setProperty(timeSpan,"is_identified_by_Date",date); if(!(dateDcEnd==null || dateDcEnd.equals(""))){ OWLIndividual toDate= createInstance("DuomoDate_Appellation"); setProperty(toDate, "rdfs:label", dateDcEnd); //setProperty(timeSpan, "has_readable_toDate", dateDcEnd); setProperty(timeSpan,"is_identified_by_toDate",toDate); } else { dateDcEnd=dateDcStart; } OWLIndividual timeSpanIdentifier = createInstance("Identifier"); setProperty(timeSpanIdentifier,"rdfs:label",dateDcStart+"-"+dateDcEnd); setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); return timeSpan; } // public OWLIndividual createTimeSpan2(String dateDcStart, String dateDcEnd) { // OWLIndividual timeSpan = createInstance("DatesDocument"); // // // // setProperty(timeSpan, "has_readable_date", dateDcStart); // // if(!(dateDcEnd==null || dateDcEnd.equals(""))){ // setProperty(timeSpan, "has_readable_to_date", dateDcEnd); // } else { // dateDcEnd=dateDcStart; // } // // OWLIndividual timeSpanIdentifier = createInstance("Identifier"); // setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd); // // setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier); // // return timeSpan; // } public OWLIndividual getIndividualByReadableId(String className,String identifier){ return getIndividual(className, "crm:P48_has_preferred_identifier", "Identifier", "rdfs:label", identifier, true); } public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){ return getIndividual(className, "crm:P48_has_preferred_identifier", classNameIdentifier, "rdfs:label", identifier,subclassedIdentifier); } public String getClassNameFromTypeId(String typeId) { if (typeId2className==null){ // hash nicht angelegt createTypeId2classHashes(); } return typeId2className.get(typeId); } private void createTypeId2classHashes() { typeId2className= new HashMap<String,String>(); typeId2class= new HashMap<String,OWLNamedClass>(); Collection<?> classes = owlModel.getUserDefinedOWLNamedClasses(); RDFProperty property = owlModel.getRDFProperty("duomoTypeID"); for (Iterator<?> it = classes.iterator(); it.hasNext();) { OWLNamedClass cls = (OWLNamedClass) it.next(); String tid = (String) cls.getPropertyValue(property); if(tid!=null){ typeId2class.put(tid, cls); typeId2className.put(tid, cls.getName()); logger.debug("Adding to typId2className:"+tid+"---"+cls.getName()); } } } public OWLNamedClass getClassFromTypeId(String typeId) { if (typeId2className==null){ // hash nicht angelegt createTypeId2classHashes(); } return typeId2class.get(typeId); } public void save(String string) throws URISyntaxException, Exception { owlModel.save(new URI(string)); } public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance, String classNameIdentifier, String identifier,boolean followSubclasses) { identifier=org.apache.commons.lang.StringUtils.strip(identifier); OWLIndividual ind = getIndividualByReadableId(classNameInstance, identifier,classNameIdentifier,followSubclasses); if(ind==null){ ind = createInstance(classNameInstance); OWLIndividual identifierInd = createInstance(classNameIdentifier); setProperty(identifierInd, "rdfs:label", identifier); setProperty(identifierInd, "rdfs:label", identifier,"en"); try { fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n"); fh.flush(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } setProperty(ind, "crm:P48_has_preferred_identifier", identifierInd); } return ind; } }