view src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java @ 8:919e9f3b5efd

Neue Klassen zur Textanalyse (Stanford-Parser eingebaut); alle has_readable_label-Datatype-Properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents 19e40abb3e8a
children 4392a6adf85a
line wrap: on
line source

package de.mpiwg.dwinter.duomo.lexdump;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;

import javax.swing.text.html.HTMLDocument.HTMLReader.IsindexAction;

import org.apache.log4j.Logger;

import edu.stanford.smi.protege.exception.OntologyLoadException;
import edu.stanford.smi.protegex.owl.ProtegeOWL;
import edu.stanford.smi.protegex.owl.jena.JenaOWLModel;
import edu.stanford.smi.protegex.owl.model.OWLClass;
import edu.stanford.smi.protegex.owl.model.OWLIndividual;
import edu.stanford.smi.protegex.owl.model.OWLNamedClass;
import edu.stanford.smi.protegex.owl.model.RDFProperty;
import edu.stanford.smi.protegex.owl.model.RDFSLiteral;
import edu.stanford.smi.protegex.owl.repository.RepositoryManager;
import edu.stanford.smi.protegex.owl.repository.impl.LocalFolderRepository;

/**
 * Creates and manages entities according to an OWL model.
 * Every entity that is created is written directly to a file as N-Triples-style statements.
 * 
 * TODO: this file is always /tmp/out.rdf
 * TODO: for the output there are {@code org.openrdf.rio.trig.TriGWriter} and
 * {@code de.mpiwg.itgroup.triplestoremanager.tools.Exporter}, which should be used
 * instead of the hand-rolled methods that write directly to outRDF.
 * 
 * TODO: some methods here are still DUOMO and CRM specific.
 *
 * @author dwinter
 */
public class OWLImporter {

	// The ontology model; (re)assigned on every call to loadOWL.
	JenaOWLModel owlModel; // contains the model
	// log4j root logger used for the diagnostic messages of this class.
	Logger logger = Logger.getRootLogger();
	// Writer for the identifier log; the constructor opens it on /tmp/identifier.
	FileWriter fh;
	// Lookup tables from a class's "duomoTypeID" value to the class name / the class;
	// both stay null until createTypeId2classHashes builds them on first use.
	private HashMap<String, String> typeId2className=null;
	private HashMap<String, OWLNamedClass> typeId2class=null;
	// Folder and URI given to the constructor; kept so reloadOWL can load the same ontology again.
	private String ontFolder;
	private URI uri;
	//private FileWriter outRDF;
	// UTF-8 writer for the exported statements; the constructor opens it on /tmp/out.rdf.
	private OutputStreamWriter outRDF;

	/**
	 * Opens the two output files and loads the ontology.
	 * <p>
	 * The identifier log is written to {@code /tmp/identifier} and the
	 * statement export to {@code /tmp/out.rdf} (UTF-8). If a file cannot be
	 * opened the failure is reported through the class logger and construction
	 * continues; a writer that could not be opened stays {@code null}.
	 *
	 * @param folder folder containing the ontology to work on
	 * @param uri URI of the ontology itself
	 * @throws OntologyLoadException propagated from {@link #loadOWL(String, URI)}
	 */
	public OWLImporter(String folder, URI uri) throws OntologyLoadException {
		this.ontFolder=folder;
		this.uri=uri;
		try {
			this.fh= new FileWriter(new File("/tmp/identifier"));
			this.outRDF= new OutputStreamWriter(new FileOutputStream("/tmp/out.rdf"),"UTF-8");
		} catch (IOException e) {
			// Report through the logger (with the cause) instead of dumping the stack trace to stderr.
			logger.error("Cannot open output files /tmp/identifier and /tmp/out.rdf", e);
		}
		loadOWL(folder, uri);
	}

	/**
	 * Loads the ontology again, using the folder and the URI this importer
	 * was constructed with.
	 *
	 * @throws OntologyLoadException propagated from {@link #loadOWL(String, URI)}
	 */
	public void reloadOWL() throws OntologyLoadException{
		final String folder = this.ontFolder;
		final URI location = this.uri;
		loadOWL(folder, location);
	}
	
	/**
	 * Creates a fresh model, registers {@code folder} as a global local-folder
	 * repository on it and then loads the ontology at {@code uri} with the
	 * language setting "OWL-DL".
	 *
	 * @param folder folder containing the ontology to work on
	 * @param uri URI of the ontology itself
	 * @throws OntologyLoadException if the model cannot be created or loaded
	 */
	public void loadOWL(String folder, URI uri) throws OntologyLoadException {
		// The field is replaced first; everything below works on the new model.
		owlModel = ProtegeOWL.createJenaOWLModel();

		// Make the working directory known to the model's repository manager.
		RepositoryManager repositories = owlModel.getRepositoryManager();
		repositories.addGlobalRepository(new LocalFolderRepository(new File(folder)));

		owlModel.load(uri, "OWL-DL");
	}

	/**
	 * Prints the model to stdout: one header line per user-defined named class
	 * with the number of its direct instances, followed by one line per
	 * instance.
	 */
	public void printModel() {
		for (Object classEntry : owlModel.getUserDefinedOWLNamedClasses()) {
			OWLNamedClass namedClass = (OWLNamedClass) classEntry;
			Collection<?> members = namedClass.getInstances(false);
			String header = "Class " + namedClass.getBrowserText() + " ("
					+ members.size() + ")";
			System.out.println(header);
			for (Object member : members) {
				OWLIndividual individual = (OWLIndividual) member;
				System.out.println(" - " + individual.getBrowserText());
			}
		}
	}

	/**
	 * Command line entry point: loads the DUOMO analysis ontology from the
	 * hard-coded workspace location and prints the model to stdout.
	 * <p>
	 * The ontology location is derived with {@link File#toURI()} because the
	 * path contains spaces; {@link URI#URI(String)} rejects unencoded spaces
	 * with a {@link URISyntaxException}, so the literal "file:///..." string
	 * could never be parsed.
	 *
	 * @param args not used
	 * @throws URISyntaxException kept in the signature for compatibility
	 */
	public static void main(String args[]) throws URISyntaxException {
		String base = "/Users/dwinter/Documents/Projekte/Diss - data-mining/eclipseWorkspace/duomoData/owl-version";
		// File.toURI() percent-encodes the spaces in the path.
		URI ontologieUri = new File(base, "protege_version_duomo/duomoAnalysis.owl").toURI();
		try {
			OWLImporter im = new OWLImporter(base, ontologieUri);
			im.printModel();
		} catch (OntologyLoadException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates a new individual of the given class and appends the matching
	 * {@code <http://www.w3.org/1999/02/22-rdf-syntax-ns#type>} statement to
	 * the export file.
	 *
	 * @param fullClassName name of the class
	 * @return the new individual, or {@code null} if the model has no class of that name
	 */
	public OWLIndividual createInstance(String fullClassName) {
		OWLNamedClass type = owlModel.getOWLNamedClass(fullClassName);
		if (type != null) {
			OWLIndividual created = type.createOWLIndividual(null);

			//TODO: replace the following by org.openrdf.rio.trig.TriGWriter
			StringBuilder statement = new StringBuilder();
			statement.append("<").append(created.getName());
			statement.append("> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <");
			statement.append(type.getName()).append(">.\n");
			try {
				outRDF.write(statement.toString());
				outRDF.flush();
			} catch (IOException e) {
				e.printStackTrace();
			}
			return created;
		}

		logger.debug("Cannot find OWLClass:" + fullClassName);
		return null;
	}

	
	/**
	 * Sets a property value on an individual and appends the corresponding
	 * statement to the export file.
	 * <p>
	 * An {@link OWLIndividual} value is written as a resource reference. Any
	 * other value is written as a plain literal: an {@link RDFSLiteral}
	 * through its string content, everything else through {@code toString()}.
	 *
	 * @param individual individual that receives the value
	 * @param propertyName name of the property in the model
	 * @param value an individual or a literal value
	 * @throws IllegalArgumentException if the model has no property named {@code propertyName}
	 */
	public void setProperty(OWLIndividual individual, String propertyName,
			Object value) {
		RDFProperty prop = owlModel.getRDFProperty(propertyName);
		if (prop == null) {
			// Fail before the model is touched, with the offending name in the message.
			throw new IllegalArgumentException("Property does not exist:" + propertyName);
		}
		individual.setPropertyValue(prop, value);

		//TODO: replace the following by org.openrdf.rio.trig.TriGWriter
		String valName;
		if (value instanceof OWLIndividual) {
			valName = "<" + ((OWLIndividual) value).getName() + ">";
		} else if (value instanceof RDFSLiteral) {
			valName = "\"" + escapeRDFLit(((RDFSLiteral) value).getString()) + "\"";
		} else {
			// toString() instead of a (String) cast, so non-String literals
			// (numbers, booleans, ...) no longer raise a ClassCastException.
			valName = "\"" + escapeRDFLit(value.toString()) + "\"";
		}

		String triple = String.format("<%s> <%s> %s.\n", individual.getName(), prop.getName(), valName);
		try {
			outRDF.write(triple);
			outRDF.flush();
		} catch (IOException e) {
			logger.error("Cannot write statement to export file: " + triple, e);
		}
	}
	
	/**
	 * Escapes a string so that it can be placed between double quotes as a
	 * literal in the export file.
	 * <p>
	 * Backslash, double quote, line feed, carriage return and tab are replaced
	 * by their backslash escapes, so the literal keeps its full content and
	 * always stays on a single line. All other characters are copied as is.
	 *
	 * @param string raw literal text, must not be {@code null}
	 * @return the escaped text, without surrounding quotes
	 */
	private static String escapeRDFLit(String string){
		StringBuilder escaped = new StringBuilder(string.length() + 8);
		for (int i = 0; i < string.length(); i++) {
			char c = string.charAt(i);
			switch (c) {
			case '\\':
				escaped.append("\\\\");
				break;
			case '"':
				escaped.append("\\\"");
				break;
			case '\n':
				escaped.append("\\n");
				break;
			case '\r':
				escaped.append("\\r");
				break;
			case '\t':
				escaped.append("\\t");
				break;
			default:
				escaped.append(c);
				break;
			}
		}
		return escaped.toString();
	}
	
	/**
	 * Sets a literal property value with a language tag on an individual and
	 * appends the corresponding statement to the export file.
	 * <p>
	 * When {@code lang} is {@code null} or empty the literal is exported
	 * without a language tag instead of with a dangling "@".
	 *
	 * @param individual individual that receives the value
	 * @param propertyName name of the property in the model
	 * @param value literal text
	 * @param lang language tag, may be {@code null} or empty
	 * @throws IllegalArgumentException if the model has no property named {@code propertyName}
	 */
	public void setDataTypePropery(OWLIndividual individual,
			String propertyName, String value, String lang) {
		RDFProperty prop = owlModel.getRDFProperty(propertyName);
		if (prop == null) {
			// Fail before the model is touched, with the offending name in the message.
			throw new IllegalArgumentException("Property does not exist:" + propertyName);
		}

		RDFSLiteral langLiteral = owlModel.createRDFSLiteral(value, lang);
		individual.setPropertyValue(prop, langLiteral);

		//TODO: replace the following by org.openrdf.rio.trig.TriGWriter
		StringBuilder triple = new StringBuilder();
		triple.append("<").append(individual.getName()).append("> <");
		triple.append(prop.getName()).append("> \"");
		triple.append(escapeRDFLit(langLiteral.getString())).append("\"");
		if (lang != null && lang.length() > 0) {
			triple.append("@").append(lang);
		}
		triple.append(".\n");
		try {
			outRDF.write(triple.toString());
			outRDF.flush();
		} catch (IOException e) {
			logger.error("Cannot write statement to export file: " + triple, e);
		}
	}

	/**
	 * Returns the value that the given property has on the source individual.
	 *
	 * @param sourceIndividual individual whose property is read
	 * @param propertyName name of the property in the model
	 * @return the property value as returned by the model, or {@code null}
	 *         if the model has no property named {@code propertyName}
	 */
	public Object getRelatedIndividual(OWLIndividual sourceIndividual,
			String propertyName) {
		RDFProperty prop = owlModel.getRDFProperty(propertyName);
		if (prop == null) {
			logger.debug("Property does not exist:" + propertyName);
			// Nothing can be related through an unknown property; do not hand null to the model.
			return null;
		}

		return sourceIndividual.getPropertyValue(prop);
	}

	
	/**
	 * Convenience overload of the six-argument {@code getIndividual} that
	 * passes {@code true} for the {@code subclasses} flag.
	 */
	public OWLIndividual getIndividual(String classNameFrom,
			String propertyNameToClass, String classNameTo,
			String propertyName2, Object value) {
		final boolean includeSubclasses = true;
		return getIndividual(classNameFrom, propertyNameToClass, classNameTo,
				propertyName2, value, includeSubclasses);
	}
	

	/**
	 * Two-step lookup: first finds the instance of {@code classNameTo} whose
	 * property {@code propertyName2} equals {@code value}, then returns the
	 * first instance of {@code classNameFrom} whose property
	 * {@code propertyNameToClass} equals that instance.
	 *
	 * @param classNameFrom class of the individual to return
	 * @param propertyNameToClass property linking the returned individual to the target instance
	 * @param classNameTo class of the target instance
	 * @param propertyName2 property of the target instance that is compared with {@code value}
	 * @param value value to look for
	 * @param subclasses passed to {@code getInstances} for {@code classNameTo};
	 *        the search in {@code classNameFrom} always includes subclasses
	 * @return the matching individual, or {@code null} if a class is unknown
	 *         or nothing matches
	 */
	public OWLIndividual getIndividual(String classNameFrom,
			String propertyNameToClass, String classNameTo,
			String propertyName2, Object value,boolean subclasses) {

		OWLClass classTo = owlModel.getOWLNamedClass(classNameTo);
		if (classTo == null) {
			logger.debug("Cannot find OWLClass:" + classNameTo);
			return null;
		}
		OWLClass classFrom = owlModel.getOWLNamedClass(classNameFrom);
		if (classFrom == null) {
			logger.debug("Cannot find OWLClass:" + classNameFrom);
			return null;
		}

		// Step 1: find the target instance carrying the requested value.
		RDFProperty prop = owlModel.getRDFProperty(propertyName2);
		OWLIndividual toIv = null;
		for (Object candidate : classTo.getInstances(subclasses)) {
			OWLIndividual iv = (OWLIndividual) candidate;
			Object propVal = iv.getPropertyValue(prop);
			if (propVal != null && propVal.equals(value)) {
				toIv = iv;
				break;
			}
		}

		// no hit
		if (toIv == null)
			return null;

		// Step 2: find the individual that is connected to that instance.
		RDFProperty prop2 = owlModel.getRDFProperty(propertyNameToClass);
		for (Object candidate : classFrom.getInstances(true)) {
			OWLIndividual iv = (OWLIndividual) candidate;
			Object propValue = iv.getPropertyValue(prop2);
			if (propValue != null && propValue.equals(toIv)) {
				return iv;
			}
		}

		return null;
	}
	
	/**
	 * Two-step lookup returning all matches: first finds the instance of
	 * {@code classNameTo} (subclasses included) whose property
	 * {@code propertyName2} equals {@code value}, then collects every instance
	 * of {@code classNameFrom} (subclasses included) whose property
	 * {@code propertyNameToClass} equals that instance.
	 * <p>
	 * Instances that have no value for the compared property are skipped.
	 *
	 * @param classNameFrom class of the individuals to return
	 * @param propertyNameToClass property linking the returned individuals to the target instance
	 * @param classNameTo class of the target instance
	 * @param propertyName2 property of the target instance that is compared with {@code value}
	 * @param value value to look for
	 * @return {@code null} if a class is unknown or no target instance carries
	 *         {@code value}; otherwise the (possibly empty) list of connected individuals
	 */
	public List<OWLIndividual> getIndividuals(String classNameFrom,
			String propertyNameToClass, String classNameTo,
			String propertyName2, Object value) {

		OWLClass classTo = owlModel.getOWLNamedClass(classNameTo);
		if (classTo == null) {
			logger.debug("Cannot find OWLClass:" + classNameTo);
			return null;
		}
		OWLClass classFrom = owlModel.getOWLNamedClass(classNameFrom);
		if (classFrom == null) {
			logger.debug("Cannot find OWLClass:" + classNameFrom);
			return null;
		}

		// Step 1: find the target instance carrying the requested value.
		RDFProperty prop = owlModel.getRDFProperty(propertyName2);
		OWLIndividual toIv = null;
		for (Object candidate : classTo.getInstances(true)) {
			OWLIndividual iv = (OWLIndividual) candidate;
			Object propVal = iv.getPropertyValue(prop);
			if (propVal != null && propVal.equals(value)) {
				toIv = iv;
				break;
			}
		}

		// no hit
		if (toIv == null)
			return null;

		// Step 2: collect every individual connected to that instance
		// (no early exit here, the caller asked for all of them).
		List<OWLIndividual> returnList = new ArrayList<OWLIndividual>();
		RDFProperty prop2 = owlModel.getRDFProperty(propertyNameToClass);
		for (Object candidate : classFrom.getInstances(true)) {
			OWLIndividual iv = (OWLIndividual) candidate;
			Object propValue = iv.getPropertyValue(prop2);
			if (propValue != null && propValue.equals(toIv)) {
				returnList.add(iv);
			}
		}

		return returnList;
	}

	/**
	 * Collects every instance of {@code className} (subclasses included) whose
	 * property {@code propertyName} equals {@code targetInd}.
	 * <p>
	 * Instances that have no value for the property are skipped.
	 *
	 * @param className class whose instances are searched
	 * @param propertyName property that is compared with {@code targetInd}
	 * @param targetInd individual to look for
	 * @return the matching individuals; empty if the class is unknown or nothing matches
	 */
	public List<OWLIndividual> getIndividuals(String className, String propertyName,
			OWLIndividual targetInd) {
		List<OWLIndividual>  returnList= new ArrayList<OWLIndividual>();

		OWLClass cls = owlModel.getOWLNamedClass(className);
		if (cls == null) {
			logger.debug("Cannot find OWLClass:" + className);
			return returnList;
		}

		RDFProperty prop = owlModel.getRDFProperty(propertyName);
		for (Object candidate : cls.getInstances(true)) {
			OWLIndividual iv = (OWLIndividual) candidate;
			Object propVal = iv.getPropertyValue(prop);
			if (propVal != null && propVal.equals(targetInd)) {
				returnList.add(iv);
			}
		}

		return returnList;
	}
	
	/**
	 * Creates a "DuomoDate_TimeSpan" individual that is identified by a
	 * "DuomoDate_Appellation" for the start date, optionally by a second one
	 * for the end date, and that gets an "Identifier" labelled
	 * {@code start-end} as preferred identifier. When no end date is given
	 * the start date is used for the end part of that label.
	 *
	 * @param dateDcStart start date text
	 * @param dateDcEnd end date text; {@code null} or empty if there is none
	 * @return the time span individual
	 */
	public OWLIndividual createTimeSpan(String dateDcStart, String dateDcEnd) {
		// A date is described by a time span which is described by an appellation.
		//
		// TODO: the readable dates belong into the time span together with
		// qualifiers such as "approximately", i.e. the parentheses around a date
		// that signal "approximately" belong into the description of the
		// qualifiers of the time span; DuomoDate_Appellation gets the exact parts.
		// For now the label receives exactly what is written in the text; this
		// still has to be analysed. E.g. "1432 gennai 9" should be turned into a
		// machine-readable model, and "1432 gennaio (9)" has to be entered as
		// "1432 gennaio 9" in DuomoDate_Appellation and then be described more
		// precisely with P79 and P80 in DuomoDate_TimeSpan, i.e. P79 then carries
		// "approximately". In addition "POST" and "ANTE" from the text have to be
		// brought into the model.

		OWLIndividual span = createInstance("DuomoDate_TimeSpan");

		OWLIndividual startAppellation = createInstance("DuomoDate_Appellation");
		setProperty(startAppellation, "rdfs:label", dateDcStart);
		setProperty(span, "is_identified_by_Date", startAppellation);

		final String endLabel;
		if (dateDcEnd != null && !dateDcEnd.equals("")) {
			OWLIndividual endAppellation = createInstance("DuomoDate_Appellation");
			setProperty(endAppellation, "rdfs:label", dateDcEnd);
			setProperty(span, "is_identified_by_toDate", endAppellation);
			endLabel = dateDcEnd;
		} else {
			// Without an end date the identifier label repeats the start date.
			endLabel = dateDcStart;
		}

		OWLIndividual spanIdentifier = createInstance("Identifier");
		setProperty(spanIdentifier, "rdfs:label", dateDcStart + "-" + endLabel);
		setProperty(span, "crm:P48_has_preferred_identifier", spanIdentifier);

		return span;
	}
	
//	public OWLIndividual createTimeSpan2(String dateDcStart, String dateDcEnd) {
//		OWLIndividual timeSpan = createInstance("DatesDocument");
//		
//			
//		
//		setProperty(timeSpan, "has_readable_date", dateDcStart);
//		
//		if(!(dateDcEnd==null || dateDcEnd.equals(""))){
//			setProperty(timeSpan, "has_readable_to_date", dateDcEnd);
//		} else {
//			dateDcEnd=dateDcStart;
//		}
//		
//		OWLIndividual timeSpanIdentifier = createInstance("Identifier");
//		setProperty(timeSpanIdentifier,"has_readable_id",dateDcStart+"-"+dateDcEnd);
//		
//		setProperty(timeSpan,"crm:P48_has_preferred_identifier",timeSpanIdentifier);
//		
//		return timeSpan;
//	}


	/**
	 * Looks up the individual of {@code className} whose preferred identifier
	 * is an "Identifier" instance (subclasses included) labelled {@code identifier}.
	 *
	 * @return the individual, or {@code null} if none is found
	 */
	public OWLIndividual getIndividualByReadableId(String className,String identifier){
		final String linkProperty = "crm:P48_has_preferred_identifier";
		final String labelProperty = "rdfs:label";
		return getIndividual(className, linkProperty, "Identifier", labelProperty, identifier, true);
	}

	/**
	 * Looks up the individual of {@code className} whose preferred identifier
	 * is an instance of {@code classNameIdentifier} labelled {@code identifier}.
	 *
	 * @param subclassedIdentifier whether instances of subclasses of
	 *        {@code classNameIdentifier} are considered as identifiers
	 * @return the individual, or {@code null} if none is found
	 */
	public OWLIndividual getIndividualByReadableId(String className,String identifier, String classNameIdentifier,boolean subclassedIdentifier){
		final String linkProperty = "crm:P48_has_preferred_identifier";
		final String labelProperty = "rdfs:label";
		return getIndividual(className, linkProperty, classNameIdentifier, labelProperty, identifier, subclassedIdentifier);
	}

	/**
	 * Returns the name of the class registered for the given type id; the
	 * lookup tables are built on first use.
	 *
	 * @return the class name, or {@code null} if no class is registered for {@code typeId}
	 */
	public String getClassNameFromTypeId(String typeId) {
		boolean tablesMissing = (typeId2className == null);
		if (tablesMissing) {
			createTypeId2classHashes();
		}
		String className = typeId2className.get(typeId);
		return className;
	}

	/**
	 * (Re)builds both lookup tables from the user-defined named classes of
	 * the model: every class that has a value for the "duomoTypeID" property
	 * is registered under that value, once by class and once by class name.
	 */
	private void createTypeId2classHashes() {
		typeId2className= new HashMap<String,String>();
		typeId2class= new HashMap<String,OWLNamedClass>();

		Collection<?> userClasses = owlModel.getUserDefinedOWLNamedClasses();
		RDFProperty typeIdProperty = owlModel.getRDFProperty("duomoTypeID");

		for (Object entry : userClasses) {
			OWLNamedClass namedClass = (OWLNamedClass) entry;
			String typeId = (String) namedClass.getPropertyValue(typeIdProperty);
			if (typeId == null) {
				// Classes without a type id are not registered.
				continue;
			}
			typeId2class.put(typeId, namedClass);
			typeId2className.put(typeId, namedClass.getName());
			logger.debug("Adding to typId2className:"+typeId+"---"+namedClass.getName());
		}
	}

	/**
	 * Returns the class registered for the given type id; the lookup tables
	 * are built on first use.
	 *
	 * @return the class, or {@code null} if no class is registered for {@code typeId}
	 */
	public OWLNamedClass getClassFromTypeId(String typeId) {
		// Both tables are created together, so checking one of them is enough.
		boolean tablesMissing = (typeId2className == null);
		if (tablesMissing) {
			createTypeId2classHashes();
		}
		OWLNamedClass registered = typeId2class.get(typeId);
		return registered;
	}

	/**
	 * Saves the model to the given location.
	 *
	 * @param string target location, parsed as a URI
	 * @throws URISyntaxException if {@code string} is not a valid URI
	 * @throws Exception propagated from the model's save call
	 */
	public void save(String string) throws URISyntaxException, Exception {
		URI target = new URI(string);
		owlModel.save(target);
	}

	/**
	 * Returns the individual of {@code classNameInstance} whose preferred
	 * identifier is labelled with the stripped {@code identifier}; if there is
	 * none, creates the individual together with a new identifier instance of
	 * {@code classNameIdentifier}, links the two and records the creation in
	 * the identifier log.
	 *
	 * @param classNameInstance class of the individual
	 * @param classNameIdentifier class of the identifier instance
	 * @param identifier readable identifier; surrounding whitespace is stripped
	 * @param followSubclasses whether subclasses of {@code classNameIdentifier}
	 *        are considered when searching for an existing identifier
	 * @return the existing or the newly created individual
	 */
	public OWLIndividual createOrGetInstanceWithIdentifier(String classNameInstance,
			String classNameIdentifier, String identifier,boolean followSubclasses) {

		String strippedId = org.apache.commons.lang.StringUtils.strip(identifier);

		OWLIndividual existing = getIndividualByReadableId(classNameInstance, strippedId,
				classNameIdentifier, followSubclasses);
		if (existing != null) {
			return existing;
		}

		OWLIndividual created = createInstance(classNameInstance);
		OWLIndividual identifierInd = createInstance(classNameIdentifier);
		setProperty(identifierInd, "rdfs:label", strippedId);

		String logLine = classNameInstance+" --" +classNameIdentifier+"---"+strippedId+"\n";
		try {
			fh.write(logLine);
			fh.flush();
		} catch (IOException e) {
			e.printStackTrace();
		}

		setProperty(created, "crm:P48_has_preferred_identifier", identifierInd);
		return created;
	}
	

}