changeset 9:4392a6adf85a default tip

New version of the labels with language tag
author dwinter
date Thu, 16 Aug 2012 11:40:17 +0200
parents 919e9f3b5efd
children
files protege.properties src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java
diffstat 4 files changed, 105 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/protege.properties	Thu Jun 21 17:08:22 2012 +0200
+++ b/protege.properties	Thu Aug 16 11:40:17 2012 +0200
@@ -1,5 +1,5 @@
 #Protege Properties
-#Thu Jun 21 17:04:13 CEST 2012
+#Wed Aug 15 12:39:38 CEST 2012
+history.projects.reopen=file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/newspaper/newspaper.pprj,file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/sqwrl/SQWRLExamples.pprj,file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/pizza/pizza.owl.pprj
 SwitchableClassDefinitionType=edu.stanford.smi.protegex.owl.ui.cls.LogicClassDefinitionWidgetType
-history.projects.reopen=file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/newspaper/newspaper.pprj,file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/sqwrl/SQWRLExamples.pprj,file\:/Users/dwinter/Documents/Projekte/Diss%20-%20data-mining/eclipseWorkspace/de.mpiwg.dwinter.duomo/examples/pizza/pizza.owl.pprj
 OntURIBase=http\://www.owl-ontologies.com
--- a/src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java	Thu Jun 21 17:08:22 2012 +0200
+++ b/src/de/mpiwg/dwinter/duomo/lexdump/LexOWLTransformer.java	Thu Aug 16 11:40:17 2012 +0200
@@ -637,7 +637,15 @@
 		String segheader = segHeaderElement.getTextTrim();
 
 		OWLIndividual signatureInstance = owlDoc.createInstance("Signatur");
-		owlDoc.setProperty(signatureInstance, "rdfs:label", segheader);
+		
+		
+		OWLIndividual preferredId = owlDoc.createInstance("Identifier");
+		owlDoc.setProperty(preferredId, "rdfs:label", segheader);
+
+		owlDoc.setProperty(signatureInstance, "crm:P48_has_preferred_identifier",
+				preferredId);
+
+		
 		return signatureInstance;
 	}
 
--- a/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java	Thu Jun 21 17:08:22 2012 +0200
+++ b/src/de/mpiwg/dwinter/duomo/lexdump/OWLImporter.java	Thu Aug 16 11:40:17 2012 +0200
@@ -171,6 +171,42 @@
 	 * @param individual 
 	 * @param propertyName
 	 * @param value
+	 * @param lang
+	 */
+	public void setProperty(OWLIndividual individual, String propertyName,
+			Object value, String lang) {
+		// Update the in-memory OWL model first; the text written below mirrors that change.
+		RDFProperty prop = owlModel.getRDFProperty(propertyName);
+		individual.setPropertyValue(prop, value);
+
+		//TODO: replace the following by  @see{org.openrdf.rio.trig.TriGWriter}
+		String subject = individual.getName();
+		String predicate = prop.getName();
+		String triple;
+		if (value instanceof OWLIndividual) {
+			// Resource-valued property: the object is written as an IRI reference; lang does not apply.
+			String valName = "<" + ((OWLIndividual) value).getName() + ">";
+			triple = String.format("<%s> <%s> %s.\n", subject, predicate, valName);
+		} else {
+			String valName = escapeRDFLit((String) value);
+			if (lang == null || lang.length() == 0) {
+				// No usable tag: emit a plain literal rather than the malformed "..."@null / "..."@ form.
+				triple = String.format("<%s> <%s> \"%s\".\n", subject, predicate, valName);
+			} else {
+				triple = String.format("<%s> <%s> \"%s\"@%s.\n", subject, predicate, valName, lang);
+			}
+		}
+		try {
+			outRDF.write(triple);
+			outRDF.flush();
+		} catch (IOException e) {
+			// Best effort: the model has already been updated, so a failed write is reported but not rethrown.
+			e.printStackTrace();
+		}
+	}
+	/**
+	 * Creates a property and writes the corresponding information to the out-file.
+	 * @param individual 
+	 * @param propertyName
+	 * @param value
 	 */
 	public void setProperty(OWLIndividual individual, String propertyName,
 			Object value) {
@@ -491,6 +527,7 @@
 			ind = createInstance(classNameInstance);
 			OWLIndividual identifierInd = createInstance(classNameIdentifier);
 			setProperty(identifierInd, "rdfs:label", identifier);
+			setProperty(identifierInd, "rdfs:label", identifier,"en");
 			try {
 				fh.write(classNameInstance+" --" +classNameIdentifier+"---"+identifier+"\n");
 				fh.flush();
--- a/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java	Thu Jun 21 17:08:22 2012 +0200
+++ b/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java	Thu Aug 16 11:40:17 2012 +0200
@@ -12,6 +12,7 @@
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -34,9 +35,11 @@
 import edu.stanford.nlp.trees.Tree;
 import edu.stanford.nlp.trees.TreebankLanguagePack;
 import edu.stanford.nlp.trees.TypedDependency;
-
 public class AnalyseWithEvents {
 
+	private int prepcount = 0;
+	private String prep_ent="http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/";
+	private String prep_ont="http://ontologies.mpiwg-berlin.mpg.de/research/duomo/prep/";
 	public void analyse(String filename) throws IOException {
 
 		LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
@@ -158,8 +161,8 @@
 
 
 			}
-			//if (count > 5)
-			//	  break;
+//			if (count > 100)
+//				  break;
 		}
 		System.out.println(tuple);
 		System.out.println(tupleLong);
@@ -175,14 +178,21 @@
 
 
 		fw = new FileWriter("/tmp/tupleLong");
-
+		
+		FileWriter fw2 = new FileWriter("/tmp/tupleLong.nt3.rdf");
+		
 		for (String key : tupleLong.keySet()){
 			List<String> val = tupleLong.get(key);
 			
 			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
+			String res = writePrepAsTriple(fw2,key);
+			writeEventsToRes(fw2,res,val);
+			
+			
 		}
 		fw.close();
-
+		fw2.close();
+		
 		fw = new FileWriter("/tmp/words");
 
 		for (String key : words.keySet()){
@@ -193,6 +203,48 @@
 		fw.close();
 
 	}
+	/**
+	 * Writes, for every entry of the list, one statement linking that entry to the
+	 * given preposition URI via the "contains" property, then flushes the writer.
+	 * Double quotes are removed from each entry before it is used as a subject.
+	 *
+	 * @param fw2 writer receiving the statements
+	 * @param prepUri URI used as the object of every statement
+	 * @param val entries used as subjects
+	 * @throws IOException if writing or flushing fails
+	 */
+	private void writeEventsToRes(FileWriter fw2, String prepUri, List<String> val) throws IOException {
+		// Predicate and object are identical for all entries, so build that tail once.
+		final String predicateAndObject = "><" + prep_ont + "contains> <" + prepUri + ">.\n";
+		for (String event : val) {
+			String subject = event.replace("\"", "");
+			fw2.write("<" + subject + predicateAndObject);
+		}
+		fw2.flush();
+	}
+	/**
+	 * Writes the triples describing one preposition entry to the given writer and
+	 * returns the URI generated for it.
+	 * <p>
+	 * The entry is a tab-separated key; field 1 is written as the preposition type,
+	 * field 2 as the main word and field 3 as the specification. Empty or missing
+	 * fields are skipped. Every call increments {@code prepcount}, which is used
+	 * as the numeric suffix of the generated URI.
+	 *
+	 * @param fw2 writer receiving the triples; flushed before returning
+	 * @param prep tab-separated key of the preposition entry
+	 * @return URI of the newly written preposition resource
+	 * @throws IOException if writing or URL-encoding fails
+	 */
+	private String writePrepAsTriple(FileWriter fw2, String prep) throws IOException {
+		// Limit -1 keeps trailing empty fields; plain split("\t") drops them, so a key
+		// ending in an empty field would make the index accesses below throw.
+		String[] splitted = prep.split("\t", -1);
+		prepcount += 1;
+		String resUri = prep_ent + "prep_" + prepcount;
+		// NOTE(review): rdf:type / rdfs:label are written as prefixed names and no prefix
+		// declaration is emitted in this method -- confirm the consumer of the file supplies them.
+		fw2.write("<" + resUri + "> rdf:type " + "<" + prep_ont + "Preposition>.\n");
+
+		// Order (main, specification, prepType) is kept so the output layout stays stable.
+		writePrepPart(fw2, resUri, "main", "Word", splitted, 2);
+		writePrepPart(fw2, resUri, "specification", "Word", splitted, 3);
+		writePrepPart(fw2, resUri, "prepType", "Type", splitted, 1);
+
+		fw2.flush();
+		return resUri;
+	}
+
+	/**
+	 * Links the preposition to the resource for one field of its key and writes that
+	 * resource's label and type. Does nothing when the field is missing or empty.
+	 *
+	 * @param fw2 writer receiving the triples
+	 * @param resUri URI of the preposition resource
+	 * @param property local name of the linking property, appended to {@code prep_ont}
+	 * @param kind "Word" or "Type"; used both as URI prefix of the resource and as its class name
+	 * @param fields the split key
+	 * @param index position of the field to write
+	 * @throws IOException if writing or URL-encoding fails
+	 */
+	private void writePrepPart(FileWriter fw2, String resUri, String property,
+			String kind, String[] fields, int index) throws IOException {
+		if (index >= fields.length || fields[index].equals("")) {
+			return;
+		}
+		String text = fields[index];
+		String partUri = prep_ent + kind + "_" + URLEncoder.encode(text, "utf-8");
+		// Escape backslashes and quotes so the label stays a well-formed quoted literal.
+		String label = text.replace("\\", "\\\\").replace("\"", "\\\"");
+		fw2.write("<" + resUri + "> " + "<" + prep_ont + property + "> <" + partUri + ">.\n");
+		fw2.write("<" + partUri + "> rdfs:label \"" + label + "\"@en .\n");
+		// Label and type share the same subject URI so the class lands on the resource just labelled.
+		fw2.write("<" + partUri + "> rdf:type  " + "<" + prep_ont + kind + ">.\n");
+	}
 	/**
 	 * @param args
 	 */