diff src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java @ 9:4392a6adf85a default tip

new version der label mit language tag
author dwinter
date Thu, 16 Aug 2012 11:40:17 +0200
parents 919e9f3b5efd
children
line wrap: on
line diff
--- a/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java	Thu Jun 21 17:08:22 2012 +0200
+++ b/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java	Thu Aug 16 11:40:17 2012 +0200
@@ -12,6 +12,7 @@
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
@@ -34,9 +35,11 @@
 import edu.stanford.nlp.trees.Tree;
 import edu.stanford.nlp.trees.TreebankLanguagePack;
 import edu.stanford.nlp.trees.TypedDependency;
-
 public class AnalyseWithEvents {
 
+	private int prepcount = 0;
+	private String prep_ent="http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/";
+	private String prep_ont="http://ontologies.mpiwg-berlin.mpg.de/research/duomo/prep/";
 	public void analyse(String filename) throws IOException {
 
 		LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
@@ -158,8 +161,8 @@
 
 
 			}
-			//if (count > 5)
-			//	  break;
+//			if (count > 100)
+//				  break;
 		}
 		System.out.println(tuple);
 		System.out.println(tupleLong);
@@ -175,14 +178,21 @@
 
 
 		fw = new FileWriter("/tmp/tupleLong");
-
+		
+		FileWriter fw2 = new FileWriter("/tmp/tupleLong.nt3.rdf");
+		
 		for (String key : tupleLong.keySet()){
 			List<String> val = tupleLong.get(key);
 			
 			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
+			String res = writePrepAsTriple(fw2,key);
+			writeEventsToRes(fw2,res,val);
+			
+			
 		}
 		fw.close();
-
+		fw2.close();
+		
 		fw = new FileWriter("/tmp/words");
 
 		for (String key : words.keySet()){
@@ -193,6 +203,48 @@
 		fw.close();
 
 	}
+	private void writeEventsToRes(FileWriter fw2, String prepUri, List<String> val) throws IOException {
+		for (String res :val){
+			fw2.write("<"+res.replace("\"", "")+"><"+prep_ont+"contains> <"+prepUri+">.\n");
+		}
+		fw2.flush();
+		
+	}
+	/**
+	 * Writes one preposition record as a group of triples and returns the URI
+	 * of the newly created preposition resource.
+	 *
+	 * The record is a tab-separated string. Field 1 is written as the
+	 * preposition type, field 2 as its main word and field 3 as its
+	 * specification word; field 0 is not used here. A field that is empty or
+	 * missing is skipped. Each call increments {@code prepcount} and uses the
+	 * new value to build the resource URI {@code prep_ent + "prep_" + prepcount}.
+	 *
+	 * @param fw2  writer receiving the triples; flushed but not closed here
+	 * @param prep tab-separated preposition record
+	 * @return URI of the preposition resource that was written
+	 * @throws IOException if writing or flushing fails
+	 */
+	private String writePrepAsTriple(FileWriter fw2, String prep) throws IOException {
+		// A limit of -1 keeps trailing empty fields; plain split("\t") drops them,
+		// which made the index checks below throw ArrayIndexOutOfBoundsException.
+		String[] splitted = prep.split("\t", -1);
+		prepcount += 1;
+		String resUri = prep_ent + "prep_" + prepcount;
+		fw2.write("<" + resUri + "> rdf:type " + "<" + prep_ont + "Preposition>.\n");
+
+		// Same emission order as before: main word, specification, type.
+		writePrepComponent(fw2, resUri, "main", "Word", splitted, 2);
+		writePrepComponent(fw2, resUri, "specification", "Word", splitted, 3);
+		writePrepComponent(fw2, resUri, "prepType", "Type", splitted, 1);
+
+		fw2.flush();
+		return resUri;
+	}
+
+	/**
+	 * Writes the three triples for one component of a preposition: the link
+	 * from the preposition to the component entity, the entity's English label
+	 * and the entity's rdf:type. Does nothing if the field is empty or absent.
+	 *
+	 * @param fw2      writer receiving the triples
+	 * @param resUri   URI of the preposition resource (subject of the link)
+	 * @param property local name of the linking property below {@code prep_ont}
+	 * @param kind     "Word" or "Type"; used both as the entity URI prefix
+	 *                 ({@code kind + "_"}) and as the class name below {@code prep_ont}
+	 * @param fields   the split preposition record
+	 * @param index    position of the component within {@code fields}
+	 * @throws IOException if writing fails
+	 */
+	private void writePrepComponent(FileWriter fw2, String resUri, String property, String kind,
+			String[] fields, int index) throws IOException {
+		if (index >= fields.length || fields[index].equals("")) {
+			return;
+		}
+		String value = fields[index];
+		// The same entity URI is used for all three triples, so the rdf:type
+		// statement always describes the entity that was actually linked
+		// (previously the Type entity was typed under a "Word_" URI).
+		String entityUri = prep_ent + kind + "_" + URLEncoder.encode(value, "utf-8");
+		// Escape backslash and double quote so the label stays a well-formed literal.
+		String label = value.replace("\\", "\\\\").replace("\"", "\\\"");
+		fw2.write("<" + resUri + "> <" + prep_ont + property + "> <" + entityUri + ">.\n");
+		fw2.write("<" + entityUri + "> rdfs:label \"" + label + "\"@en .\n");
+		fw2.write("<" + entityUri + "> rdf:type  <" + prep_ont + kind + ">.\n");
+	}
 	/**
 	 * @param args
 	 */