Mercurial > hg > duomoOWLProject
diff src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java @ 9:4392a6adf85a default tip
New version of the labels with a language tag
author | dwinter |
---|---|
date | Thu, 16 Aug 2012 11:40:17 +0200 |
parents | 919e9f3b5efd |
children |
line wrap: on
line diff
--- a/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java Thu Jun 21 17:08:22 2012 +0200 +++ b/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java Thu Aug 16 11:40:17 2012 +0200 @@ -12,6 +12,7 @@ import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; @@ -34,9 +35,11 @@ import edu.stanford.nlp.trees.Tree; import edu.stanford.nlp.trees.TreebankLanguagePack; import edu.stanford.nlp.trees.TypedDependency; - public class AnalyseWithEvents { + private int prepcount = 0; + private String prep_ent="http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/"; + private String prep_ont="http://ontologies.mpiwg-berlin.mpg.de/research/duomo/prep/"; public void analyse(String filename) throws IOException { LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); @@ -158,8 +161,8 @@ } - //if (count > 5) - // break; +// if (count > 100) +// break; } System.out.println(tuple); System.out.println(tupleLong); @@ -175,14 +178,21 @@ fw = new FileWriter("/tmp/tupleLong"); - + + FileWriter fw2 = new FileWriter("/tmp/tupleLong.nt3.rdf"); + for (String key : tupleLong.keySet()){ List<String> val = tupleLong.get(key); fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); + String res = writePrepAsTriple(fw2,key); + writeEventsToRes(fw2,res,val); + + } fw.close(); - + fw2.close(); + fw = new FileWriter("/tmp/words"); for (String key : words.keySet()){ @@ -193,6 +203,48 @@ fw.close(); } + private void writeEventsToRes(FileWriter fw2, String prepUri, List<String> val) throws IOException { + for (String res :val){ + fw2.write("<"+res.replace("\"", "")+"><"+prep_ont+"contains> <"+prepUri+">.\n"); + } + fw2.flush(); + + } + private String writePrepAsTriple(FileWriter fw2, String prep) throws IOException { + + String[] splitted = prep.split("\t"); + prepcount+=1; + String 
resUri=String.format(prep_ent+"prep_%s",prepcount); + fw2.write("<"+resUri+ "> rdf:type "+"<"+prep_ont+"Preposition>.\n"); + + if (!splitted[2].equals("")){ + String wd = URLEncoder.encode(splitted[2],"utf-8"); + fw2.write("<"+resUri+ "> "+"<"+prep_ont+"main> <"+prep_ent+"Word_"+wd+">.\n"); + fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[2]+"\"@en .\n"); + fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n"); + } + + if (!splitted[3].equals("")){ + String wd = URLEncoder.encode(splitted[3],"utf-8"); + fw2.write("<"+resUri+ "> "+"<"+prep_ont+"specification> <"+prep_ent+"Word_"+wd+">.\n"); + fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[3]+"\"@en .\n"); + fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n"); + } + + if (!splitted[1].equals("")){ + String wd = URLEncoder.encode(splitted[1],"utf-8"); + fw2.write("<"+resUri+ "> "+"<"+prep_ont+"prepType> <"+prep_ent+"Type_"+wd+">.\n"); + fw2.write("<"+prep_ent+"Type_"+wd+"> rdfs:label \""+splitted[1]+"\"@en .\n"); + fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Type>.\n"); + } + + fw2.flush(); + return resUri; + + + + + } /** * @param args */