Mercurial > hg > duomoOWLProject
comparison src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java @ 9:4392a6adf85a default tip
new version der label mit language tag
author | dwinter |
---|---|
date | Thu, 16 Aug 2012 11:40:17 +0200 |
parents | 919e9f3b5efd |
children |
comparison
equal
deleted
inserted
replaced
8:919e9f3b5efd | 9:4392a6adf85a |
---|---|
10 import java.io.FileInputStream; | 10 import java.io.FileInputStream; |
11 import java.io.FileWriter; | 11 import java.io.FileWriter; |
12 import java.io.IOException; | 12 import java.io.IOException; |
13 import java.io.InputStreamReader; | 13 import java.io.InputStreamReader; |
14 import java.io.Reader; | 14 import java.io.Reader; |
15 import java.net.URLEncoder; | |
15 import java.util.ArrayList; | 16 import java.util.ArrayList; |
16 import java.util.Collection; | 17 import java.util.Collection; |
17 import java.util.HashMap; | 18 import java.util.HashMap; |
18 import java.util.HashSet; | 19 import java.util.HashSet; |
19 import java.util.List; | 20 import java.util.List; |
32 import edu.stanford.nlp.trees.GrammaticalStructureFactory; | 33 import edu.stanford.nlp.trees.GrammaticalStructureFactory; |
33 import edu.stanford.nlp.trees.PennTreebankLanguagePack; | 34 import edu.stanford.nlp.trees.PennTreebankLanguagePack; |
34 import edu.stanford.nlp.trees.Tree; | 35 import edu.stanford.nlp.trees.Tree; |
35 import edu.stanford.nlp.trees.TreebankLanguagePack; | 36 import edu.stanford.nlp.trees.TreebankLanguagePack; |
36 import edu.stanford.nlp.trees.TypedDependency; | 37 import edu.stanford.nlp.trees.TypedDependency; |
37 | |
38 public class AnalyseWithEvents { | 38 public class AnalyseWithEvents { |
39 | 39 |
40 private int prepcount = 0; | |
41 private String prep_ent="http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/"; | |
42 private String prep_ont="http://ontologies.mpiwg-berlin.mpg.de/research/duomo/prep/"; | |
40 public void analyse(String filename) throws IOException { | 43 public void analyse(String filename) throws IOException { |
41 | 44 |
42 LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); | 45 LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); |
43 // This option shows loading and sentence-segment and tokenizing | 46 // This option shows loading and sentence-segment and tokenizing |
44 // a file using DocumentPreprocessor | 47 // a file using DocumentPreprocessor |
156 count++; | 159 count++; |
157 System.out.println(count); | 160 System.out.println(count); |
158 | 161 |
159 | 162 |
160 } | 163 } |
161 //if (count > 5) | 164 // if (count > 100) |
162 // break; | 165 // break; |
163 } | 166 } |
164 System.out.println(tuple); | 167 System.out.println(tuple); |
165 System.out.println(tupleLong); | 168 System.out.println(tupleLong); |
166 | 169 |
167 FileWriter fw = new FileWriter("/tmp/tuple"); | 170 FileWriter fw = new FileWriter("/tmp/tuple"); |
173 } | 176 } |
174 fw.close(); | 177 fw.close(); |
175 | 178 |
176 | 179 |
177 fw = new FileWriter("/tmp/tupleLong"); | 180 fw = new FileWriter("/tmp/tupleLong"); |
178 | 181 |
182 FileWriter fw2 = new FileWriter("/tmp/tupleLong.nt3.rdf"); | |
183 | |
179 for (String key : tupleLong.keySet()){ | 184 for (String key : tupleLong.keySet()){ |
180 List<String> val = tupleLong.get(key); | 185 List<String> val = tupleLong.get(key); |
181 | 186 |
182 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); | 187 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); |
188 String res = writePrepAsTriple(fw2,key); | |
189 writeEventsToRes(fw2,res,val); | |
190 | |
191 | |
183 } | 192 } |
184 fw.close(); | 193 fw.close(); |
185 | 194 fw2.close(); |
195 | |
186 fw = new FileWriter("/tmp/words"); | 196 fw = new FileWriter("/tmp/words"); |
187 | 197 |
188 for (String key : words.keySet()){ | 198 for (String key : words.keySet()){ |
189 | 199 |
190 List<String> val = words.get(key); | 200 List<String> val = words.get(key); |
191 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); | 201 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); |
192 } | 202 } |
193 fw.close(); | 203 fw.close(); |
194 | 204 |
205 } | |
206 private void writeEventsToRes(FileWriter fw2, String prepUri, List<String> val) throws IOException { | |
207 for (String res :val){ | |
208 fw2.write("<"+res.replace("\"", "")+"><"+prep_ont+"contains> <"+prepUri+">.\n"); | |
209 } | |
210 fw2.flush(); | |
211 | |
212 } | |
213 private String writePrepAsTriple(FileWriter fw2, String prep) throws IOException { | |
214 | |
215 String[] splitted = prep.split("\t"); | |
216 prepcount+=1; | |
217 String resUri=String.format(prep_ent+"prep_%s",prepcount); | |
218 fw2.write("<"+resUri+ "> rdf:type "+"<"+prep_ont+"Preposition>.\n"); | |
219 | |
220 if (!splitted[2].equals("")){ | |
221 String wd = URLEncoder.encode(splitted[2],"utf-8"); | |
222 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"main> <"+prep_ent+"Word_"+wd+">.\n"); | |
223 fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[2]+"\"@en .\n"); | |
224 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n"); | |
225 } | |
226 | |
227 if (!splitted[3].equals("")){ | |
228 String wd = URLEncoder.encode(splitted[3],"utf-8"); | |
229 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"specification> <"+prep_ent+"Word_"+wd+">.\n"); | |
230 fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[3]+"\"@en .\n"); | |
231 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n"); | |
232 } | |
233 | |
234 if (!splitted[1].equals("")){ | |
235 String wd = URLEncoder.encode(splitted[1],"utf-8"); | |
236 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"prepType> <"+prep_ent+"Type_"+wd+">.\n"); | |
237 fw2.write("<"+prep_ent+"Type_"+wd+"> rdfs:label \""+splitted[1]+"\"@en .\n"); | |
238 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Type>.\n"); | |
239 } | |
240 | |
241 fw2.flush(); | |
242 return resUri; | |
243 | |
244 | |
245 | |
246 | |
195 } | 247 } |
196 /** | 248 /** |
197 * @param args | 249 * @param args |
198 */ | 250 */ |
199 public static void main(String[] args) { | 251 public static void main(String[] args) { |