comparison src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java @ 9:4392a6adf85a default tip

new version of the labels with language tag
author	dwinter
date	Thu, 16 Aug 2012 11:40:17 +0200
parents	919e9f3b5efd
children	(none)
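This revision adds an @en language tag to every rdfs:label the exporter writes. A sketch of the resulting triple shape (the word and its URI are illustrative, built from the prep_ent prefix declared below):

	<http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/Word_nave> rdfs:label "nave"@en .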
--- a/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java	8:919e9f3b5efd
+++ b/src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java	9:4392a6adf85a
@@ -10 +10 @@
 import java.io.FileInputStream;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
+import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -32 +33 @@
 import edu.stanford.nlp.trees.GrammaticalStructureFactory;
 import edu.stanford.nlp.trees.PennTreebankLanguagePack;
 import edu.stanford.nlp.trees.Tree;
 import edu.stanford.nlp.trees.TreebankLanguagePack;
 import edu.stanford.nlp.trees.TypedDependency;
-
 public class AnalyseWithEvents {
 
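+	// Counter and namespace prefixes for the preposition resources minted below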
+	private int prepcount = 0;
+	private String prep_ent = "http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/";
+	private String prep_ont = "http://ontologies.mpiwg-berlin.mpg.de/research/duomo/prep/";
 	public void analyse(String filename) throws IOException {
 
 		LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
 		// This option shows loading and sentence-segmenting and tokenizing
 		// a file using DocumentPreprocessor
@@ -156 +159 @@
 				count++;
 				System.out.println(count);
 
 
 			}
-			//if (count > 5)
+			// if (count > 100)
 			//	break;
 		}
 		System.out.println(tuple);
 		System.out.println(tupleLong);
 
 		FileWriter fw = new FileWriter("/tmp/tuple");
@@ -173 +176 @@
 		}
 		fw.close();
 
 
 		fw = new FileWriter("/tmp/tupleLong");
 
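+		// Second writer: exports the prepositions and their events as triples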
+		FileWriter fw2 = new FileWriter("/tmp/tupleLong.nt3.rdf");
+
 		for (String key : tupleLong.keySet()){
 			List<String> val = tupleLong.get(key);
 
 			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
+			String res = writePrepAsTriple(fw2, key);
+			writeEventsToRes(fw2, res, val);
 		}
 		fw.close();
-
+		fw2.close();
+
 		fw = new FileWriter("/tmp/words");
 
 		for (String key : words.keySet()){
 
 			List<String> val = words.get(key);
 			fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
 		}
 		fw.close();
 
+	}
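+
+	/**
+	 * Writes one triple per event resource in val, linking each event to the
+	 * given preposition resource via the <prep_ont>contains property.
+	 */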
+	private void writeEventsToRes(FileWriter fw2, String prepUri, List<String> val) throws IOException {
+		for (String res : val){
+			fw2.write("<"+res.replace("\"", "")+"> <"+prep_ont+"contains> <"+prepUri+">.\n");
+		}
+		fw2.flush();
+
+	}
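+
+	/**
+	 * Mints a new resource prep_<n> under prep_ent and describes it: rdf:type,
+	 * preposition type (field 1 of the tab-separated key), main word (field 2)
+	 * and specification word (field 3); each word label carries an @en tag.
+	 * Example (namespaces abbreviated): <prep_1> rdf:type <Preposition>. and
+	 * <Word_nave> rdfs:label "nave"@en . Note that rdf:type and rdfs:label are
+	 * written as prefixed names, so a consumer needs matching prefix
+	 * declarations; the output is N3-style rather than strict N-Triples.
+	 */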
+	private String writePrepAsTriple(FileWriter fw2, String prep) throws IOException {
+
+		String[] splitted = prep.split("\t");
+		prepcount += 1;
+		String resUri = String.format(prep_ent+"prep_%s", prepcount);
+		fw2.write("<"+resUri+"> rdf:type <"+prep_ont+"Preposition>.\n");
+
+		if (!splitted[2].equals("")){
+			String wd = URLEncoder.encode(splitted[2], "utf-8");
+			fw2.write("<"+resUri+"> <"+prep_ont+"main> <"+prep_ent+"Word_"+wd+">.\n");
+			fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[2]+"\"@en .\n");
+			fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type <"+prep_ont+"Word>.\n");
+		}
+
+		if (!splitted[3].equals("")){
+			String wd = URLEncoder.encode(splitted[3], "utf-8");
+			fw2.write("<"+resUri+"> <"+prep_ont+"specification> <"+prep_ent+"Word_"+wd+">.\n");
+			fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[3]+"\"@en .\n");
+			fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type <"+prep_ont+"Word>.\n");
+		}
+
+		if (!splitted[1].equals("")){
+			String wd = URLEncoder.encode(splitted[1], "utf-8");
+			fw2.write("<"+resUri+"> <"+prep_ont+"prepType> <"+prep_ent+"Type_"+wd+">.\n");
+			fw2.write("<"+prep_ent+"Type_"+wd+"> rdfs:label \""+splitted[1]+"\"@en .\n");
+			fw2.write("<"+prep_ent+"Type_"+wd+"> rdf:type <"+prep_ont+"Type>.\n");
+		}
+
+		fw2.flush();
+		return resUri;
 	}
 	/**
 	 * @param args
 	 */
 	public static void main(String[] args) {