annotate src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java @ 9:4392a6adf85a default tip

new version der label mit language tag
author dwinter
date Thu, 16 Aug 2012 11:40:17 +0200
parents 919e9f3b5efd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
1 // Analyse calls from the Virtuoso store
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
2 // "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans."
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
3 // select distinct * where { {?x duomo:has_reges ?y} FILTER(lang(?y)="en")}
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
4
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
5
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
6 package de.mpiwg.dwinter.duomo.stanford;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
7
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
8 import java.io.BufferedReader;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
9 import java.io.DataInputStream;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
10 import java.io.FileInputStream;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
11 import java.io.FileWriter;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
12 import java.io.IOException;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
13 import java.io.InputStreamReader;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
14 import java.io.Reader;
9
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
15 import java.net.URLEncoder;
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
16 import java.util.ArrayList;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
17 import java.util.Collection;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
18 import java.util.HashMap;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
19 import java.util.HashSet;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
20 import java.util.List;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
21 import java.util.Map;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
22 import java.util.Set;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
23
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
24 import edu.stanford.nlp.io.EncodingPrintWriter.out;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
25 import edu.stanford.nlp.ling.CyclicCoreLabel;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
26 import edu.stanford.nlp.ling.DocumentReader;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
27 import edu.stanford.nlp.ling.HasWord;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
28 import edu.stanford.nlp.ling.Word;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
29 import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
30 import edu.stanford.nlp.process.DocumentPreprocessor;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
31 import edu.stanford.nlp.trees.GrammaticalRelation;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
32 import edu.stanford.nlp.trees.GrammaticalStructure;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
33 import edu.stanford.nlp.trees.GrammaticalStructureFactory;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
34 import edu.stanford.nlp.trees.PennTreebankLanguagePack;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
35 import edu.stanford.nlp.trees.Tree;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
36 import edu.stanford.nlp.trees.TreebankLanguagePack;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
37 import edu.stanford.nlp.trees.TypedDependency;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
38 public class AnalyseWithEvents {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
39
9
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
40 private int prepcount = 0;
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
41 private String prep_ent="http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/";
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
42 private String prep_ont="http://ontologies.mpiwg-berlin.mpg.de/research/duomo/prep/";
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
43 public void analyse(String filename) throws IOException {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
44
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
45 LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
46 // This option shows loading and sentence-segment and tokenizing
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
47 // a file using DocumentPreprocessor
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
48 TreebankLanguagePack tlp = new PennTreebankLanguagePack();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
49 GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
50 // You could also create a tokenier here (as below) and pass it
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
51 // to DocumentPreprocessor
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
52
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
53 int count=0;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
54 Map<String,List<String>> tuple = new HashMap<String,List<String>>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
55 Map<String,List<String>> tupleLong = new HashMap<String,List<String>>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
56 Map<String,List<String>> words = new HashMap<String,List<String>>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
57
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
58 FileInputStream fstream = new FileInputStream(filename);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
59 // Get the object of DataInputStream
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
60 DataInputStream in = new DataInputStream(fstream);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
61 BufferedReader br = new BufferedReader(new InputStreamReader(in));
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
62 String strLineFull;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
63 //Read File Line By Line
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
64 while ((strLineFull = br.readLine()) != null) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
65
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
66 // correct line needs to be completed to a sentence
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
67 String[] splitted = strLineFull.split(",");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
68
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
69
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
70 // Line hat die Form: "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans."
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
71
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
72 String strLine=splitted[1];
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
73 String recordURI = splitted[0];
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
74 strLine=strLine.replace("\"", "");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
75 strLine="This is a "+strLine;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
76
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
77
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
78 Reader dr = DocumentReader.getReader(strLine);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
79
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
80
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
81
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
82 for (List<HasWord> sentence : new DocumentPreprocessor(dr)) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
83 Tree parse = lp.apply(sentence);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
84 //parse.pennPrint();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
85 //System.out.println();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
86
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
87 for (HasWord word: sentence)
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
88 {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
89 Word wd = (Word)word;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
90
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
91 String st= wd.value().toLowerCase();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
92
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
93 if (words.containsKey(st)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
94 words.get(st).add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
95 } else {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
96 List<String> ls =new ArrayList<String>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
97 ls.add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
98 words.put(st, ls);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
99 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
100
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
101 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
102
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
103
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
104 GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
105 Collection tdl = gs.typedDependenciesCCprocessed(true);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
106
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
107 for (Object t: tdl){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
108 if (TypedDependency.class.isInstance(t)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
109
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
110
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
111 TypedDependency td = (TypedDependency)t;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
112
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
113 GrammaticalRelation reln = td.reln();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
114 if (reln.getShortName().equals("prep") || reln.getShortName().equals("conj") ){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
115
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
116 String st = reln.getShortName()
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
117 +"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
118
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
119 st +=td.gov().label().value()+"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
120
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
121 st+=td.dep().label().value();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
122
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
123 st=st.toLowerCase();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
124
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
125 if (tuple.containsKey(st)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
126 tuple.get(st).add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
127 } else {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
128 List<String> ls =new ArrayList<String>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
129 ls.add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
130 tuple.put(st, ls);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
131 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
132
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
133
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
134 st = reln.getShortName()+"\t"+reln.getSpecific()+"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
135
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
136 st +=td.gov().label().value()+"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
137
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
138 st+=td.dep().label().value();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
139
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
140 st=st.toLowerCase();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
141
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
142 if (tupleLong.containsKey(st)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
143 tupleLong.get(st).add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
144 } else {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
145 List<String> ls =new ArrayList<String>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
146 ls.add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
147 tupleLong.put(st, ls);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
148 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
149
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
150
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
151 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
152
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
153 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
154
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
155 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
156
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
157 //System.out.println(tdl);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
158 //System.out.println();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
159 count++;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
160 System.out.println(count);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
161
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
162
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
163 }
9
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
164 // if (count > 100)
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
165 // break;
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
166 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
167 System.out.println(tuple);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
168 System.out.println(tupleLong);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
169
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
170 FileWriter fw = new FileWriter("/tmp/tuple");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
171
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
172 for (String key : tuple.keySet()){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
173 List<String> val = tuple.get(key);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
174 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
175
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
176 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
177 fw.close();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
178
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
179
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
180 fw = new FileWriter("/tmp/tupleLong");
9
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
181
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
182 FileWriter fw2 = new FileWriter("/tmp/tupleLong.nt3.rdf");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
183
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
184 for (String key : tupleLong.keySet()){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
185 List<String> val = tupleLong.get(key);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
186
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
187 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
9
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
188 String res = writePrepAsTriple(fw2,key);
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
189 writeEventsToRes(fw2,res,val);
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
190
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
191
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
192 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
193 fw.close();
9
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
194 fw2.close();
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
195
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
196 fw = new FileWriter("/tmp/words");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
197
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
198 for (String key : words.keySet()){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
199
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
200 List<String> val = words.get(key);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
201 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
202 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
203 fw.close();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
204
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
205 }
9
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
206 private void writeEventsToRes(FileWriter fw2, String prepUri, List<String> val) throws IOException {
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
207 for (String res :val){
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
208 fw2.write("<"+res.replace("\"", "")+"><"+prep_ont+"contains> <"+prepUri+">.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
209 }
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
210 fw2.flush();
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
211
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
212 }
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
213 private String writePrepAsTriple(FileWriter fw2, String prep) throws IOException {
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
214
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
215 String[] splitted = prep.split("\t");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
216 prepcount+=1;
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
217 String resUri=String.format(prep_ent+"prep_%s",prepcount);
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
218 fw2.write("<"+resUri+ "> rdf:type "+"<"+prep_ont+"Preposition>.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
219
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
220 if (!splitted[2].equals("")){
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
221 String wd = URLEncoder.encode(splitted[2],"utf-8");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
222 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"main> <"+prep_ent+"Word_"+wd+">.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
223 fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[2]+"\"@en .\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
224 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
225 }
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
226
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
227 if (!splitted[3].equals("")){
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
228 String wd = URLEncoder.encode(splitted[3],"utf-8");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
229 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"specification> <"+prep_ent+"Word_"+wd+">.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
230 fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[3]+"\"@en .\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
231 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
232 }
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
233
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
234 if (!splitted[1].equals("")){
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
235 String wd = URLEncoder.encode(splitted[1],"utf-8");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
236 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"prepType> <"+prep_ent+"Type_"+wd+">.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
237 fw2.write("<"+prep_ent+"Type_"+wd+"> rdfs:label \""+splitted[1]+"\"@en .\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
238 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Type>.\n");
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
239 }
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
240
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
241 fw2.flush();
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
242 return resUri;
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
243
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
244
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
245
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
246
4392a6adf85a new version der label mit language tag
dwinter
parents: 8
diff changeset
247 }
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
248 /**
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
249 * @param args
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
250 */
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
251 public static void main(String[] args) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
252 AnalyseWithEvents a = new AnalyseWithEvents();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
253 try {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
254 a.analyse("/tmp/reges.csv");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
255 } catch (IOException e) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
256 // TODO Auto-generated catch block
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
257 e.printStackTrace();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
258 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
259
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
260 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
261
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
262 }