annotate src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java @ 8:919e9f3b5efd

neue klassen zur textanalyse (stanford parser eingebaut) alle has_readable_labe Datatype properties durch rdfs:label ersetzt.
author dwinter
date Thu, 21 Jun 2012 17:08:22 +0200
parents
children 4392a6adf85a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
1 // Analisiere calls from the virtuoso store
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
2 // "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans."
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
3 // select distinct * where { {?x duomo:has_reges ?y} FILTER(lang(?y)="en")}
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
4
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
5
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
6 package de.mpiwg.dwinter.duomo.stanford;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
7
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
8 import java.io.BufferedReader;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
9 import java.io.DataInputStream;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
10 import java.io.FileInputStream;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
11 import java.io.FileWriter;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
12 import java.io.IOException;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
13 import java.io.InputStreamReader;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
14 import java.io.Reader;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
15 import java.util.ArrayList;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
16 import java.util.Collection;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
17 import java.util.HashMap;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
18 import java.util.HashSet;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
19 import java.util.List;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
20 import java.util.Map;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
21 import java.util.Set;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
22
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
23 import edu.stanford.nlp.io.EncodingPrintWriter.out;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
24 import edu.stanford.nlp.ling.CyclicCoreLabel;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
25 import edu.stanford.nlp.ling.DocumentReader;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
26 import edu.stanford.nlp.ling.HasWord;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
27 import edu.stanford.nlp.ling.Word;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
28 import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
29 import edu.stanford.nlp.process.DocumentPreprocessor;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
30 import edu.stanford.nlp.trees.GrammaticalRelation;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
31 import edu.stanford.nlp.trees.GrammaticalStructure;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
32 import edu.stanford.nlp.trees.GrammaticalStructureFactory;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
33 import edu.stanford.nlp.trees.PennTreebankLanguagePack;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
34 import edu.stanford.nlp.trees.Tree;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
35 import edu.stanford.nlp.trees.TreebankLanguagePack;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
36 import edu.stanford.nlp.trees.TypedDependency;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
37
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
38 public class AnalyseWithEvents {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
39
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
40 public void analyse(String filename) throws IOException {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
41
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
42 LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
43 // This option shows loading and sentence-segment and tokenizing
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
44 // a file using DocumentPreprocessor
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
45 TreebankLanguagePack tlp = new PennTreebankLanguagePack();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
46 GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
47 // You could also create a tokenier here (as below) and pass it
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
48 // to DocumentPreprocessor
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
49
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
50 int count=0;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
51 Map<String,List<String>> tuple = new HashMap<String,List<String>>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
52 Map<String,List<String>> tupleLong = new HashMap<String,List<String>>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
53 Map<String,List<String>> words = new HashMap<String,List<String>>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
54
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
55 FileInputStream fstream = new FileInputStream(filename);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
56 // Get the object of DataInputStream
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
57 DataInputStream in = new DataInputStream(fstream);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
58 BufferedReader br = new BufferedReader(new InputStreamReader(in));
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
59 String strLineFull;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
60 //Read File Line By Line
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
61 while ((strLineFull = br.readLine()) != null) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
62
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
63 // correct line needs to be completed to a sentence
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
64 String[] splitted = strLineFull.split(",");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
65
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
66
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
67 // Line hat die Form: "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans."
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
68
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
69 String strLine=splitted[1];
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
70 String recordURI = splitted[0];
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
71 strLine=strLine.replace("\"", "");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
72 strLine="This is a "+strLine;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
73
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
74
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
75 Reader dr = DocumentReader.getReader(strLine);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
76
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
77
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
78
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
79 for (List<HasWord> sentence : new DocumentPreprocessor(dr)) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
80 Tree parse = lp.apply(sentence);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
81 //parse.pennPrint();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
82 //System.out.println();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
83
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
84 for (HasWord word: sentence)
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
85 {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
86 Word wd = (Word)word;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
87
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
88 String st= wd.value().toLowerCase();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
89
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
90 if (words.containsKey(st)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
91 words.get(st).add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
92 } else {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
93 List<String> ls =new ArrayList<String>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
94 ls.add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
95 words.put(st, ls);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
96 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
97
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
98 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
99
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
100
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
101 GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
102 Collection tdl = gs.typedDependenciesCCprocessed(true);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
103
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
104 for (Object t: tdl){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
105 if (TypedDependency.class.isInstance(t)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
106
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
107
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
108 TypedDependency td = (TypedDependency)t;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
109
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
110 GrammaticalRelation reln = td.reln();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
111 if (reln.getShortName().equals("prep") || reln.getShortName().equals("conj") ){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
112
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
113 String st = reln.getShortName()
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
114 +"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
115
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
116 st +=td.gov().label().value()+"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
117
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
118 st+=td.dep().label().value();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
119
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
120 st=st.toLowerCase();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
121
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
122 if (tuple.containsKey(st)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
123 tuple.get(st).add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
124 } else {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
125 List<String> ls =new ArrayList<String>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
126 ls.add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
127 tuple.put(st, ls);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
128 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
129
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
130
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
131 st = reln.getShortName()+"\t"+reln.getSpecific()+"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
132
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
133 st +=td.gov().label().value()+"\t";
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
134
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
135 st+=td.dep().label().value();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
136
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
137 st=st.toLowerCase();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
138
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
139 if (tupleLong.containsKey(st)){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
140 tupleLong.get(st).add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
141 } else {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
142 List<String> ls =new ArrayList<String>();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
143 ls.add(recordURI);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
144 tupleLong.put(st, ls);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
145 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
146
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
147
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
148 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
149
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
150 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
151
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
152 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
153
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
154 //System.out.println(tdl);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
155 //System.out.println();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
156 count++;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
157 System.out.println(count);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
158
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
159
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
160 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
161 //if (count > 5)
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
162 // break;
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
163 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
164 System.out.println(tuple);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
165 System.out.println(tupleLong);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
166
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
167 FileWriter fw = new FileWriter("/tmp/tuple");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
168
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
169 for (String key : tuple.keySet()){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
170 List<String> val = tuple.get(key);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
171 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
172
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
173 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
174 fw.close();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
175
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
176
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
177 fw = new FileWriter("/tmp/tupleLong");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
178
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
179 for (String key : tupleLong.keySet()){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
180 List<String> val = tupleLong.get(key);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
181
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
182 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
183 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
184 fw.close();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
185
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
186 fw = new FileWriter("/tmp/words");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
187
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
188 for (String key : words.keySet()){
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
189
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
190 List<String> val = words.get(key);
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
191 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
192 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
193 fw.close();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
194
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
195 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
196 /**
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
197 * @param args
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
198 */
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
199 public static void main(String[] args) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
200 AnalyseWithEvents a = new AnalyseWithEvents();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
201 try {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
202 a.analyse("/tmp/reges.csv");
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
203 } catch (IOException e) {
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
204 // TODO Auto-generated catch block
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
205 e.printStackTrace();
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
206 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
207
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
208 }
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
209
919e9f3b5efd neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff changeset
210 }