Mercurial > hg > duomoOWLProject
annotate src/de/mpiwg/dwinter/duomo/stanford/AnalyseWithEvents.java @ 9:4392a6adf85a default tip
new version der label mit language tag
author | dwinter |
---|---|
date | Thu, 16 Aug 2012 11:40:17 +0200 |
parents | 919e9f3b5efd |
children |
rev | line source |
---|---|
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
1 // Analyse calls from the Virtuoso store |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
2 // "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans." |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
3 // select distinct * where { {?x duomo:has_reges ?y} FILTER(lang(?y)="en")} |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
4 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
5 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
6 package de.mpiwg.dwinter.duomo.stanford; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
7 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
8 import java.io.BufferedReader; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
9 import java.io.DataInputStream; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
10 import java.io.FileInputStream; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
11 import java.io.FileWriter; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
12 import java.io.IOException; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
13 import java.io.InputStreamReader; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
14 import java.io.Reader; |
9 | 15 import java.net.URLEncoder; |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
16 import java.util.ArrayList; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
17 import java.util.Collection; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
18 import java.util.HashMap; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
19 import java.util.HashSet; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
20 import java.util.List; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
21 import java.util.Map; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
22 import java.util.Set; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
23 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
24 import edu.stanford.nlp.io.EncodingPrintWriter.out; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
25 import edu.stanford.nlp.ling.CyclicCoreLabel; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
26 import edu.stanford.nlp.ling.DocumentReader; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
27 import edu.stanford.nlp.ling.HasWord; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
28 import edu.stanford.nlp.ling.Word; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
29 import edu.stanford.nlp.parser.lexparser.LexicalizedParser; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
30 import edu.stanford.nlp.process.DocumentPreprocessor; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
31 import edu.stanford.nlp.trees.GrammaticalRelation; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
32 import edu.stanford.nlp.trees.GrammaticalStructure; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
33 import edu.stanford.nlp.trees.GrammaticalStructureFactory; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
34 import edu.stanford.nlp.trees.PennTreebankLanguagePack; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
35 import edu.stanford.nlp.trees.Tree; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
36 import edu.stanford.nlp.trees.TreebankLanguagePack; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
37 import edu.stanford.nlp.trees.TypedDependency; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
38 public class AnalyseWithEvents { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
39 |
9 | 40 private int prepcount = 0; |
41 private String prep_ent="http://entities.mpiwg-berlin.mpg.de/research/duomo/prep/"; | |
42 private String prep_ont="http://ontologies.mpiwg-berlin.mpg.de/research/duomo/prep/"; | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
43 public void analyse(String filename) throws IOException { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
44 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
45 LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz"); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
46 // This option shows loading and sentence-segment and tokenizing |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
47 // a file using DocumentPreprocessor |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
48 TreebankLanguagePack tlp = new PennTreebankLanguagePack(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
49 GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
50 // You could also create a tokenier here (as below) and pass it |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
51 // to DocumentPreprocessor |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
52 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
53 int count=0; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
54 Map<String,List<String>> tuple = new HashMap<String,List<String>>(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
55 Map<String,List<String>> tupleLong = new HashMap<String,List<String>>(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
56 Map<String,List<String>> words = new HashMap<String,List<String>>(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
57 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
58 FileInputStream fstream = new FileInputStream(filename); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
59 // Get the object of DataInputStream |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
60 DataInputStream in = new DataInputStream(fstream); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
61 BufferedReader br = new BufferedReader(new InputStreamReader(in)); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
62 String strLineFull; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
63 //Read File Line By Line |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
64 while ((strLineFull = br.readLine()) != null) { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
65 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
66 // correct line needs to be completed to a sentence |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
67 String[] splitted = strLineFull.split(","); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
68 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
69 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
70 // Line hat die Form: "http://ontologies.mpiwg-berlin.mpg.de/research/duomoAnalysis.owl/RecordedEvent_41164","Term of payment for debt for forced loans." |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
71 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
72 String strLine=splitted[1]; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
73 String recordURI = splitted[0]; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
74 strLine=strLine.replace("\"", ""); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
75 strLine="This is a "+strLine; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
76 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
77 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
78 Reader dr = DocumentReader.getReader(strLine); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
79 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
80 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
81 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
82 for (List<HasWord> sentence : new DocumentPreprocessor(dr)) { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
83 Tree parse = lp.apply(sentence); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
84 //parse.pennPrint(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
85 //System.out.println(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
86 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
87 for (HasWord word: sentence) |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
88 { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
89 Word wd = (Word)word; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
90 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
91 String st= wd.value().toLowerCase(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
92 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
93 if (words.containsKey(st)){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
94 words.get(st).add(recordURI); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
95 } else { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
96 List<String> ls =new ArrayList<String>(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
97 ls.add(recordURI); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
98 words.put(st, ls); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
99 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
100 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
101 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
102 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
103 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
104 GrammaticalStructure gs = gsf.newGrammaticalStructure(parse); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
105 Collection tdl = gs.typedDependenciesCCprocessed(true); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
106 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
107 for (Object t: tdl){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
108 if (TypedDependency.class.isInstance(t)){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
109 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
110 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
111 TypedDependency td = (TypedDependency)t; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
112 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
113 GrammaticalRelation reln = td.reln(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
114 if (reln.getShortName().equals("prep") || reln.getShortName().equals("conj") ){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
115 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
116 String st = reln.getShortName() |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
117 +"\t"; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
118 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
119 st +=td.gov().label().value()+"\t"; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
120 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
121 st+=td.dep().label().value(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
122 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
123 st=st.toLowerCase(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
124 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
125 if (tuple.containsKey(st)){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
126 tuple.get(st).add(recordURI); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
127 } else { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
128 List<String> ls =new ArrayList<String>(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
129 ls.add(recordURI); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
130 tuple.put(st, ls); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
131 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
132 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
133 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
134 st = reln.getShortName()+"\t"+reln.getSpecific()+"\t"; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
135 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
136 st +=td.gov().label().value()+"\t"; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
137 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
138 st+=td.dep().label().value(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
139 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
140 st=st.toLowerCase(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
141 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
142 if (tupleLong.containsKey(st)){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
143 tupleLong.get(st).add(recordURI); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
144 } else { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
145 List<String> ls =new ArrayList<String>(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
146 ls.add(recordURI); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
147 tupleLong.put(st, ls); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
148 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
149 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
150 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
151 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
152 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
153 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
154 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
155 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
156 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
157 //System.out.println(tdl); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
158 //System.out.println(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
159 count++; |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
160 System.out.println(count); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
161 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
162 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
163 } |
9 | 164 // if (count > 100) |
165 // break; | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
166 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
167 System.out.println(tuple); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
168 System.out.println(tupleLong); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
169 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
170 FileWriter fw = new FileWriter("/tmp/tuple"); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
171 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
172 for (String key : tuple.keySet()){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
173 List<String> val = tuple.get(key); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
174 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
175 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
176 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
177 fw.close(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
178 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
179 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
180 fw = new FileWriter("/tmp/tupleLong"); |
9 | 181 |
182 FileWriter fw2 = new FileWriter("/tmp/tupleLong.nt3.rdf"); | |
183 | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
184 for (String key : tupleLong.keySet()){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
185 List<String> val = tupleLong.get(key); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
186 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
187 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); |
9 | 188 String res = writePrepAsTriple(fw2,key); |
189 writeEventsToRes(fw2,res,val); | |
190 | |
191 | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
192 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
193 fw.close(); |
9 | 194 fw2.close(); |
195 | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
196 fw = new FileWriter("/tmp/words"); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
197 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
198 for (String key : words.keySet()){ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
199 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
200 List<String> val = words.get(key); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
201 fw.write(key+"\t"+String.valueOf(val.size())+"\t"+val.toString()+"\n"); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
202 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
203 fw.close(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
204 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
205 } |
9 | 206 private void writeEventsToRes(FileWriter fw2, String prepUri, List<String> val) throws IOException { |
207 for (String res :val){ | |
208 fw2.write("<"+res.replace("\"", "")+"><"+prep_ont+"contains> <"+prepUri+">.\n"); | |
209 } | |
210 fw2.flush(); | |
211 | |
212 } | |
213 private String writePrepAsTriple(FileWriter fw2, String prep) throws IOException { | |
214 | |
215 String[] splitted = prep.split("\t"); | |
216 prepcount+=1; | |
217 String resUri=String.format(prep_ent+"prep_%s",prepcount); | |
218 fw2.write("<"+resUri+ "> rdf:type "+"<"+prep_ont+"Preposition>.\n"); | |
219 | |
220 if (!splitted[2].equals("")){ | |
221 String wd = URLEncoder.encode(splitted[2],"utf-8"); | |
222 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"main> <"+prep_ent+"Word_"+wd+">.\n"); | |
223 fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[2]+"\"@en .\n"); | |
224 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n"); | |
225 } | |
226 | |
227 if (!splitted[3].equals("")){ | |
228 String wd = URLEncoder.encode(splitted[3],"utf-8"); | |
229 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"specification> <"+prep_ent+"Word_"+wd+">.\n"); | |
230 fw2.write("<"+prep_ent+"Word_"+wd+"> rdfs:label \""+splitted[3]+"\"@en .\n"); | |
231 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Word>.\n"); | |
232 } | |
233 | |
234 if (!splitted[1].equals("")){ | |
235 String wd = URLEncoder.encode(splitted[1],"utf-8"); | |
236 fw2.write("<"+resUri+ "> "+"<"+prep_ont+"prepType> <"+prep_ent+"Type_"+wd+">.\n"); | |
237 fw2.write("<"+prep_ent+"Type_"+wd+"> rdfs:label \""+splitted[1]+"\"@en .\n"); | |
238 fw2.write("<"+prep_ent+"Word_"+wd+"> rdf:type "+"<"+prep_ont+"Type>.\n"); | |
239 } | |
240 | |
241 fw2.flush(); | |
242 return resUri; | |
243 | |
244 | |
245 | |
246 | |
247 } | |
8
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
248 /** |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
249 * @param args |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
250 */ |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
251 public static void main(String[] args) { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
252 AnalyseWithEvents a = new AnalyseWithEvents(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
253 try { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
254 a.analyse("/tmp/reges.csv"); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
255 } catch (IOException e) { |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
256 // TODO Auto-generated catch block |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
257 e.printStackTrace(); |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
258 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
259 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
260 } |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
261 |
919e9f3b5efd
neue klassen zur textanalyse (stanford parser eingebaut)
dwinter
parents:
diff
changeset
|
262 } |