annotate src/de/mpiwg/anteater/places/ml/preprocessing/LocationFeatureCalculator.java @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 package de.mpiwg.anteater.places.ml.preprocessing;
036535fcd179 anteater
jdamerow
parents:
diff changeset
2
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 import java.util.HashMap;
036535fcd179 anteater
jdamerow
parents:
diff changeset
4 import java.util.List;
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 import java.util.Map;
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 import java.util.regex.Matcher;
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 import java.util.regex.Pattern;
036535fcd179 anteater
jdamerow
parents:
diff changeset
8
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 import de.mpiwg.anteater.ml.ITextParser;
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 import de.mpiwg.anteater.ml.preprocessing.FeatureCalculator;
036535fcd179 anteater
jdamerow
parents:
diff changeset
11 import de.mpiwg.anteater.persons.APerson;
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 import de.mpiwg.anteater.places.Place;
036535fcd179 anteater
jdamerow
parents:
diff changeset
13 import de.mpiwg.anteater.places.PlaceInformation;
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 import de.mpiwg.anteater.species.scientific.ScientificName;
036535fcd179 anteater
jdamerow
parents:
diff changeset
15 import de.mpiwg.anteater.text.Paragraph;
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 import de.mpiwg.anteater.text.TextPart;
036535fcd179 anteater
jdamerow
parents:
diff changeset
17
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 public class LocationFeatureCalculator extends FeatureCalculator {
036535fcd179 anteater
jdamerow
parents:
diff changeset
19
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 private final String _university = "university";
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 private final String _study = "study";
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 private final String _studies = "studies";
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 private final String _in = " in ";
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 private final String _at = " at ";
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 private final String _survey = "survey";
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 private final String _species = "species";
036535fcd179 anteater
jdamerow
parents:
diff changeset
27
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 private List<ScientificName> foundNames;
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 private List<APerson> foundApplicants;
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 private TextPart text;
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 private Map<String, Integer> typeMap;
036535fcd179 anteater
jdamerow
parents:
diff changeset
32
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 public LocationFeatureCalculator(List<String> sentenceList,
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 ITextParser parser, TextPart text) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 super(sentenceList, parser, text.getText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 this.text = text;
036535fcd179 anteater
jdamerow
parents:
diff changeset
37
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 typeMap = new HashMap<String, Integer>();
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 typeMap.put(Place.TOWN, 1);
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 typeMap.put(Place.COUNTY, 2);
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 typeMap.put(Place.STATE, 3);
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 typeMap.put(Place.COUNTRY, 4);
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 typeMap.put(Place.SUBURB, 5);
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 typeMap.put(Place.POI, 6);
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 typeMap.put(Place.ZIP, 7);
036535fcd179 anteater
jdamerow
parents:
diff changeset
46 typeMap.put(Place.OCEAN, 8);
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
48
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 public double getNumberWordRelation(String[] placeParts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
50 int numbers = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 int words = placeParts.length;
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 for (String part : placeParts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 if (part.matches("[0-9]{1}.*"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 numbers++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
56 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
57
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 return 1.0 * numbers / words;
036535fcd179 anteater
jdamerow
parents:
diff changeset
59 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
60
036535fcd179 anteater
jdamerow
parents:
diff changeset
61 public double getUppercasedWordsToAllRelation(String[] placeParts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
62 int uppercaseWords = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
63 int words = placeParts.length;
036535fcd179 anteater
jdamerow
parents:
diff changeset
64
036535fcd179 anteater
jdamerow
parents:
diff changeset
65 for (String part : placeParts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
66 if (part.matches("[A-Z]{1}.*"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
67 uppercaseWords++;
036535fcd179 anteater
jdamerow
parents:
diff changeset
68 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
69
036535fcd179 anteater
jdamerow
parents:
diff changeset
70 return 1.0 * uppercaseWords / words;
036535fcd179 anteater
jdamerow
parents:
diff changeset
71 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
72
036535fcd179 anteater
jdamerow
parents:
diff changeset
73 public int contains2UppercaseCharacterWord(String[] placeParts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
74 for (String part : placeParts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
75 if (part.matches("[A-Z][A-Z]"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
76 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
77 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
78 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
79 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
80
036535fcd179 anteater
jdamerow
parents:
diff changeset
81 public int containsUniversity(String[] parts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
82 for (String part : parts) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
83 if (part.trim().toLowerCase().equals(_university))
036535fcd179 anteater
jdamerow
parents:
diff changeset
84 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
85 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
86 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
87 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
88
036535fcd179 anteater
jdamerow
parents:
diff changeset
89 public int isPreceededByAnd(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
90 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
91 if (sentence == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
92 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
93
036535fcd179 anteater
jdamerow
parents:
diff changeset
94 int offset = getStartOfSentenceContainingCandidiate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
95
036535fcd179 anteater
jdamerow
parents:
diff changeset
96 if (sentence.substring(0, candidate.getStart() - offset).trim()
036535fcd179 anteater
jdamerow
parents:
diff changeset
97 .endsWith("and"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
98 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
99 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
100 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
101
036535fcd179 anteater
jdamerow
parents:
diff changeset
102 public int isPreceededByThe(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
103 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
104 if (sentence == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
105 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
106
036535fcd179 anteater
jdamerow
parents:
diff changeset
107 int offset = getStartOfSentenceContainingCandidiate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
108
036535fcd179 anteater
jdamerow
parents:
diff changeset
109 if (sentence.substring(0, candidate.getStart() - offset).trim()
036535fcd179 anteater
jdamerow
parents:
diff changeset
110 .endsWith("the"))
036535fcd179 anteater
jdamerow
parents:
diff changeset
111 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
112 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
113 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
114
036535fcd179 anteater
jdamerow
parents:
diff changeset
115 public int isSurroundedByBrackets(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
116 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
117 if (sentence == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
118 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
119
036535fcd179 anteater
jdamerow
parents:
diff changeset
120 Pattern pattern = Pattern.compile("\\("
036535fcd179 anteater
jdamerow
parents:
diff changeset
121 + candidate.getReferenceInText().replace("(", "\\(").replace(")","\\)") + "\\)");
036535fcd179 anteater
jdamerow
parents:
diff changeset
122 Matcher matcher = pattern.matcher(sentence);
036535fcd179 anteater
jdamerow
parents:
diff changeset
123 if (matcher.find())
036535fcd179 anteater
jdamerow
parents:
diff changeset
124 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
125 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
126 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
127
036535fcd179 anteater
jdamerow
parents:
diff changeset
128 public int isSurroundedByCommata(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
129 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
130 if (sentence == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
131 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
132
036535fcd179 anteater
jdamerow
parents:
diff changeset
133 if (sentence.contains(", " + candidate.getReferenceInText() + ","))
036535fcd179 anteater
jdamerow
parents:
diff changeset
134 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
135 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
136 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
137
036535fcd179 anteater
jdamerow
parents:
diff changeset
138 public int getCharsToLastSpeciesInParagraph(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
139
036535fcd179 anteater
jdamerow
parents:
diff changeset
140 Paragraph paragraph = getParagraphOfCandidate(candidate);
036535fcd179 anteater
jdamerow
parents:
diff changeset
141 int textcounter = text.getPositionInTextFromParagraph(paragraph, 0);
036535fcd179 anteater
jdamerow
parents:
diff changeset
142
036535fcd179 anteater
jdamerow
parents:
diff changeset
143 // find last species name
036535fcd179 anteater
jdamerow
parents:
diff changeset
144 ScientificName closestName = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
145 for (ScientificName name : foundNames) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
146 // if species is before candiddate
036535fcd179 anteater
jdamerow
parents:
diff changeset
147 if (name.getStart() < candidate.getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
148 && name.getStart() > textcounter) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
149 // if it's closer than other last species replace lastName
036535fcd179 anteater
jdamerow
parents:
diff changeset
150 if (closestName == null
036535fcd179 anteater
jdamerow
parents:
diff changeset
151 || closestName.getStart() < name.getStart())
036535fcd179 anteater
jdamerow
parents:
diff changeset
152 closestName = name;
036535fcd179 anteater
jdamerow
parents:
diff changeset
153 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
154 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
155
036535fcd179 anteater
jdamerow
parents:
diff changeset
156 if (closestName == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
157 return -1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
158
036535fcd179 anteater
jdamerow
parents:
diff changeset
159 return candidate.getStart() - (closestName.getStart() + closestName.getLength());
036535fcd179 anteater
jdamerow
parents:
diff changeset
160 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
161
036535fcd179 anteater
jdamerow
parents:
diff changeset
162 public int getCharsToNextSpeciesInParagraph(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
163
036535fcd179 anteater
jdamerow
parents:
diff changeset
164 Paragraph paragraph = getParagraphOfCandidate(candidate);
036535fcd179 anteater
jdamerow
parents:
diff changeset
165 int textcounter = text.getPositionInTextFromParagraph(paragraph, 0);
036535fcd179 anteater
jdamerow
parents:
diff changeset
166
036535fcd179 anteater
jdamerow
parents:
diff changeset
167 // find last species name
036535fcd179 anteater
jdamerow
parents:
diff changeset
168 ScientificName closestName = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
169 for (ScientificName name : foundNames) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
170 // if species is before candiddate
036535fcd179 anteater
jdamerow
parents:
diff changeset
171 if (name.getStart() > candidate.getStart() + candidate.getLength()
036535fcd179 anteater
jdamerow
parents:
diff changeset
172 && name.getStart() < textcounter + paragraph.getParagraphText().length()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
173 // if it's closer than other last species replace lastName
036535fcd179 anteater
jdamerow
parents:
diff changeset
174 if (closestName == null
036535fcd179 anteater
jdamerow
parents:
diff changeset
175 || closestName.getStart() > name.getStart())
036535fcd179 anteater
jdamerow
parents:
diff changeset
176 closestName = name;
036535fcd179 anteater
jdamerow
parents:
diff changeset
177 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
178 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
179
036535fcd179 anteater
jdamerow
parents:
diff changeset
180 if (closestName == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
181 return -1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
182
036535fcd179 anteater
jdamerow
parents:
diff changeset
183 return closestName.getStart() - (candidate.getStart() + candidate.getLength());
036535fcd179 anteater
jdamerow
parents:
diff changeset
184 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
185
036535fcd179 anteater
jdamerow
parents:
diff changeset
186 public int getCharsToLastApplicantInParagraph(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
187
036535fcd179 anteater
jdamerow
parents:
diff changeset
188 Paragraph paragraph = getParagraphOfCandidate(candidate);
036535fcd179 anteater
jdamerow
parents:
diff changeset
189 int textcounter = text.getPositionInTextFromParagraph(paragraph, 0);
036535fcd179 anteater
jdamerow
parents:
diff changeset
190
036535fcd179 anteater
jdamerow
parents:
diff changeset
191 // find last species name
036535fcd179 anteater
jdamerow
parents:
diff changeset
192 APerson closestApplicant = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
193 for (APerson person : foundApplicants) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
194 // if species is before candiddate
036535fcd179 anteater
jdamerow
parents:
diff changeset
195 if (person.getStart() < candidate.getStart()
036535fcd179 anteater
jdamerow
parents:
diff changeset
196 && person.getStart() > textcounter) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
197 // if it's closer than other last species replace lastName
036535fcd179 anteater
jdamerow
parents:
diff changeset
198 if (closestApplicant == null
036535fcd179 anteater
jdamerow
parents:
diff changeset
199 || closestApplicant.getStart() < person.getStart())
036535fcd179 anteater
jdamerow
parents:
diff changeset
200 closestApplicant = person;
036535fcd179 anteater
jdamerow
parents:
diff changeset
201 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
202 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
203
036535fcd179 anteater
jdamerow
parents:
diff changeset
204 if (closestApplicant == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
205 return -1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
206
036535fcd179 anteater
jdamerow
parents:
diff changeset
207 return candidate.getStart() - (closestApplicant.getStart() + closestApplicant.getLength());
036535fcd179 anteater
jdamerow
parents:
diff changeset
208 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
209
036535fcd179 anteater
jdamerow
parents:
diff changeset
210 public int getCharToStudyInParagraph(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
211 Paragraph paragraph = getParagraphOfCandidate(candidate);
036535fcd179 anteater
jdamerow
parents:
diff changeset
212 int posInPara = text.getPositionInParagraphFromText(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
213
036535fcd179 anteater
jdamerow
parents:
diff changeset
214 return getOffsetToClosestWord(posInPara, candidate.getLength(), _study, paragraph.getParagraphText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
215 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
216
036535fcd179 anteater
jdamerow
parents:
diff changeset
217 public int getCharToStudiesInParagraph(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
218 Paragraph paragraph = getParagraphOfCandidate(candidate);
036535fcd179 anteater
jdamerow
parents:
diff changeset
219 int posInPara = text.getPositionInParagraphFromText(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
220
036535fcd179 anteater
jdamerow
parents:
diff changeset
221 return getOffsetToClosestWord(posInPara, candidate.getLength(), _studies, paragraph.getParagraphText());
036535fcd179 anteater
jdamerow
parents:
diff changeset
222 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
223
036535fcd179 anteater
jdamerow
parents:
diff changeset
224 public int getCharToSurveyInSentence(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
225 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
226
036535fcd179 anteater
jdamerow
parents:
diff changeset
227 return getOffsetToClosestWord(getPosInSentence(candidate), candidate.getLength(), _survey, sentence);
036535fcd179 anteater
jdamerow
parents:
diff changeset
228 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
229
036535fcd179 anteater
jdamerow
parents:
diff changeset
230 public int getCharToSpeciesInSentence(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
231 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
232
036535fcd179 anteater
jdamerow
parents:
diff changeset
233 return getOffsetToClosestWord(getPosInSentence(candidate), candidate.getLength(), _species, sentence);
036535fcd179 anteater
jdamerow
parents:
diff changeset
234 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
235
036535fcd179 anteater
jdamerow
parents:
diff changeset
236 public int hasComma(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
237 if (candidate.getReferenceInText().contains(","))
036535fcd179 anteater
jdamerow
parents:
diff changeset
238 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
239 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
240 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
241
036535fcd179 anteater
jdamerow
parents:
diff changeset
242 public int hasBracket(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
243 if (candidate.getReferenceInText().contains(")") || candidate.getReferenceInText().contains("("))
036535fcd179 anteater
jdamerow
parents:
diff changeset
244 return 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
245 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
246 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
247
036535fcd179 anteater
jdamerow
parents:
diff changeset
248 public int getCharToInInSentence(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
249 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
250
036535fcd179 anteater
jdamerow
parents:
diff changeset
251 return getOffsetToClosestWord(getPosInSentence(candidate), candidate.getLength(), _in, sentence);
036535fcd179 anteater
jdamerow
parents:
diff changeset
252 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
253
036535fcd179 anteater
jdamerow
parents:
diff changeset
254 public int getCharToAtInSentence(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
255 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
256
036535fcd179 anteater
jdamerow
parents:
diff changeset
257 return getOffsetToClosestWord(getPosInSentence(candidate), candidate.getLength(), _at, sentence);
036535fcd179 anteater
jdamerow
parents:
diff changeset
258 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
259
036535fcd179 anteater
jdamerow
parents:
diff changeset
260 private int getPosInSentence(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
261 Paragraph paragraph = getParagraphOfCandidate(candidate);
036535fcd179 anteater
jdamerow
parents:
diff changeset
262 String sentence = getSentenceContainingCandidate(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
263 int indexOfSentence = paragraph.getParagraphText().indexOf(sentence);
036535fcd179 anteater
jdamerow
parents:
diff changeset
264 int posInPara = text.getPositionInParagraphFromText(candidate.getStart());
036535fcd179 anteater
jdamerow
parents:
diff changeset
265 return posInPara - indexOfSentence;
036535fcd179 anteater
jdamerow
parents:
diff changeset
266 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
267
036535fcd179 anteater
jdamerow
parents:
diff changeset
268 private Paragraph getParagraphOfCandidate(PlaceInformation candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
269 int textcounter = 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
270 Paragraph paragraph = null;
036535fcd179 anteater
jdamerow
parents:
diff changeset
271 for (Paragraph para : text.getParagraphs()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
272 if (candidate.getStart() >= textcounter
036535fcd179 anteater
jdamerow
parents:
diff changeset
273 && candidate.getStart() < textcounter
036535fcd179 anteater
jdamerow
parents:
diff changeset
274 + para.getParagraphText().length()) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
275 paragraph = para;
036535fcd179 anteater
jdamerow
parents:
diff changeset
276 break;
036535fcd179 anteater
jdamerow
parents:
diff changeset
277 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
278 textcounter += para.getParagraphText().length() + 1;
036535fcd179 anteater
jdamerow
parents:
diff changeset
279 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
280 return paragraph;
036535fcd179 anteater
jdamerow
parents:
diff changeset
281 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
282
036535fcd179 anteater
jdamerow
parents:
diff changeset
283 public int getType(Place candidate) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
284 String type = candidate.getType();
036535fcd179 anteater
jdamerow
parents:
diff changeset
285
036535fcd179 anteater
jdamerow
parents:
diff changeset
286 if (typeMap.get(type) == null)
036535fcd179 anteater
jdamerow
parents:
diff changeset
287 return 0;
036535fcd179 anteater
jdamerow
parents:
diff changeset
288
036535fcd179 anteater
jdamerow
parents:
diff changeset
289 return typeMap.get(type);
036535fcd179 anteater
jdamerow
parents:
diff changeset
290 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
291
036535fcd179 anteater
jdamerow
parents:
diff changeset
292 public List<ScientificName> getFoundSpecies() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
293 return foundNames;
036535fcd179 anteater
jdamerow
parents:
diff changeset
294 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
295
036535fcd179 anteater
jdamerow
parents:
diff changeset
296 public void setFoundSpecies(List<ScientificName> foundSpecies) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
297 this.foundNames = foundSpecies;
036535fcd179 anteater
jdamerow
parents:
diff changeset
298 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
299
036535fcd179 anteater
jdamerow
parents:
diff changeset
300 public List<APerson> getFoundApplicant() {
036535fcd179 anteater
jdamerow
parents:
diff changeset
301 return foundApplicants;
036535fcd179 anteater
jdamerow
parents:
diff changeset
302 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
303
036535fcd179 anteater
jdamerow
parents:
diff changeset
304 public void setFoundApplicant(List<APerson> foundApplicant) {
036535fcd179 anteater
jdamerow
parents:
diff changeset
305 this.foundApplicants = foundApplicant;
036535fcd179 anteater
jdamerow
parents:
diff changeset
306 }
036535fcd179 anteater
jdamerow
parents:
diff changeset
307 }