Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Form.java @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
18:dc5e9fcb3fdc | 19:4a3641ae14d2 |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.lt.morph.app; | |
2 | |
/**
 * A single word form together with its lemma and grammatical attributes
 * (part of speech, tense, voice, case, number, mood, person, gender,
 * definiteness), tagged with the provider and language it belongs to.
 *
 * <p>All attributes are plain, nullable strings. Each attribute has a getter,
 * a setter and an {@code addXxx} method that appends a text fragment to the
 * current value. Instances are mutable and not synchronized.
 *
 * <p>The natural ordering compares {@code formName} only; {@code equals} and
 * {@code hashCode} are not overridden, so the ordering is not consistent with
 * equals.
 */
public class Form implements Comparable<Form> {
  private String provider;
  private String language;
  private String formName;
  private String lemmaName;
  private String pos;
  private String tense;
  private String voice;
  private String casus;
  private String number;
  private String mood;
  private String person;
  private String gender;
  private String definite;

  /** Creates an empty form; every attribute is {@code null}. */
  public Form() {
  }

  /**
   * Creates a form with provider, language and form name set; all other
   * attributes (including the lemma name) stay {@code null}.
   *
   * @param provider name of the data provider
   * @param language language code (the class itself tests for "el" and "ar")
   * @param formName the word form
   */
  public Form(String provider, String language, String formName) {
    this.provider = provider;
    this.language = language;
    this.formName = formName;
  }

  /**
   * Orders forms by their form name. A form without a form name sorts before
   * any form that has one; two forms without a form name compare as equal.
   *
   * @param f the form to compare with; must not be {@code null}
   * @return negative, zero or positive as defined by {@link Comparable}
   * @throws NullPointerException if {@code f} is {@code null}
   */
  @Override
  public int compareTo(Form f) {
    String otherName = f.formName;
    if (formName == null) {
      return otherName == null ? 0 : -1;
    }
    if (otherName == null) {
      return 1;
    }
    return formName.compareTo(otherName);
  }

  /**
   * Normalizes form name and lemma name in place. A name that is
   * {@code null} is left untouched.
   *
   * <p>Steps, in this order: lower-casing, escaping of the five XML special
   * characters, removal of homograph markers ({@code #} followed by a digit)
   * from the lemma, removal of one trailing digit from Arabic lemmas, and
   * finally removal of hyphens, blanks and plus signs.
   */
  public void normalize() {
    if (formName != null) {
      // Locale.ROOT keeps the result independent of the JVM's default locale
      String form = escapeXml(formName.toLowerCase(java.util.Locale.ROOT));
      formName = removeSeparators(form);
    }
    if (lemmaName != null) {
      String lemma = escapeXml(lemmaName.toLowerCase(java.util.Locale.ROOT));
      // unification of lemma names (homographs) TODO do not unify the homographs
      lemma = lemma.replaceAll("#[0-9]", "");
      if (isArabic()) {
        lemma = stripTrailingDigit(lemma);
      }
      lemmaName = removeSeparators(lemma);
    }
    // TODO call MpdlMorphDataNormalizer (handle umlauts in German, accents in French, character classes (long s, ...) ...)
  }

  /**
   * Tells whether this form is usable: both form name and lemma name are
   * present and each is at least two characters long.
   *
   * @return {@code true} if both names are non-null and longer than one character
   */
  public boolean isOk() {
    if (formName == null || lemmaName == null) {
      return false;
    }
    return formName.length() > 1 && lemmaName.length() > 1;
  }

  /**
   * @return {@code true} if the language code is exactly "el"
   */
  public boolean isGreek() {
    return "el".equals(language);
  }

  /**
   * @return {@code true} if the language code is exactly "ar"
   */
  public boolean isArabic() {
    return "ar".equals(language);
  }

  /**
   * Decides whether this form carries more information than another one.
   *
   * <p>A form that is not {@link #isOk() ok} is never richer. An ok form is
   * richer than a form that is not ok, and richer than an ok form without a
   * part of speech if it has one itself. Every other case currently yields
   * {@code false}.
   *
   * @param otherForm the form to compare against; must not be {@code null}
   *     when this form is ok
   * @return {@code true} if this form is considered richer
   */
  public boolean isRicherThan(Form otherForm) {
    if (!isOk()) {
      return false;
    }
    if (!otherForm.isOk()) {
      return true;
    }
    String otherFormPos = otherForm.getPos();
    boolean hasPos = pos != null && pos.length() > 0;
    boolean otherHasPos = otherFormPos != null && otherFormPos.length() > 0;
    // TODO all other cases
    return hasPos && !otherHasPos;
  }

  /**
   * Serializes this form as a {@code <form>} XML fragment with one child
   * element per non-null attribute, each on its own line.
   *
   * <p>Values are written as they are stored; no escaping happens here
   * ({@link #normalize()} escapes form name and lemma name only).
   *
   * @return the XML fragment, terminated by a newline
   */
  public String getXmlString() {
    StringBuilder xml = new StringBuilder("<form>\n");
    appendElement(xml, "provider", provider);
    appendElement(xml, "language", language);
    appendElement(xml, "form-name", formName);
    appendElement(xml, "lemma-name", lemmaName);
    appendElement(xml, "pos", pos);
    appendElement(xml, "tense", tense);
    appendElement(xml, "voice", voice);
    appendElement(xml, "casus", casus);
    appendElement(xml, "number", number);
    appendElement(xml, "mood", mood);
    appendElement(xml, "person", person);
    appendElement(xml, "gender", gender);
    appendElement(xml, "definite", definite);
    xml.append("</form>\n");
    return xml.toString();
  }

  /** Returns the same text as {@link #getXmlString()}. */
  @Override
  public String toString() {
    return getXmlString();
  }

  public String getTense() {
    return tense;
  }

  public void setTense(String tense) {
    this.tense = tense;
  }

  /** Appends {@code newTense} to the tense; a {@code null} fragment is ignored. */
  public void addTense(String newTense) {
    tense = append(tense, newTense);
  }

  public String getVoice() {
    return voice;
  }

  public void setVoice(String voice) {
    this.voice = voice;
  }

  /** Appends {@code newVoice} to the voice; a {@code null} fragment is ignored. */
  public void addVoice(String newVoice) {
    voice = append(voice, newVoice);
  }

  public String getCasus() {
    return casus;
  }

  public void setCasus(String casus) {
    this.casus = casus;
  }

  /** Appends {@code newCasus} to the case; a {@code null} fragment is ignored. */
  public void addCasus(String newCasus) {
    casus = append(casus, newCasus);
  }

  public String getNumber() {
    return number;
  }

  public void setNumber(String number) {
    this.number = number;
  }

  /** Appends {@code newNumber} to the number; a {@code null} fragment is ignored. */
  public void addNumber(String newNumber) {
    number = append(number, newNumber);
  }

  public String getMood() {
    return mood;
  }

  public void setMood(String mood) {
    this.mood = mood;
  }

  /** Appends {@code newMood} to the mood; a {@code null} fragment is ignored. */
  public void addMood(String newMood) {
    mood = append(mood, newMood);
  }

  public String getPerson() {
    return person;
  }

  public void setPerson(String person) {
    this.person = person;
  }

  /** Appends {@code newPerson} to the person; a {@code null} fragment is ignored. */
  public void addPerson(String newPerson) {
    person = append(person, newPerson);
  }

  public String getGender() {
    return gender;
  }

  public void setGender(String gender) {
    this.gender = gender;
  }

  /** Appends {@code newGender} to the gender; a {@code null} fragment is ignored. */
  public void addGender(String newGender) {
    gender = append(gender, newGender);
  }

  public String getDefinite() {
    return definite;
  }

  public void setDefinite(String definite) {
    this.definite = definite;
  }

  /** Appends {@code newDefinite} to the definiteness; a {@code null} fragment is ignored. */
  public void addDefinite(String newDefinite) {
    definite = append(definite, newDefinite);
  }

  public String getLemmaName() {
    return lemmaName;
  }

  public String getPos() {
    return pos;
  }

  public String getProvider() {
    return provider;
  }

  public void setProvider(String provider) {
    this.provider = provider;
  }

  /** Appends {@code newProvider} to the provider; a {@code null} fragment is ignored. */
  public void addProvider(String newProvider) {
    provider = append(provider, newProvider);
  }

  public String getLanguage() {
    return language;
  }

  public void setLanguage(String language) {
    this.language = language;
  }

  /** Appends {@code newLanguage} to the language; a {@code null} fragment is ignored. */
  public void addLanguage(String newLanguage) {
    language = append(language, newLanguage);
  }

  public String getFormName() {
    return formName;
  }

  public void setFormName(String formName) {
    this.formName = formName;
  }

  /** Appends {@code newFormName} to the form name; a {@code null} fragment is ignored. */
  public void addFormName(String newFormName) {
    formName = append(formName, newFormName);
  }

  public void setLemmaName(String lemmaName) {
    this.lemmaName = lemmaName;
  }

  /** Appends {@code newLemmaName} to the lemma name; a {@code null} fragment is ignored. */
  public void addLemmaName(String newLemmaName) {
    lemmaName = append(lemmaName, newLemmaName);
  }

  public void setPos(String pos) {
    this.pos = pos;
  }

  /** Appends {@code newPos} to the part of speech; a {@code null} fragment is ignored. */
  public void addPos(String newPos) {
    pos = append(pos, newPos);
  }

  /** Concatenates two nullable fragments without ever producing the text "null". */
  private static String append(String current, String addition) {
    if (addition == null) {
      return current;
    }
    return current == null ? addition : current + addition;
  }

  /** Replaces the five XML special characters by their predefined entities. */
  private static String escapeXml(String text) {
    // the ampersand must go first, otherwise the entities added below get escaped again
    return text.replace("&", "&amp;")
        .replace("'", "&apos;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;");
  }

  /** Removes every hyphen, blank and plus sign. */
  private static String removeSeparators(String text) {
    return text.replace("-", "").replace(" ", "").replace("+", "");
  }

  /** Drops the last character if it is a digit; an empty text is returned unchanged. */
  private static String stripTrailingDigit(String text) {
    int last = text.length() - 1;
    if (last >= 0 && Character.isDigit(text.charAt(last))) {
      return text.substring(0, last);
    }
    return text;
  }

  /** Writes one indented child element line, or nothing when the value is {@code null}. */
  private static void appendElement(StringBuilder xml, String tag, String value) {
    if (value != null) {
      xml.append(" <").append(tag).append('>')
          .append(value)
          .append("</").append(tag).append(">\n");
    }
  }

}