Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Form.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.lt.morph.app; | |
2 | |
/**
 * One morphological form entry: a form name with its lemma name plus a set of
 * optional grammatical attributes, all held as plain strings.
 *
 * <p>Every attribute may be {@code null}, meaning "not set". Instances are
 * mutable and not synchronized.
 */
public class Form {
  private String provider;
  private String language;
  private String formName;
  private String lemmaName;
  private String pos;
  private String tense;
  private String voice;
  private String casus;
  private String number;
  private String mood;
  private String person;
  private String gender;
  private String definite;

  /** Creates a form with every attribute unset ({@code null}). */
  public Form() {
  }

  /**
   * Creates a form with provider, language and form name set; all other
   * attributes (including the lemma name) stay {@code null}.
   *
   * @param provider the provider value
   * @param language the language code (compared against "el" and "ar" by
   *                 {@link #isGreek()} and {@link #isArabic()})
   * @param formName the form name
   */
  public Form(String provider, String language, String formName) {
    this.provider = provider;
    this.language = language;
    this.formName = formName;
  }

  /**
   * Normalizes form name and lemma name in place.
   *
   * <p>Steps, applied to each name that is not {@code null}:
   * <ol>
   *   <li>lower-casing (locale independent),</li>
   *   <li>escaping of the XML special characters {@code & ' < > "},</li>
   *   <li>lemma only: removal of homograph markers ({@code #} followed by a
   *       digit) and, for Arabic, of one trailing digit,</li>
   *   <li>removal of hyphens, blanks and plus signs.</li>
   * </ol>
   *
   * <p>A {@code null} name is left untouched. The method is not idempotent
   * with respect to escaping: calling it twice escapes the ampersands of the
   * entities produced by the first call.
   */
  public void normalize() {
    if (formName != null) {
      String form = formName.toLowerCase(java.util.Locale.ROOT);
      formName = stripSeparators(escapeXml(form));
    }
    if (lemmaName != null) {
      String lemma = escapeXml(lemmaName.toLowerCase(java.util.Locale.ROOT));
      // unification of lemma names (homographs) TODO do not unify the homographs
      lemma = lemma.replaceAll("#[0-9]", "");
      if (isArabic() && endsWithDigit(lemma)) {
        lemma = lemma.substring(0, lemma.length() - 1);
      }
      lemmaName = stripSeparators(lemma);
    }
    // TODO call MpdlMorphDataNormalizer (handle Umlauts in german, accents in french, character classes (longs, s, ...) ...)
  }

  /** Replaces the five XML special characters by their predefined entities; the ampersand goes first so entities are not re-escaped. */
  private static String escapeXml(String text) {
    return text.replace("&", "&amp;")
        .replace("'", "&apos;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
        .replace("\"", "&quot;");
  }

  /** Removes every hyphen, blank and plus sign from the given text. */
  private static String stripSeparators(String text) {
    return text.replace("-", "").replace(" ", "").replace("+", "");
  }

  /** Tells whether the text is non-empty and its last character is a digit. */
  private static boolean endsWithDigit(String text) {
    return text.length() > 0 && Character.isDigit(text.charAt(text.length() - 1));
  }

  /**
   * Checks that both form name and lemma name are usable.
   *
   * @return {@code true} if both names are non-null and longer than one
   *         character
   */
  public boolean isOk() {
    return formName != null && lemmaName != null
        && formName.length() > 1 && lemmaName.length() > 1;
  }

  /**
   * @return {@code true} if the language is exactly "el"
   */
  public boolean isGreek() {
    return "el".equals(language);
  }

  /**
   * @return {@code true} if the language is exactly "ar"
   */
  public boolean isArabic() {
    return "ar".equals(language);
  }

  /**
   * Compares the information content of this form with another one.
   *
   * <p>This form is richer when it is OK (see {@link #isOk()}) and the other
   * form is {@code null} or not OK, or when this form has a non-empty pos
   * value while the other form has none.
   *
   * @param otherForm the form to compare with; {@code null} is treated like a
   *                  form that is not OK
   * @return {@code true} if this form is richer than {@code otherForm}
   */
  public boolean isRicherThan(Form otherForm) {
    if (!isOk()) {
      return false;
    }
    if (otherForm == null || !otherForm.isOk()) {
      return true;
    }
    String otherFormPos = otherForm.getPos();
    boolean hasPos = pos != null && pos.length() > 0;
    boolean otherHasPos = otherFormPos != null && otherFormPos.length() > 0;
    // TODO all other cases
    return hasPos && !otherHasPos;
  }

  /**
   * Serializes this form as a {@code <form>} element with one child element
   * per attribute that is not {@code null}.
   *
   * <p>Values are written as they are; no escaping happens here (form and
   * lemma name are escaped by {@link #normalize()}).
   *
   * @return the XML fragment, terminated by a newline
   */
  public String getXmlString() {
    StringBuilder xml = new StringBuilder("<form>\n");
    appendElement(xml, "provider", provider);
    appendElement(xml, "language", language);
    appendElement(xml, "form-name", formName);
    appendElement(xml, "lemma-name", lemmaName);
    appendElement(xml, "pos", pos);
    appendElement(xml, "tense", tense);
    appendElement(xml, "voice", voice);
    appendElement(xml, "casus", casus);
    appendElement(xml, "number", number);
    appendElement(xml, "mood", mood);
    appendElement(xml, "person", person);
    appendElement(xml, "gender", gender);
    appendElement(xml, "definite", definite);
    xml.append("</form>\n");
    return xml.toString();
  }

  /** Appends one indented child element line, or nothing when the value is null. */
  private static void appendElement(StringBuilder xml, String tag, String value) {
    if (value != null) {
      xml.append(" <").append(tag).append('>').append(value)
          .append("</").append(tag).append(">\n");
    }
  }

  /** Concatenates addition to current; a null operand yields the other one, so the text "null" is never appended. */
  private static String append(String current, String addition) {
    if (current == null) {
      return addition;
    }
    if (addition == null) {
      return current;
    }
    return current + addition;
  }

  /** @return the same text as {@link #getXmlString()} */
  @Override
  public String toString() {
    return getXmlString();
  }

  public String getTense() {
    return tense;
  }

  public void setTense(String tense) {
    this.tense = tense;
  }

  /** Appends the given text to the tense value (sets it when unset; null is ignored). */
  public void addTense(String newTense) {
    tense = append(tense, newTense);
  }

  public String getVoice() {
    return voice;
  }

  public void setVoice(String voice) {
    this.voice = voice;
  }

  /** Appends the given text to the voice value (sets it when unset; null is ignored). */
  public void addVoice(String newVoice) {
    voice = append(voice, newVoice);
  }

  public String getCasus() {
    return casus;
  }

  public void setCasus(String casus) {
    this.casus = casus;
  }

  /** Appends the given text to the casus value (sets it when unset; null is ignored). */
  public void addCasus(String newCasus) {
    casus = append(casus, newCasus);
  }

  public String getNumber() {
    return number;
  }

  public void setNumber(String number) {
    this.number = number;
  }

  /** Appends the given text to the number value (sets it when unset; null is ignored). */
  public void addNumber(String newNumber) {
    number = append(number, newNumber);
  }

  public String getMood() {
    return mood;
  }

  public void setMood(String mood) {
    this.mood = mood;
  }

  /** Appends the given text to the mood value (sets it when unset; null is ignored). */
  public void addMood(String newMood) {
    mood = append(mood, newMood);
  }

  public String getPerson() {
    return person;
  }

  public void setPerson(String person) {
    this.person = person;
  }

  /** Appends the given text to the person value (sets it when unset; null is ignored). */
  public void addPerson(String newPerson) {
    person = append(person, newPerson);
  }

  public String getGender() {
    return gender;
  }

  public void setGender(String gender) {
    this.gender = gender;
  }

  /** Appends the given text to the gender value (sets it when unset; null is ignored). */
  public void addGender(String newGender) {
    gender = append(gender, newGender);
  }

  public String getDefinite() {
    return definite;
  }

  public void setDefinite(String definite) {
    this.definite = definite;
  }

  /** Appends the given text to the definite value (sets it when unset; null is ignored). */
  public void addDefinite(String newDefinite) {
    definite = append(definite, newDefinite);
  }

  public String getLemmaName() {
    return lemmaName;
  }

  public String getPos() {
    return pos;
  }

  public String getProvider() {
    return provider;
  }

  public void setProvider(String provider) {
    this.provider = provider;
  }

  /** Appends the given text to the provider value (sets it when unset; null is ignored). */
  public void addProvider(String newProvider) {
    provider = append(provider, newProvider);
  }

  public String getLanguage() {
    return language;
  }

  public void setLanguage(String language) {
    this.language = language;
  }

  /** Appends the given text to the language value (sets it when unset; null is ignored). */
  public void addLanguage(String newLanguage) {
    language = append(language, newLanguage);
  }

  public String getFormName() {
    return formName;
  }

  public void setFormName(String formName) {
    this.formName = formName;
  }

  /** Appends the given text to the form name (sets it when unset; null is ignored). */
  public void addFormName(String newFormName) {
    formName = append(formName, newFormName);
  }

  public void setLemmaName(String lemmaName) {
    this.lemmaName = lemmaName;
  }

  /** Appends the given text to the lemma name (sets it when unset; null is ignored). */
  public void addLemmaName(String newLemmaName) {
    lemmaName = append(lemmaName, newLemmaName);
  }

  public void setPos(String pos) {
    this.pos = pos;
  }

  /** Appends the given text to the pos value (sets it when unset; null is ignored). */
  public void addPos(String newPos) {
    pos = append(pos, newPos);
  }

}