Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Form.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.lt.morph.app; public class Form { private String provider; private String language; private String formName; private String lemmaName; private String pos; private String tense; private String voice; private String casus; private String number; private String mood; private String person; private String gender; private String definite; public Form() { } public Form(String provider, String language, String formName) { this.provider = provider; this.language = language; this.formName = formName; } public void normalize() { // lower case of form and lemma formName = formName.toLowerCase(); lemmaName = lemmaName.toLowerCase(); // XML: special symbols formName = formName.replaceAll("&", "&"); formName = formName.replaceAll("'", "'"); formName = formName.replaceAll("<", "<"); formName = formName.replaceAll(">", ">"); formName = formName.replaceAll("\"", """); lemmaName = lemmaName.replaceAll("&", "&"); lemmaName = lemmaName.replaceAll("'", "'"); lemmaName = lemmaName.replaceAll("<", "<"); lemmaName = lemmaName.replaceAll(">", ">"); lemmaName = lemmaName.replaceAll("\"", """); // unification of lemma names (homographs) TODO do not unificate the homographs lemmaName = lemmaName.replaceAll("#[0-9]", ""); if (isArabic()) { if (lemmaName != null) { int length = lemmaName.length(); char lastChar = lemmaName.charAt(length - 1); boolean isDigit = Character.isDigit(lastChar); if (isDigit) lemmaName = lemmaName.substring(0, length - 1); } } // unification of forms and lemmas with hyphens: remove the hyphen formName = formName.replaceAll("-", ""); lemmaName = lemmaName.replaceAll("-", ""); // unification of forms and lemmas with blanks (sequence of words): remove the blanks formName = formName.replaceAll(" ", ""); lemmaName = lemmaName.replaceAll(" ", ""); // unification of forms and lemmas with plus symbols: remove the plus symbol formName = formName.replaceAll("\\+", ""); lemmaName = lemmaName.replaceAll("\\+", ""); // TODO call MpdlMorphDataNormalizer (handle Umlauts in german, accents in french, character classes (longs, s, ...) ...) } public boolean isOk() { boolean ret = true; if (formName == null || lemmaName == null) ret = false; else if (formName.length() == 0 || lemmaName.length() == 0 || formName.length() == 1 || lemmaName.length() == 1) ret = false; return ret; } public boolean isGreek() { boolean ret = false; if (language != null && language.equals("el")) ret = true; return ret; } public boolean isArabic() { boolean ret = false; if (language != null && language.equals("ar")) ret = true; return ret; } public boolean isRicherThan(Form otherForm) { boolean richer = false; if (! isOk()) return false; else if (! otherForm.isOk()) return true; String otherFormPos = otherForm.getPos(); if (pos != null && pos.length() > 0 && (otherFormPos == null || otherFormPos.length() == 0)) return true; // TODO all other cases return richer; } public String getXmlString() { String xmlString = "<form>\n"; if (provider != null) xmlString += " <provider>" + provider + "</provider>\n"; if (language != null) xmlString += " <language>" + language + "</language>\n"; if (formName != null) xmlString += " <form-name>" + formName + "</form-name>\n"; if (lemmaName != null) xmlString += " <lemma-name>" + lemmaName + "</lemma-name>\n"; if (pos != null) xmlString += " <pos>" + pos + "</pos>\n"; if (tense != null) xmlString += " <tense>" + tense + "</tense>\n"; if (voice != null) xmlString += " <voice>" + voice + "</voice>\n"; if (casus != null) xmlString += " <casus>" + casus + "</casus>\n"; if (number != null) xmlString += " <number>" + number + "</number>\n"; if (mood != null) xmlString += " <mood>" + mood + "</mood>\n"; if (person != null) xmlString += " <person>" + person + "</person>\n"; if (gender != null) xmlString += " <gender>" + gender + "</gender>\n"; if (definite != null) xmlString += " <definite>" + definite + "</definite>\n"; xmlString += "</form>\n"; return xmlString; } public String toString() { return getXmlString(); } public String getTense() { return tense; } public void setTense(String tense) { this.tense = tense; } public void addTense(String newTense) { if (tense == null) this.tense = newTense; else tense += newTense; } public String getVoice() { return voice; } public void setVoice(String voice) { this.voice = voice; } public void addVoice(String newVoice) { if (voice == null) this.voice = newVoice; else voice += newVoice; } public String getCasus() { return casus; } public void setCasus(String casus) { this.casus = casus; } public void addCasus(String newCasus) { if (casus == null) this.casus = newCasus; else casus += newCasus; } public String getNumber() { return number; } public void setNumber(String number) { this.number = number; } public void addNumber(String newNumber) { if (number == null) this.number = newNumber; else number += newNumber; } public String getMood() { return mood; } public void setMood(String mood) { this.mood = mood; } public void addMood(String newMood) { if (mood == null) this.mood = newMood; else mood += newMood; } public String getPerson() { return person; } public void setPerson(String person) { this.person = person; } public void addPerson(String newPerson) { if (person == null) this.person = newPerson; else person += newPerson; } public String getGender() { return gender; } public void setGender(String gender) { this.gender = gender; } public void addGender(String newGender) { if (gender == null) this.gender = newGender; else gender += newGender; } public String getDefinite() { return definite; } public void setDefinite(String definite) { this.definite = definite; } public void addDefinite(String newDefinite) { if (definite == null) this.definite = newDefinite; else definite += newDefinite; } public String getLemmaName() { return lemmaName; } public String getPos() { return pos; } public String getProvider() { return provider; } public void setProvider(String provider) { this.provider = provider; } public void addProvider(String newProvider) { if (provider == null) this.provider = newProvider; else provider += newProvider; } public String getLanguage() { return language; } public void setLanguage(String language) { this.language = language; } public void addLanguage(String newLanguage) { if (language == null) this.language = newLanguage; else language += newLanguage; } public String getFormName() { return formName; } public void setFormName(String formName) { this.formName = formName; } public void addFormName(String newFormName) { if (formName == null) this.formName = newFormName; else formName += newFormName; } public void setLemmaName(String lemmaName) { this.lemmaName = lemmaName; } public void addLemmaName(String newLemmaName) { if (lemmaName == null) this.lemmaName = newLemmaName; else lemmaName += newLemmaName; } public void setPos(String pos) { this.pos = pos; } public void addPos(String newPos) { if (pos == null) this.pos = newPos; else pos += newPos; } }