Mercurial > hg > mpdl-group
diff software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexicon.java @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children | 7d6d969b10cf |
line wrap: on
line diff
package de.mpg.mpiwg.berlin.mpdl.lt.dict.app;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Hashtable;

import org.apache.commons.lang3.StringEscapeUtils;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
import de.mpg.mpiwg.berlin.mpdl.lt.text.transcode.Transcoder;

/**
 * A named dictionary for one source language, holding {@link LexiconEntry} objects keyed by
 * their form name.
 *
 * <p>A lexicon has a type; the constructor sets it to {@code "local"}. For lexica that are queried
 * through a URL, {@link #getDynamicEntry(String)} creates an entry that only carries a URL built
 * from {@link #getQueryUrl()}.
 *
 * <p>The natural ordering ({@link #compareTo(Lexicon)}) compares lexicon names only.
 */
public class Lexicon implements Comparable<Lexicon> {
  /** Value of {@link #type} that marks a lexicon as local. */
  private static final String TYPE_LOCAL = "local";

  private String name;
  private final String sourceLang;
  private String description;
  private String queryUrl;
  private String type; // local or remote
  /** Entries keyed by form name; never {@code null}. */
  private final Hashtable<String, LexiconEntry> entries;

  /**
   * Creates an empty lexicon of type {@code "local"}.
   *
   * @param name the lexicon name
   * @param sourceLanguage the language of the lexicon's forms
   */
  public Lexicon(String name, String sourceLanguage) {
    this.name = name;
    this.sourceLang = sourceLanguage;
    this.type = TYPE_LOCAL; // default is local
    this.entries = new Hashtable<String, LexiconEntry>();
  }

  /**
   * Orders lexica by name.
   *
   * @param other the lexicon to compare with
   * @return the result of comparing the two names with {@link String#compareTo(String)}
   */
  @Override
  public int compareTo(Lexicon other) {
    return name.compareTo(other.name);
  }

  /** @return the lexicon name */
  public String getName() {
    return name;
  }

  /** @param name the new lexicon name */
  public void setName(String name) {
    this.name = name;
  }

  /** @return the source language passed to the constructor */
  public String getSourceLanguage() {
    return sourceLang;
  }

  /** @return the description, or {@code null} if none has been set */
  public String getDescription() {
    return description;
  }

  /** @param description the new description */
  public void setDescription(String description) {
    this.description = description;
  }

  /** @return the URL prefix used by {@link #getDynamicEntry(String)}, or {@code null} if unset */
  public String getQueryUrl() {
    return queryUrl;
  }

  /** @param queryUrl the URL prefix to which a form is appended to build an entry URL */
  public void setQueryUrl(String queryUrl) {
    this.queryUrl = queryUrl;
  }

  /** @return the lexicon type; {@code "local"} unless changed via {@link #setType(String)} */
  public String getType() {
    return type;
  }

  /** @param type the new lexicon type */
  public void setType(String type) {
    this.type = type;
  }

  /** @return {@code true} if the type equals {@code "local"} */
  public boolean isLocalLexicon() {
    return TYPE_LOCAL.equals(type);
  }

  /**
   * Tells whether this lexicon is identified, by name, as a Betacode lexicon.
   *
   * @return {@code true} if the name is {@code "autenrieth"}, {@code "bonitz"} or {@code "lsj"};
   *     {@code false} otherwise, including when the name is {@code null}
   */
  public boolean isBetacodeLexicon() {
    // Constant-first equals: a null name yields false instead of a NullPointerException.
    return "autenrieth".equals(name) || "bonitz".equals(name) || "lsj".equals(name);
  }

  /**
   * Tells whether this lexicon is identified, by name, as a Buckwalter lexicon.
   *
   * @return {@code true} if the name is {@code "salmone"}; {@code false} otherwise, including when
   *     the name is {@code null}
   */
  public boolean isBuckwalterLexicon() {
    return "salmone".equals(name);
  }

  /**
   * Returns a sorted snapshot of all entries.
   *
   * @return a new list of the entries in their natural order, or {@code null} (not an empty list)
   *     if the lexicon has no entries
   */
  public ArrayList<LexiconEntry> getEntries() {
    ArrayList<LexiconEntry> result = new ArrayList<LexiconEntry>(entries.values());
    if (result.isEmpty()) {
      return null; // existing contract: callers receive null for "no entries"
    }
    Collections.sort(result);
    return result;
  }

  /**
   * Creates an entry for the given form whose remote URL is the query URL followed by the form.
   *
   * <p>The form is transcoded before it is appended: to Betacode if the source language is Greek,
   * and to Big5 if the lexicon name is {@code "linyutan"}. The entry is not added to this lexicon.
   *
   * @param formName the form to build the entry for
   * @return a new entry with its remote URL set
   * @throws ApplicationException declared for failures of the language/transcoding helpers
   */
  public LexiconEntry getDynamicEntry(String formName) throws ApplicationException {
    LexiconEntry lexEntry = new LexiconEntry(name, formName, null);
    String linkForm = formName;
    if (Language.getInstance().isGreek(sourceLang)) {
      linkForm = Transcoder.getInstance().transcodeFromUnicode2BetaCode(formName);
    }
    if ("linyutan".equals(name)) {
      linkForm = Transcoder.getInstance().encodeBig5(formName);
    }
    lexEntry.setRemoteUrl(queryUrl + linkForm);
    return lexEntry;
  }

  /** @return {@code true} if the lexicon holds no entries */
  public boolean isEmpty() {
    return entries.isEmpty();
  }

  /**
   * Adds an entry, replacing any existing entry with the same form name.
   *
   * @param newEntry the entry to add
   */
  public void addEntry(LexiconEntry newEntry) {
    entries.put(newEntry.getFormName(), newEntry);
  }

  /**
   * Adds all given entries, replacing existing entries with the same form name.
   *
   * @param newEntries the entries to add; {@code null} is treated as "nothing to add" so that the
   *     result of {@link #getEntries()} on an empty lexicon can be passed in directly
   */
  public void addEntries(ArrayList<LexiconEntry> newEntries) {
    if (newEntries == null) {
      return;
    }
    for (LexiconEntry newEntry : newEntries) {
      entries.put(newEntry.getFormName(), newEntry);
    }
  }

  /**
   * Creates a copy of this lexicon without its entries: name, source language, description, query
   * URL and type are copied, the entry table of the copy is empty.
   *
   * @return the new, empty lexicon
   */
  @Override
  public Lexicon clone() {
    Lexicon copy = new Lexicon(name, sourceLang);
    copy.description = description;
    copy.queryUrl = queryUrl;
    copy.type = type;
    return copy;
  }

  /**
   * Renders the lexicon and its entries, in sorted order, as a {@code <dictionary>} XML string.
   *
   * <p>NOTE(review): name, description, form name and remote URL are inserted without XML
   * escaping; confirm these values are always XML-safe.
   *
   * @return the XML representation
   */
  public String toXmlString() {
    StringBuilder xml = new StringBuilder();
    xml.append("<dictionary>");
    xml.append("<name>").append(name).append("</name>");
    xml.append("<description>").append(description).append("</description>");
    xml.append("<entries>");
    // Fetch the sorted snapshot once; it is null when there are no entries.
    ArrayList<LexiconEntry> sortedEntries = getEntries();
    if (sortedEntries != null) {
      boolean local = isLocalLexicon();
      for (LexiconEntry entry : sortedEntries) {
        appendEntryXml(xml, entry, local);
      }
    }
    xml.append("</entries>");
    xml.append("</dictionary>");
    return xml.toString();
  }

  /** Appends the {@code <entry>} element for one entry; content is included only if local. */
  private static void appendEntryXml(StringBuilder xml, LexiconEntry entry, boolean local) {
    xml.append("<entry>");
    xml.append("<form>").append(entry.getFormName()).append("</form>");
    if (local) {
      boolean xmlValid = entry.isXmlValid();
      xml.append("<xml-valid>").append(xmlValid).append("</xml-valid>");
      xml.append("<content>");
      if (xmlValid) {
        // unicode content of the original entry
        xml.append(stripWrapper(entry.getRepairedEntry(), "repaired-entry"));
      } else {
        xml.append("<remark>This dictionary entry has no valid XML/HTML content in database so a text version of this entry is shown</remark>");
        // original content: not valid and e.g. in Betacode
        String originalEntry = stripWrapper(entry.getOriginalEntry(), "original-entry");
        // create text version of the invalid xml content
        xml.append(StringEscapeUtils.escapeXml(originalEntry));
      }
      xml.append("</content>");
    }
    String remoteUrl = entry.getRemoteUrl();
    if (remoteUrl != null) {
      xml.append("<remoteUrl>").append(remoteUrl).append("</remoteUrl>");
    }
    xml.append("</entry>");
  }

  /** Removes every literal {@code <tagName>} and {@code </tagName>} marker from the content. */
  private static String stripWrapper(String content, String tagName) {
    return content.replace("<" + tagName + ">", "").replace("</" + tagName + ">", "");
  }

}