Mercurial > hg > mpdl-group
annotate software/mpdl-services/mpiwg-mpdl-lt/src/de/mpg/mpiwg/berlin/mpdl/lt/dict/app/Lexicon.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | 7d6d969b10cf |
children |
rev | line source |
---|---|
19 | 1 package de.mpg.mpiwg.berlin.mpdl.lt.dict.app; |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Collections; | |
5 import java.util.Enumeration; | |
6 import java.util.Hashtable; | |
7 | |
8 import org.apache.commons.lang3.StringEscapeUtils; | |
9 | |
10 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
11 import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; | |
12 import de.mpg.mpiwg.berlin.mpdl.lt.text.transcode.Transcoder; | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
13 import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; |
19 | 14 |
15 public class Lexicon implements Comparable<Lexicon> { | |
16 private String name; | |
17 private String sourceLang; | |
18 private String description; | |
19 private String queryUrl; | |
20 private String type; // local or remote | |
21 private Hashtable<String, LexiconEntry> entries; | |
22 | |
23 public Lexicon(String name, String sourceLanguage) { | |
24 this.name = name; | |
25 this.sourceLang = sourceLanguage; | |
26 this.type = "local"; // default is local | |
27 this.entries = new Hashtable<String, LexiconEntry>(); | |
28 } | |
29 | |
30 public int compareTo(Lexicon l) { | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
31 if (description != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
32 return description.compareTo(l.description); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
33 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
34 return name.compareTo(l.name); |
19 | 35 } |
36 | |
37 public String getName() { | |
38 return name; | |
39 } | |
40 | |
41 public void setName(String name) { | |
42 this.name = name; | |
43 } | |
44 | |
45 public String getSourceLanguage() { | |
46 return sourceLang; | |
47 } | |
48 | |
49 public String getDescription() { | |
50 return description; | |
51 } | |
52 | |
53 public void setDescription(String description) { | |
54 this.description = description; | |
55 } | |
56 | |
57 public String getQueryUrl() { | |
58 return queryUrl; | |
59 } | |
60 | |
61 public void setQueryUrl(String queryUrl) { | |
62 this.queryUrl = queryUrl; | |
63 } | |
64 | |
65 public String getType() { | |
66 return type; | |
67 } | |
68 | |
69 public void setType(String type) { | |
70 this.type = type; | |
71 } | |
72 | |
73 public boolean isLocalLexicon() { | |
74 boolean isLocal = false; | |
75 if (type != null && type.equals("local")) | |
76 isLocal = true; | |
77 return isLocal; | |
78 } | |
79 | |
80 public boolean isBetacodeLexicon() { | |
81 boolean isBetacode = false; | |
82 if (name.equals("autenrieth") || name.equals("bonitz") || name.equals("lsj")) | |
83 isBetacode = true; | |
84 return isBetacode; | |
85 } | |
86 | |
87 public boolean isBuckwalterLexicon() { | |
88 boolean isBuckwalter = false; | |
89 if (name.equals("salmone")) | |
90 isBuckwalter = true; | |
91 return isBuckwalter; | |
92 } | |
93 | |
94 public ArrayList<LexiconEntry> getEntries() { | |
95 ArrayList<LexiconEntry> result = new ArrayList<LexiconEntry>(); | |
96 if (entries != null) { | |
97 Enumeration<String> entryKeys = entries.keys(); | |
98 while(entryKeys.hasMoreElements()) { | |
99 String entryKey = entryKeys.nextElement(); | |
100 LexiconEntry le = entries.get(entryKey); | |
101 result.add(le); | |
102 } | |
103 } | |
104 Collections.sort(result); | |
105 if (result.isEmpty()) | |
106 return null; | |
107 else | |
108 return result; | |
109 } | |
110 | |
111 public LexiconEntry getDynamicEntry(String formName) throws ApplicationException { | |
112 LexiconEntry lexEntry = new LexiconEntry(name, formName, null); | |
113 String linkForm = formName; | |
114 if (Language.getInstance().isGreek(sourceLang)) { | |
115 linkForm = Transcoder.getInstance().transcodeFromUnicode2BetaCode(formName); | |
116 } | |
117 if (name.equals("linyutan")) { | |
118 linkForm = Transcoder.getInstance().encodeBig5(formName); | |
119 } | |
120 String remoteUrl = queryUrl + linkForm; | |
121 lexEntry.setRemoteUrl(remoteUrl); | |
122 return lexEntry; | |
123 } | |
124 | |
125 public boolean isEmpty() { | |
126 if (entries == null || entries.isEmpty()) | |
127 return true; | |
128 else | |
129 return false; | |
130 } | |
131 | |
132 public void addEntry(LexiconEntry newEntry) { | |
133 if (entries == null) | |
134 this.entries = new Hashtable<String, LexiconEntry>(); | |
135 entries.put(newEntry.getFormName(), newEntry); | |
136 } | |
137 | |
138 public void addEntries(ArrayList<LexiconEntry> newEntries) { | |
139 if (entries == null) | |
140 this.entries = new Hashtable<String, LexiconEntry>(); | |
141 for (int i=0; i<newEntries.size(); i++) { | |
142 LexiconEntry newEntry = newEntries.get(i); | |
143 entries.put(newEntry.getFormName(), newEntry); | |
144 } | |
145 } | |
146 | |
20
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
147 public LexiconEntry getEntry(String lexEntryName) { |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
148 LexiconEntry retEntry = null; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
149 if (entries == null) { |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
150 entries.get(lexEntryName); |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
151 } |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
152 return retEntry; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
153 } |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
154 |
19 | 155 /* |
156 * without lexicon entries (non-Javadoc) | |
157 * @see java.lang.Object#clone() | |
158 */ | |
159 public Lexicon clone() { | |
160 Lexicon lex = new Lexicon(name, sourceLang); | |
161 lex.description = description; | |
162 lex.entries = new Hashtable<String, LexiconEntry>(); | |
163 lex.queryUrl = queryUrl; | |
164 lex.type = type; | |
165 return lex; | |
166 } | |
167 | |
168 public String toXmlString() { | |
169 String result = ""; | |
170 result = result + "<dictionary>"; | |
171 result = result + "<name>" + name + "</name>"; | |
20
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
172 result = result + "<language>" + sourceLang + "</language>"; |
19 | 173 result = result + "<description>" + description + "</description>"; |
174 result = result + "<entries>"; | |
175 for (int i=0; i<entries.size(); i++) { | |
176 result = result + "<entry>"; | |
177 LexiconEntry entry = getEntries().get(i); | |
178 result = result + "<form>" + entry.getFormName() + "</form>"; | |
179 if (isLocalLexicon()) { | |
180 result = result + "<xml-valid>"; | |
181 String xmlValid = "false"; | |
182 if (entry.isXmlValid()) | |
183 xmlValid = "true"; | |
184 result = result + xmlValid; | |
185 result = result + "</xml-valid>"; | |
186 result = result + "<content>"; | |
187 if (entry.isXmlValid()) { | |
188 String repairedEntry = entry.getRepairedEntry(); | |
189 repairedEntry = repairedEntry.replaceAll("<repaired-entry>", ""); | |
190 repairedEntry = repairedEntry.replaceAll("</repaired-entry>", ""); | |
191 result = result + repairedEntry; // unicode content of the original entry | |
192 } else { | |
193 result = result + "<remark>This dictionary entry has no valid XML/HTML content in database so a text version of this entry is shown</remark>"; | |
194 String originalEntry = entry.getOriginalEntry(); // original content: not valid and e.g. in Betacode | |
195 originalEntry = originalEntry.replaceAll("<original-entry>", ""); | |
196 originalEntry = originalEntry.replaceAll("</original-entry>", ""); | |
197 originalEntry = StringEscapeUtils.escapeXml(originalEntry); // create text version of the invalid xml content | |
198 result = result + originalEntry; | |
199 } | |
200 result = result + "</content>"; | |
201 } | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
202 if (entry.getRemoteUrl() != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
203 String remoteUrl = entry.getRemoteUrl(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
204 remoteUrl = StringEscapeUtils.escapeXml(remoteUrl); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
205 result = result + "<remoteUrl>" + remoteUrl + "</remoteUrl>"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
20
diff
changeset
|
206 } |
19 | 207 result = result + "</entry>"; |
208 } | |
209 result = result + "</entries>"; | |
210 result = result + "</dictionary>"; | |
211 return result; | |
212 } | |
213 | |
20
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
214 public String toXmlStringCompact() { |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
215 String result = ""; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
216 result = result + "<dictionary>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
217 result = result + "<name>" + name + "</name>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
218 result = result + "<language>" + sourceLang + "</language>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
219 result = result + "<description>" + description + "</description>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
220 result = result + "<entries>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
221 for (int i=0; i<entries.size(); i++) { |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
222 result = result + "<entry>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
223 LexiconEntry entry = getEntries().get(i); |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
224 result = result + "<form>" + entry.getFormName() + "</form>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
225 result = result + "</entry>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
226 } |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
227 result = result + "</entries>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
228 result = result + "</dictionary>"; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
229 return result; |
7d6d969b10cf
little corrections
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
19
diff
changeset
|
230 } |
19 | 231 } |