Mercurial > hg > mpdl-group
comparison software/mpdl-services/mpiwg-mpdl-lt-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetDictionaryEntries.java @ 19:4a3641ae14d2
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 09 Nov 2011 15:32:05 +0100 |
parents | |
children | 4ea0f81a5d08 |
comparison
equal
deleted
inserted
replaced
18:dc5e9fcb3fdc | 19:4a3641ae14d2 |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.servlets.lt; | |
2 | |
3 import java.io.IOException; | |
4 import java.io.PrintWriter; | |
5 import java.util.ArrayList; | |
6 import java.util.Collections; | |
7 import java.util.Date; | |
8 | |
9 import javax.servlet.ServletConfig; | |
10 import javax.servlet.ServletException; | |
11 import javax.servlet.http.HttpServlet; | |
12 import javax.servlet.http.HttpServletRequest; | |
13 import javax.servlet.http.HttpServletResponse; | |
14 | |
15 import org.apache.commons.lang3.StringEscapeUtils; | |
16 | |
17 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
18 import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexicon; | |
19 import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry; | |
20 import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; | |
21 import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; | |
22 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; | |
23 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; | |
24 import de.mpg.mpiwg.berlin.mpdl.servlets.util.ServletUtil; | |
25 | |
26 public class GetDictionaryEntries extends HttpServlet { | |
27 private static final long serialVersionUID = 1L; | |
28 private LexHandler lexHandler; | |
29 | |
30 public GetDictionaryEntries() { | |
31 super(); | |
32 } | |
33 | |
34 public void init(ServletConfig config) throws ServletException { | |
35 super.init(config); | |
36 try { | |
37 lexHandler = LexHandler.getInstance(); | |
38 } catch (ApplicationException e) { | |
39 throw new ServletException(e); | |
40 } | |
41 } | |
42 | |
43 protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { | |
44 Date begin = new Date(); | |
45 request.setCharacterEncoding("utf-8"); | |
46 response.setCharacterEncoding("utf-8"); | |
47 String query = request.getParameter("query"); | |
48 String language = request.getParameter("language"); | |
49 String inputType = request.getParameter("inputType"); | |
50 String outputFormat = request.getParameter("outputFormat"); | |
51 String outputType = request.getParameter("outputType"); | |
52 String dictionary = request.getParameter("dictionary"); | |
53 String normalization = request.getParameter("normalization"); | |
54 String resultPage = request.getParameter("resultPage"); | |
55 if (query == null) | |
56 query = "a*"; | |
57 if (language == null) | |
58 language = "eng"; | |
59 if (inputType == null || ! (inputType.equals("form") || inputType.equals("lemma"))) | |
60 inputType = "form"; | |
61 if (outputFormat == null || ! (outputFormat.equals("xml") || outputFormat.equals("html"))) | |
62 outputFormat = "xml"; | |
63 if (outputType == null || ! (outputType.equals("compact") || outputType.equals("full"))) | |
64 outputType = "compact"; | |
65 if (normalization == null || ! (normalization.equals("none") || normalization.equals("reg") || normalization.equals("reg norm"))) | |
66 normalization = "norm"; | |
67 String xmlDict = "all"; | |
68 if (dictionary != null) | |
69 xmlDict = dictionary; | |
70 int pn = 1; | |
71 if (resultPage != null) | |
72 pn = new Integer(resultPage); | |
73 boolean isRangeQuery = false; | |
74 if (query.endsWith("*")) | |
75 isRangeQuery = true; | |
76 String xmlQueryString = "<query><name>" + query + "</name>" + "<language>" + language + "</language>" + "<inputType>" + inputType + "</inputType>" + | |
77 "<outputFormat>" + outputFormat + "</outputFormat>" + "<outputType>" + outputType + "</outputType>" + "<dictionary>" + xmlDict + "</dictionary>" + | |
78 "<normalization>" + normalization + "</normalization>" + "</query>"; | |
79 try { | |
80 if (outputFormat.equals("xml")) | |
81 response.setContentType("text/xml"); | |
82 else if (outputFormat.equals("html")) | |
83 response.setContentType("text/html"); | |
84 else | |
85 response.setContentType("text/xml"); | |
86 PrintWriter out = response.getWriter(); | |
87 if (query == null || query.isEmpty()) { | |
88 out.print("request parameter query is empty. Please specify a query."); | |
89 out.close(); | |
90 return; | |
91 } | |
92 ArrayList<Lemma> lemmas = null; | |
93 ArrayList<Lexicon> dictionaries = null; | |
94 if (isRangeQuery) { | |
95 String queryTmp = query.substring(0, query.length() - 1); // without last star | |
96 if (dictionary != null) | |
97 dictionaries = lexHandler.getLexEntriesByLexiconBeginningWith(dictionary, queryTmp, pn); | |
98 else | |
99 dictionaries = lexHandler.getLexEntriesBeginningWith(language, queryTmp, pn); | |
100 } else { | |
101 lemmas = lexHandler.getLemmas(query, inputType, language, normalization); | |
102 dictionaries = lexHandler.getLexEntries(lemmas, language, dictionary); | |
103 } | |
104 String baseUrl = ServletUtil.getInstance().getBaseUrl(request); | |
105 Date end = new Date(); | |
106 String elapsedTime = String.valueOf(end.getTime() - begin.getTime()); | |
107 String result = ""; | |
108 if (outputFormat == null || outputFormat.equals("xml")) | |
109 result = createXmlOutputString(query, lemmas, dictionaries, outputType, baseUrl, xmlQueryString, elapsedTime); | |
110 else if (outputFormat.equals("html")) | |
111 result = createHtmlOutputString(query, lemmas, dictionaries, outputType, elapsedTime); | |
112 else | |
113 result = createXmlOutputString(query, lemmas, dictionaries, outputType, baseUrl, xmlQueryString, elapsedTime); | |
114 out.print(result); | |
115 out.close(); | |
116 } catch (ApplicationException e) { | |
117 throw new ServletException(e); | |
118 } | |
119 } | |
120 | |
121 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { | |
122 | |
123 } | |
124 | |
125 private String createXmlOutputString(String query, ArrayList<Lemma> lemmas, ArrayList<Lexicon> lexicons, String outputType, String baseUrl, String xmlQueryString, String elapsedTime) { | |
126 String result = "<result>"; | |
127 result = result + "<provider>" + "MPIWG MPDL language technology service (see: " + "" + baseUrl + "), Max Planck Institute for the History of Science, Berlin." + "</provider>"; | |
128 result = result + xmlQueryString; | |
129 result = result + "<elapsed-time-ms>" + elapsedTime + "</elapsed-time-ms>"; | |
130 if (lemmas != null && ! lemmas.isEmpty()) { | |
131 result = result + "<morphology>"; | |
132 for (int i=0; i<lemmas.size(); i++) { | |
133 Lemma lemma = lemmas.get(i); | |
134 String lemmaName = lemma.getLemmaName(); | |
135 String language = lemma.getLanguage(); | |
136 result = result + "<lemma>"; | |
137 result = result + "<name>" + lemmaName + "</name>"; | |
138 if (outputType != null && outputType.equals("full")) { | |
139 String lemmaProvider = lemma.getProvider(); | |
140 result = result + "<provider>" + lemmaProvider + "</provider>"; | |
141 result = result + "<language>" + language + "</language>"; | |
142 } | |
143 if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language)) { | |
144 String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + language; | |
145 result = result + "<remoteUrl>" + remoteUrl + "</remoteUrl>"; | |
146 } else if (Language.getInstance().isGreek(language)) { | |
147 String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + "greek"; | |
148 result = result + "<remoteUrl>" + remoteUrl + "</remoteUrl>"; | |
149 } | |
150 if (outputType != null && outputType.equals("full")) { | |
151 ArrayList<Form> forms = lemma.getFormsList(); | |
152 Collections.sort(forms); | |
153 if (forms != null && ! forms.isEmpty()) { | |
154 result = result + "<forms>"; | |
155 for (int j=0; j<forms.size(); j++) { | |
156 result = result + "<form>"; | |
157 Form f = forms.get(j); | |
158 String formName = f.getFormName(); | |
159 String formProvider = f.getProvider(); | |
160 result = result + "<provider>" + formProvider + "</provider>"; | |
161 result = result + "<language>" + language + "</language>"; | |
162 result = result + "<name>" + formName + "</name>"; | |
163 result = result + "</form>"; | |
164 } | |
165 result = result + "</forms>"; | |
166 } | |
167 } | |
168 result = result + "</lemma>"; | |
169 } | |
170 result = result + "</morphology>"; | |
171 } | |
172 if (lexicons != null) { | |
173 result = result + "<dictionaries>"; | |
174 for (int i=0; i<lexicons.size(); i++) { | |
175 Lexicon lexicon = lexicons.get(i); | |
176 result = result + lexicon.toXmlString(); | |
177 } | |
178 result = result + "</dictionaries>"; | |
179 } | |
180 if (outputType != null && outputType.equals("full") && lemmas != null && ! lemmas.isEmpty()) { | |
181 result = result + "<wikipedia>"; | |
182 for (int i=0; i<lemmas.size(); i++) { | |
183 Lemma lemma = lemmas.get(i); | |
184 String lemmaName = lemma.getLemmaName(); | |
185 String language = lemma.getLanguage(); | |
186 result = result + "<article>"; | |
187 result = result + "<name>" + lemmaName + "</name>"; | |
188 String wikiHrefExact = "http://" + language + ".wikipedia.org/wiki/" + lemmaName; | |
189 String wikiHrefSearch = "http://" + language + ".wikipedia.org/wiki/index.php?search=" + lemmaName; | |
190 result = result + "<remoteUrl>" + wikiHrefExact + "</remoteUrl>"; | |
191 result = result + "<remoteUrlSearch>" + wikiHrefSearch + "</remoteUrlSearch>"; | |
192 result = result + "</article>"; | |
193 } | |
194 result = result + "</wikipedia>"; | |
195 } | |
196 result = result + "</result>"; | |
197 return result; | |
198 } | |
199 | |
200 private String createHtmlOutputString(String query, ArrayList<Lemma> lemmas, ArrayList<Lexicon> lexicons, String outputType, String elapsedTime) { | |
201 String result = ""; | |
202 result = result + "<html>"; | |
203 result = result + "<head>"; | |
204 result = result + "<title>Word information for: \"" + query + "\"</title>"; | |
205 result = result + "</head>"; | |
206 result = result + "<body>"; | |
207 result = result + "<table align=\"right\" valign=\"top\">"; | |
208 result = result + "<td>[<i>This is a MPIWG MPDL language technology service</i>] <a href=\"/mpiwg-mpdl-lt-web/index.html\"><img src=\"/mpiwg-mpdl-lt-web/images/info.png\" valign=\"bottom\" width=\"15\" height=\"15\" border=\"0\" alt=\"MPIWG MPDL language technology service\"/></a></td>"; | |
209 result = result + "</table>"; | |
210 result = result + "<p/>"; | |
211 result = result + "<h1>Word information for: \"" + query + "\"</h1>"; | |
212 if (lemmas != null && ! lemmas.isEmpty()) { | |
213 result = result + "<h3>Morphology</h3>"; | |
214 result = result + "<ul>"; | |
215 result = result + "<p/>"; | |
216 for (int i=0; i<lemmas.size(); i++) { | |
217 Lemma lemma = lemmas.get(i); | |
218 String lemmaName = lemma.getLemmaName(); | |
219 String language = lemma.getLanguage(); | |
220 result = result + "<li>"; | |
221 result = result + lemmaName; | |
222 if (outputType != null && outputType.equals("full")) { | |
223 String lemmaProvider = lemma.getProvider(); | |
224 result = result + " (data provider: " + lemmaProvider + ")"; | |
225 } | |
226 if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language)) | |
227 result = result + " (external link: <a href=\"http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + language + "\">" + lemmaName + "</a>)"; | |
228 else if (Language.getInstance().isGreek(language)) | |
229 result = result + " (external link: <a href=\"http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&la=" + "greek" + "\">" + lemmaName + "</a>)"; | |
230 if (outputType != null && outputType.equals("full")) { | |
231 ArrayList<Form> forms = lemma.getFormsList(); | |
232 Collections.sort(forms); | |
233 if (forms != null && ! forms.isEmpty()) { | |
234 result = result + "<ul>"; | |
235 for (int j=0; j<forms.size(); j++) { | |
236 Form f = forms.get(j); | |
237 String formName = f.getFormName(); | |
238 String formProvider = f.getProvider(); | |
239 result = result + formName + " (data provider: " + formProvider + "), "; | |
240 } | |
241 result = result.substring(0, result.length() - 2); // without last comma and blank | |
242 result = result + "</ul>"; | |
243 } | |
244 } | |
245 result = result + "</li>"; | |
246 } | |
247 result = result + "</ul>"; | |
248 } | |
249 if (lexicons != null && ! lexicons.isEmpty()) { | |
250 result = result + "<h3>Dictionary</h3>"; | |
251 result = result + "<ul>"; | |
252 result = result + "<p/>"; | |
253 for (int i=0; i<lexicons.size(); i++) { | |
254 Lexicon lexicon = lexicons.get(i); | |
255 result = result + "<li>"; | |
256 result = result + "<b>" + lexicon.getDescription() + "</b>"; | |
257 result = result + "<ul>"; | |
258 ArrayList<LexiconEntry> entries = lexicon.getEntries(); | |
259 for (int j=0; j<entries.size(); j++) { | |
260 String entryContent = ""; | |
261 LexiconEntry entry = entries.get(j); | |
262 if (lexicon.isLocalLexicon()) { | |
263 if (entry.isXmlValid()) { | |
264 String repairedEntry = entry.getRepairedEntry(); | |
265 repairedEntry = repairedEntry.replaceAll("<repaired-entry>", ""); | |
266 repairedEntry = repairedEntry.replaceAll("</repaired-entry>", ""); | |
267 entryContent = entryContent + repairedEntry; // valid unicode content of the original entry | |
268 } else { | |
269 entryContent = entryContent + "[Remark: <i> this dictionary entry has no valid XML/HTML content in database so a text version of this entry is shown.</i>]: <br/>"; | |
270 String originalEntry = entry.getOriginalEntry(); | |
271 originalEntry = originalEntry.replaceAll("<original-entry>", ""); | |
272 originalEntry = originalEntry.replaceAll("</original-entry>", ""); | |
273 originalEntry = StringEscapeUtils.escapeXml(originalEntry); // create text version of the invalid xml content | |
274 entryContent = entryContent + originalEntry; | |
275 } | |
276 if (entry.getRemoteUrl() != null) { | |
277 entryContent = entryContent + "<div>(external link: <a href=\"" + entry.getRemoteUrl() + "\">" + entry.getFormName() + "</a>)</div>"; | |
278 } | |
279 } else { | |
280 if (entry.getRemoteUrl() != null) { | |
281 entryContent = entryContent + "external link: <a href=\"" + entry.getRemoteUrl() + "\">" + entry.getFormName() + "</a>"; | |
282 } | |
283 } | |
284 String formName = entry.getFormName(); | |
285 String dictName = lexicon.getName(); | |
286 if (outputType != null && outputType.equals("full")) { | |
287 result = result + "<li>" + "<b>" + formName + "</b><ul><li>" + entryContent + "</li></ul></li>"; | |
288 } else if (outputType != null && outputType.equals("compact")) { | |
289 result = result + "<li>" + "<a href=\"GetDictionaryEntries?query=" + formName + "&dictionary=" + dictName + "&outputFormat=html" + "&outputType=full" + "\">" + formName + "</a></li>"; | |
290 } | |
291 } | |
292 result = result + "</ul>"; | |
293 result = result + "</li>"; // lexicon entry | |
294 } | |
295 result = result + "</ul>"; | |
296 result = result + "</p>"; | |
297 } | |
298 if (outputType != null && outputType.equals("full") && lemmas != null && ! lemmas.isEmpty()) { | |
299 result = result + "<h3>Wikipedia</h3>"; | |
300 result = result + "<ul>"; | |
301 result = result + "<p/>"; | |
302 for (int i=0; i<lemmas.size(); i++) { | |
303 Lemma lemma = lemmas.get(i); | |
304 String lemmaName = lemma.getLemmaName(); | |
305 String language = lemma.getLanguage(); | |
306 result = result + "<li>"; | |
307 String wikiHrefExact = "http://" + language + ".wikipedia.org/wiki/" + lemmaName; | |
308 String wikiHrefSearch = "http://" + language + ".wikipedia.org/wiki/index.php?search=" + lemmaName; | |
309 result = result + "<b>Article: </b>External link: <a href=\"" + wikiHrefExact + "\">" + lemmaName + "</a> (or search for <a href=\"" + wikiHrefSearch + "\">" + lemmaName + "</a>)"; | |
310 result = result + "</li>"; | |
311 } | |
312 result = result + "</ul>"; | |
313 } | |
314 result = result + "[* external links may not function]"; | |
315 result = result + "<hr/>"; | |
316 result = result + "<p/>"; | |
317 result = result + "Elapsed time: " + elapsedTime + " ms, see the <a href=\"/mpiwg-mpdl-lt-web/index.html\">service description</a> of this page, if you find a bug <a href=\"https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/newticket\">let us know</a>"; | |
318 result = result + "</body>"; | |
319 result = result + "</html>"; | |
320 return result; | |
321 } | |
322 | |
323 | |
324 } |