comparison software/mpdl-services/mpiwg-mpdl-lt-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/lt/GetDictionaryEntries.java @ 19:4a3641ae14d2

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 09 Nov 2011 15:32:05 +0100
parents
children 4ea0f81a5d08
comparison
equal deleted inserted replaced
18:dc5e9fcb3fdc 19:4a3641ae14d2
1 package de.mpg.mpiwg.berlin.mpdl.servlets.lt;
2
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.ArrayList;
6 import java.util.Collections;
7 import java.util.Date;
8
9 import javax.servlet.ServletConfig;
10 import javax.servlet.ServletException;
11 import javax.servlet.http.HttpServlet;
12 import javax.servlet.http.HttpServletRequest;
13 import javax.servlet.http.HttpServletResponse;
14
15 import org.apache.commons.lang3.StringEscapeUtils;
16
17 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
18 import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.Lexicon;
19 import de.mpg.mpiwg.berlin.mpdl.lt.dict.app.LexiconEntry;
20 import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler;
21 import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
22 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
23 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
24 import de.mpg.mpiwg.berlin.mpdl.servlets.util.ServletUtil;
25
26 public class GetDictionaryEntries extends HttpServlet {
27 private static final long serialVersionUID = 1L;
28 private LexHandler lexHandler;
29
30 public GetDictionaryEntries() {
31 super();
32 }
33
34 public void init(ServletConfig config) throws ServletException {
35 super.init(config);
36 try {
37 lexHandler = LexHandler.getInstance();
38 } catch (ApplicationException e) {
39 throw new ServletException(e);
40 }
41 }
42
43 protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
44 Date begin = new Date();
45 request.setCharacterEncoding("utf-8");
46 response.setCharacterEncoding("utf-8");
47 String query = request.getParameter("query");
48 String language = request.getParameter("language");
49 String inputType = request.getParameter("inputType");
50 String outputFormat = request.getParameter("outputFormat");
51 String outputType = request.getParameter("outputType");
52 String dictionary = request.getParameter("dictionary");
53 String normalization = request.getParameter("normalization");
54 String resultPage = request.getParameter("resultPage");
55 if (query == null)
56 query = "a*";
57 if (language == null)
58 language = "eng";
59 if (inputType == null || ! (inputType.equals("form") || inputType.equals("lemma")))
60 inputType = "form";
61 if (outputFormat == null || ! (outputFormat.equals("xml") || outputFormat.equals("html")))
62 outputFormat = "xml";
63 if (outputType == null || ! (outputType.equals("compact") || outputType.equals("full")))
64 outputType = "compact";
65 if (normalization == null || ! (normalization.equals("none") || normalization.equals("reg") || normalization.equals("reg norm")))
66 normalization = "norm";
67 String xmlDict = "all";
68 if (dictionary != null)
69 xmlDict = dictionary;
70 int pn = 1;
71 if (resultPage != null)
72 pn = new Integer(resultPage);
73 boolean isRangeQuery = false;
74 if (query.endsWith("*"))
75 isRangeQuery = true;
76 String xmlQueryString = "<query><name>" + query + "</name>" + "<language>" + language + "</language>" + "<inputType>" + inputType + "</inputType>" +
77 "<outputFormat>" + outputFormat + "</outputFormat>" + "<outputType>" + outputType + "</outputType>" + "<dictionary>" + xmlDict + "</dictionary>" +
78 "<normalization>" + normalization + "</normalization>" + "</query>";
79 try {
80 if (outputFormat.equals("xml"))
81 response.setContentType("text/xml");
82 else if (outputFormat.equals("html"))
83 response.setContentType("text/html");
84 else
85 response.setContentType("text/xml");
86 PrintWriter out = response.getWriter();
87 if (query == null || query.isEmpty()) {
88 out.print("request parameter query is empty. Please specify a query.");
89 out.close();
90 return;
91 }
92 ArrayList<Lemma> lemmas = null;
93 ArrayList<Lexicon> dictionaries = null;
94 if (isRangeQuery) {
95 String queryTmp = query.substring(0, query.length() - 1); // without last star
96 if (dictionary != null)
97 dictionaries = lexHandler.getLexEntriesByLexiconBeginningWith(dictionary, queryTmp, pn);
98 else
99 dictionaries = lexHandler.getLexEntriesBeginningWith(language, queryTmp, pn);
100 } else {
101 lemmas = lexHandler.getLemmas(query, inputType, language, normalization);
102 dictionaries = lexHandler.getLexEntries(lemmas, language, dictionary);
103 }
104 String baseUrl = ServletUtil.getInstance().getBaseUrl(request);
105 Date end = new Date();
106 String elapsedTime = String.valueOf(end.getTime() - begin.getTime());
107 String result = "";
108 if (outputFormat == null || outputFormat.equals("xml"))
109 result = createXmlOutputString(query, lemmas, dictionaries, outputType, baseUrl, xmlQueryString, elapsedTime);
110 else if (outputFormat.equals("html"))
111 result = createHtmlOutputString(query, lemmas, dictionaries, outputType, elapsedTime);
112 else
113 result = createXmlOutputString(query, lemmas, dictionaries, outputType, baseUrl, xmlQueryString, elapsedTime);
114 out.print(result);
115 out.close();
116 } catch (ApplicationException e) {
117 throw new ServletException(e);
118 }
119 }
120
121 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
122
123 }
124
125 private String createXmlOutputString(String query, ArrayList<Lemma> lemmas, ArrayList<Lexicon> lexicons, String outputType, String baseUrl, String xmlQueryString, String elapsedTime) {
126 String result = "<result>";
127 result = result + "<provider>" + "MPIWG MPDL language technology service (see: " + "" + baseUrl + "), Max Planck Institute for the History of Science, Berlin." + "</provider>";
128 result = result + xmlQueryString;
129 result = result + "<elapsed-time-ms>" + elapsedTime + "</elapsed-time-ms>";
130 if (lemmas != null && ! lemmas.isEmpty()) {
131 result = result + "<morphology>";
132 for (int i=0; i<lemmas.size(); i++) {
133 Lemma lemma = lemmas.get(i);
134 String lemmaName = lemma.getLemmaName();
135 String language = lemma.getLanguage();
136 result = result + "<lemma>";
137 result = result + "<name>" + lemmaName + "</name>";
138 if (outputType != null && outputType.equals("full")) {
139 String lemmaProvider = lemma.getProvider();
140 result = result + "<provider>" + lemmaProvider + "</provider>";
141 result = result + "<language>" + language + "</language>";
142 }
143 if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language)) {
144 String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + language;
145 result = result + "<remoteUrl>" + remoteUrl + "</remoteUrl>";
146 } else if (Language.getInstance().isGreek(language)) {
147 String remoteUrl = "http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + "greek";
148 result = result + "<remoteUrl>" + remoteUrl + "</remoteUrl>";
149 }
150 if (outputType != null && outputType.equals("full")) {
151 ArrayList<Form> forms = lemma.getFormsList();
152 Collections.sort(forms);
153 if (forms != null && ! forms.isEmpty()) {
154 result = result + "<forms>";
155 for (int j=0; j<forms.size(); j++) {
156 result = result + "<form>";
157 Form f = forms.get(j);
158 String formName = f.getFormName();
159 String formProvider = f.getProvider();
160 result = result + "<provider>" + formProvider + "</provider>";
161 result = result + "<language>" + language + "</language>";
162 result = result + "<name>" + formName + "</name>";
163 result = result + "</form>";
164 }
165 result = result + "</forms>";
166 }
167 }
168 result = result + "</lemma>";
169 }
170 result = result + "</morphology>";
171 }
172 if (lexicons != null) {
173 result = result + "<dictionaries>";
174 for (int i=0; i<lexicons.size(); i++) {
175 Lexicon lexicon = lexicons.get(i);
176 result = result + lexicon.toXmlString();
177 }
178 result = result + "</dictionaries>";
179 }
180 if (outputType != null && outputType.equals("full") && lemmas != null && ! lemmas.isEmpty()) {
181 result = result + "<wikipedia>";
182 for (int i=0; i<lemmas.size(); i++) {
183 Lemma lemma = lemmas.get(i);
184 String lemmaName = lemma.getLemmaName();
185 String language = lemma.getLanguage();
186 result = result + "<article>";
187 result = result + "<name>" + lemmaName + "</name>";
188 String wikiHrefExact = "http://" + language + ".wikipedia.org/wiki/" + lemmaName;
189 String wikiHrefSearch = "http://" + language + ".wikipedia.org/wiki/index.php?search=" + lemmaName;
190 result = result + "<remoteUrl>" + wikiHrefExact + "</remoteUrl>";
191 result = result + "<remoteUrlSearch>" + wikiHrefSearch + "</remoteUrlSearch>";
192 result = result + "</article>";
193 }
194 result = result + "</wikipedia>";
195 }
196 result = result + "</result>";
197 return result;
198 }
199
200 private String createHtmlOutputString(String query, ArrayList<Lemma> lemmas, ArrayList<Lexicon> lexicons, String outputType, String elapsedTime) {
201 String result = "";
202 result = result + "<html>";
203 result = result + "<head>";
204 result = result + "<title>Word information for: \"" + query + "\"</title>";
205 result = result + "</head>";
206 result = result + "<body>";
207 result = result + "<table align=\"right\" valign=\"top\">";
208 result = result + "<td>[<i>This is a MPIWG MPDL language technology service</i>] <a href=\"/mpiwg-mpdl-lt-web/index.html\"><img src=\"/mpiwg-mpdl-lt-web/images/info.png\" valign=\"bottom\" width=\"15\" height=\"15\" border=\"0\" alt=\"MPIWG MPDL language technology service\"/></a></td>";
209 result = result + "</table>";
210 result = result + "<p/>";
211 result = result + "<h1>Word information for: \"" + query + "\"</h1>";
212 if (lemmas != null && ! lemmas.isEmpty()) {
213 result = result + "<h3>Morphology</h3>";
214 result = result + "<ul>";
215 result = result + "<p/>";
216 for (int i=0; i<lemmas.size(); i++) {
217 Lemma lemma = lemmas.get(i);
218 String lemmaName = lemma.getLemmaName();
219 String language = lemma.getLanguage();
220 result = result + "<li>";
221 result = result + lemmaName;
222 if (outputType != null && outputType.equals("full")) {
223 String lemmaProvider = lemma.getProvider();
224 result = result + " (data provider: " + lemmaProvider + ")";
225 }
226 if (Language.getInstance().isArabic(language) || Language.getInstance().isLatin(language))
227 result = result + " (external link: <a href=\"http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + language + "\">" + lemmaName + "</a>)";
228 else if (Language.getInstance().isGreek(language))
229 result = result + " (external link: <a href=\"http://www.perseus.tufts.edu/hopper/morph?l=" + lemmaName + "&amp;la=" + "greek" + "\">" + lemmaName + "</a>)";
230 if (outputType != null && outputType.equals("full")) {
231 ArrayList<Form> forms = lemma.getFormsList();
232 Collections.sort(forms);
233 if (forms != null && ! forms.isEmpty()) {
234 result = result + "<ul>";
235 for (int j=0; j<forms.size(); j++) {
236 Form f = forms.get(j);
237 String formName = f.getFormName();
238 String formProvider = f.getProvider();
239 result = result + formName + " (data provider: " + formProvider + "), ";
240 }
241 result = result.substring(0, result.length() - 2); // without last comma and blank
242 result = result + "</ul>";
243 }
244 }
245 result = result + "</li>";
246 }
247 result = result + "</ul>";
248 }
249 if (lexicons != null && ! lexicons.isEmpty()) {
250 result = result + "<h3>Dictionary</h3>";
251 result = result + "<ul>";
252 result = result + "<p/>";
253 for (int i=0; i<lexicons.size(); i++) {
254 Lexicon lexicon = lexicons.get(i);
255 result = result + "<li>";
256 result = result + "<b>" + lexicon.getDescription() + "</b>";
257 result = result + "<ul>";
258 ArrayList<LexiconEntry> entries = lexicon.getEntries();
259 for (int j=0; j<entries.size(); j++) {
260 String entryContent = "";
261 LexiconEntry entry = entries.get(j);
262 if (lexicon.isLocalLexicon()) {
263 if (entry.isXmlValid()) {
264 String repairedEntry = entry.getRepairedEntry();
265 repairedEntry = repairedEntry.replaceAll("<repaired-entry>", "");
266 repairedEntry = repairedEntry.replaceAll("</repaired-entry>", "");
267 entryContent = entryContent + repairedEntry; // valid unicode content of the original entry
268 } else {
269 entryContent = entryContent + "[Remark: <i> this dictionary entry has no valid XML/HTML content in database so a text version of this entry is shown.</i>]: <br/>";
270 String originalEntry = entry.getOriginalEntry();
271 originalEntry = originalEntry.replaceAll("<original-entry>", "");
272 originalEntry = originalEntry.replaceAll("</original-entry>", "");
273 originalEntry = StringEscapeUtils.escapeXml(originalEntry); // create text version of the invalid xml content
274 entryContent = entryContent + originalEntry;
275 }
276 if (entry.getRemoteUrl() != null) {
277 entryContent = entryContent + "<div>(external link: <a href=\"" + entry.getRemoteUrl() + "\">" + entry.getFormName() + "</a>)</div>";
278 }
279 } else {
280 if (entry.getRemoteUrl() != null) {
281 entryContent = entryContent + "external link: <a href=\"" + entry.getRemoteUrl() + "\">" + entry.getFormName() + "</a>";
282 }
283 }
284 String formName = entry.getFormName();
285 String dictName = lexicon.getName();
286 if (outputType != null && outputType.equals("full")) {
287 result = result + "<li>" + "<b>" + formName + "</b><ul><li>" + entryContent + "</li></ul></li>";
288 } else if (outputType != null && outputType.equals("compact")) {
289 result = result + "<li>" + "<a href=\"GetDictionaryEntries?query=" + formName + "&dictionary=" + dictName + "&outputFormat=html" + "&outputType=full" + "\">" + formName + "</a></li>";
290 }
291 }
292 result = result + "</ul>";
293 result = result + "</li>"; // lexicon entry
294 }
295 result = result + "</ul>";
296 result = result + "</p>";
297 }
298 if (outputType != null && outputType.equals("full") && lemmas != null && ! lemmas.isEmpty()) {
299 result = result + "<h3>Wikipedia</h3>";
300 result = result + "<ul>";
301 result = result + "<p/>";
302 for (int i=0; i<lemmas.size(); i++) {
303 Lemma lemma = lemmas.get(i);
304 String lemmaName = lemma.getLemmaName();
305 String language = lemma.getLanguage();
306 result = result + "<li>";
307 String wikiHrefExact = "http://" + language + ".wikipedia.org/wiki/" + lemmaName;
308 String wikiHrefSearch = "http://" + language + ".wikipedia.org/wiki/index.php?search=" + lemmaName;
309 result = result + "<b>Article: </b>External link: <a href=\"" + wikiHrefExact + "\">" + lemmaName + "</a> (or search for <a href=\"" + wikiHrefSearch + "\">" + lemmaName + "</a>)";
310 result = result + "</li>";
311 }
312 result = result + "</ul>";
313 }
314 result = result + "[* external links may not function]";
315 result = result + "<hr/>";
316 result = result + "<p/>";
317 result = result + "Elapsed time: " + elapsedTime + " ms, see the <a href=\"/mpiwg-mpdl-lt-web/index.html\">service description</a> of this page, if you find a bug <a href=\"https://it-dev.mpiwg-berlin.mpg.de/tracs/mpdl-project-software/newticket\">let us know</a>";
318 result = result + "</body>";
319 result = result + "</html>";
320 return result;
321 }
322
323
324 }