comparison software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/QueryDocument.java @ 25:e9fe3186670c default tip

letzter Stand eingecheckt
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 21 May 2013 10:19:32 +0200
parents
children
comparison
equal deleted inserted replaced
23:e845310098ba 25:e9fe3186670c
1 package de.mpg.mpiwg.berlin.mpdl.servlets.cms;
2
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.io.StringReader;
6 import java.util.ArrayList;
7
8 import javax.servlet.ServletConfig;
9 import javax.servlet.ServletContext;
10 import javax.servlet.ServletException;
11 import javax.servlet.http.HttpServlet;
12 import javax.servlet.http.HttpServletRequest;
13 import javax.servlet.http.HttpServletResponse;
14
15 import org.apache.lucene.document.Fieldable;
16 import org.xml.sax.InputSource;
17 import org.xml.sax.SAXException;
18 import org.xml.sax.XMLReader;
19
20 import com.sun.org.apache.xerces.internal.parsers.SAXParser;
21
22 import de.mpg.mpiwg.berlin.mpdl.cms.document.Document;
23 import de.mpg.mpiwg.berlin.mpdl.cms.document.Hits;
24 import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord;
25 import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler;
26 import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler;
27 import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer;
28 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
29
30 public class QueryDocument extends HttpServlet {
31 private static final long serialVersionUID = 1L;
32 private PageTransformer pageTransformer = null;
33
34 public QueryDocument() {
35 super();
36 }
37
38 public void init(ServletConfig config) throws ServletException {
39 super.init(config);
40 ServletContext context = getServletContext();
41 pageTransformer = (PageTransformer) context.getAttribute("pageTransformer");
42 }
43
44 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
45 doGet(request, response);
46 }
47
48 protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
49 request.setCharacterEncoding("utf-8");
50 response.setCharacterEncoding("utf-8");
51 String docId = request.getParameter("docId");
52 String query = request.getParameter("query");
53 String[] normFunctions = {"none"};
54 if (query.contains("tokenReg")) // TODO ordentlich behandeln
55 normFunctions[0] = "reg";
56 else if (query.contains("tokenNorm")) // TODO ordentlich behandeln
57 normFunctions[0] = "norm";
58 String[] outputOptions = {};
59 if (query.contains("tokenMorph")) { // TODO ordentlich behandeln
60 outputOptions = new String[1];
61 outputOptions[0] = "withLemmas";
62 }
63 String pageStr = request.getParameter("page");
64 if (pageStr == null)
65 pageStr = "1";
66 int page = Integer.parseInt(pageStr);
67 String pageSizeStr = request.getParameter("pageSize");
68 if (pageSizeStr == null)
69 pageSizeStr = "10";
70 int pageSize = Integer.parseInt(pageSizeStr);
71 int from = (page * pageSize) - pageSize; // e.g. 0
72 int to = page * pageSize - 1; // e.g. 9
73 String outputFormat = request.getParameter("outputFormat");
74 if (outputFormat == null)
75 outputFormat = "xml";
76 try {
77 IndexHandler indexHandler = IndexHandler.getInstance();
78 Hits hits = indexHandler.queryDocument(docId, query, from, to);
79 MetadataRecord docMetadataRecord = indexHandler.getDocMetadata(docId);
80 if (outputFormat.equals("xml"))
81 response.setContentType("text/xml");
82 else if (outputFormat.equals("html"))
83 response.setContentType("text/html");
84 else
85 response.setContentType("text/xml");
86 PrintWriter out = response.getWriter();
87 String resultStr = "";
88 if (outputFormat.equals("xml"))
89 resultStr = createXmlString(docMetadataRecord, query, page, pageSize, normFunctions, outputOptions, hits);
90 else if (outputFormat.equals("html"))
91 resultStr = createHtmlString(docMetadataRecord, query, page, pageSize, normFunctions, outputOptions, hits, request);
92 out.print(resultStr);
93 out.close();
94 } catch (ApplicationException e) {
95 throw new ServletException(e);
96 }
97 }
98
99 private String createXmlString(MetadataRecord docMetadataRecord, String query, int page, int pageSize, String[] normFunctions, String[] outputOptions, Hits hits) throws ApplicationException {
100 String docId = docMetadataRecord.getDocId();
101 ArrayList<Document> docs = null;
102 if (hits != null)
103 docs = hits.getHits();
104 int hitsSize = -1;
105 int docsSize = -1;
106 if (hits != null)
107 hitsSize = hits.getSize();
108 if (docs != null)
109 docsSize = docs.size();
110 StringBuilder xmlStrBuilder = new StringBuilder();
111 xmlStrBuilder.append("<document>");
112 xmlStrBuilder.append("<id>" + docId + "</id>");
113 xmlStrBuilder.append("<query>");
114 xmlStrBuilder.append("<queryText>" + query + "</queryText>");
115 xmlStrBuilder.append("<resultPage>" + page + "</resultPage>");
116 xmlStrBuilder.append("<resultPageSize>" + pageSize + "</resultPageSize>");
117 xmlStrBuilder.append("</query>");
118 xmlStrBuilder.append("<hitsSize>" + hitsSize + "</hitsSize>");
119 xmlStrBuilder.append("<hits>");
120 for (int i=0; i<docsSize; i++) {
121 Document doc = docs.get(i);
122 int num = (page - 1) * pageSize + i + 1;
123 xmlStrBuilder.append("<hit>");
124 xmlStrBuilder.append("<num>" + num + "</num>");
125 String pageNumber = null;
126 Fieldable fPageNumber = doc.getFieldable("pageNumber");
127 if (fPageNumber != null) {
128 pageNumber = fPageNumber.stringValue();
129 xmlStrBuilder.append("<pageNumber>" + pageNumber + "</pageNumber>");
130 }
131 String elementPagePosition = null;
132 Fieldable fElementPagePosition = doc.getFieldable("elementPagePosition");
133 if (fElementPagePosition != null) {
134 elementPagePosition = fElementPagePosition.stringValue();
135 xmlStrBuilder.append("<pagePosition>" + elementPagePosition + "</pagePosition>");
136 }
137 String lineNumber = null;
138 Fieldable fLineNumber = doc.getFieldable("lineNumber");
139 if (fLineNumber != null) {
140 lineNumber = fLineNumber.stringValue();
141 xmlStrBuilder.append("<lineNumber>" + lineNumber + "</lineNumber>");
142 }
143 String elementPosition = null;
144 Fieldable fElementPosition = doc.getFieldable("elementAbsolutePosition");
145 if (fElementPosition != null) {
146 elementPosition = fElementPosition.stringValue();
147 xmlStrBuilder.append("<absolutePosition>" + elementPosition + "</absolutePosition>");
148 }
149 String xpath = null;
150 Fieldable fXPath = doc.getFieldable("xpath");
151 if (fXPath != null) {
152 xpath = fXPath.stringValue();
153 xmlStrBuilder.append("<xpath>" + xpath + "</xpath>");
154 }
155 String xmlId = null;
156 Fieldable fXmlId = doc.getFieldable("xmlId");
157 if (fXmlId != null) {
158 xmlId = fXmlId.stringValue();
159 xmlStrBuilder.append("<xmlId>" + xmlId + "</xmlId>");
160 }
161 String language = null;
162 Fieldable fLanguage = doc.getFieldable("language");
163 if (fLanguage != null) {
164 language = fLanguage.stringValue();
165 xmlStrBuilder.append("<language>" + language + "</language>");
166 }
167 String xmlContentTokenized = null;
168 Fieldable fXmlContentTokenized = doc.getFieldable("xmlContentTokenized");
169 if (fXmlContentTokenized != null) {
170 String highlightQueryType = "orig";
171 if (withLemmas(outputOptions)) {
172 highlightQueryType = "morph";
173 } else if (normFunctions != null) {
174 String normFunction = normFunctions[0];
175 highlightQueryType = normFunction;
176 if (normFunction.equals("none")) {
177 highlightQueryType = "orig";
178 }
179 }
180 xmlContentTokenized = fXmlContentTokenized.stringValue();
181 String xmlPre = "<content xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">";
182 String xmlPost = "</content>";
183 String xmlInputStr = xmlPre + xmlContentTokenized + xmlPost;
184 String docLanguage = docMetadataRecord.getLanguage();
185 String highlightedXmlStr = highlight(xmlInputStr, highlightQueryType, query, docLanguage);
186 if (highlightedXmlStr == null)
187 highlightedXmlStr = "<content>" + xmlContentTokenized + "</content>";
188 xmlStrBuilder.append(highlightedXmlStr);
189 }
190 xmlStrBuilder.append("</hit>");
191 }
192 xmlStrBuilder.append("</hits>");
193 xmlStrBuilder.append("</document>");
194 return xmlStrBuilder.toString();
195 }
196
197 private String createHtmlString(MetadataRecord docMetadataRecord, String query, int page, int pageSize, String[] normFunctions, String[] outputOptions, Hits hits, HttpServletRequest request) throws ApplicationException {
198 String docId = docMetadataRecord.getDocId();
199 ArrayList<Document> docs = null;
200 if (hits != null)
201 docs = hits.getHits();
202 int hitsSize = -1;
203 int docsSize = -1;
204 if (hits != null)
205 hitsSize = hits.getSize();
206 if (docs != null)
207 docsSize = docs.size();
208 String highlightQueryType = "orig";
209 String normalizationStr = "";
210 String highlightQueryTypeStr = "";
211 if (withLemmas(outputOptions)) {
212 highlightQueryTypeStr = "&amp;highlightQueryType=norm";
213 highlightQueryType = "norm";
214 } else if (normFunctions != null) {
215 String normFunction = normFunctions[0];
216 normalizationStr = "&amp;normalization=" + normFunction;
217 highlightQueryType = normFunction;
218 if (normFunction.equals("none")) {
219 normalizationStr = "&amp;normalization=" + "orig";
220 highlightQueryType = "orig";
221 }
222 }
223 StringBuilder xmlStrBuilder = new StringBuilder();
224 xmlStrBuilder.append("<html>");
225 xmlStrBuilder.append("<head>");
226 xmlStrBuilder.append("<title>Document: \"" + docId + " " + query + "\"</title>");
227 String baseUrl = getBaseUrl(request);
228 String cssUrl = baseUrl + "/css/page.css";
229 String cssShowWordFileName = "pageOrig.css";
230 if (highlightQueryType.equals("reg"))
231 cssShowWordFileName = "pageReg.css";
232 else if (highlightQueryType.equals("norm"))
233 cssShowWordFileName = "pageNorm.css";
234 String showWordCssUrl = baseUrl + "/css/" + cssShowWordFileName;
235 xmlStrBuilder.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + showWordCssUrl + "\"/>");
236 xmlStrBuilder.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + cssUrl + "\"/>");
237 xmlStrBuilder.append("</head>");
238 xmlStrBuilder.append("<body>");
239 xmlStrBuilder.append("<span class=\"about\">[<span class=\"it\">This is a MPIWG CMS technology service</span>] <a href=\"/mpiwg-mpdl-cms-web/index.html\"><img src=\"/mpiwg-mpdl-cms-web/images/info.png\" valign=\"bottom\" width=\"15\" height=\"15\" border=\"0\" alt=\"MPIWG CMS service\"/></a></span>");
240 xmlStrBuilder.append("<span class=\"query\">Query: " + query + "</span>");
241 xmlStrBuilder.append("<span class=\"result\">");
242 xmlStrBuilder.append("<span class=\"resultPage\">" + page + "</span>");
243 xmlStrBuilder.append("<span class=\"resultPageSize\">" + pageSize + "</span>");
244 xmlStrBuilder.append("<span class=\"hitsSize\">" + hitsSize + "</span>");
245 xmlStrBuilder.append("</span>");
246 xmlStrBuilder.append("<table>");
247 for (int i=0; i<docsSize; i++) {
248 xmlStrBuilder.append("<tr class=\"hit\">");
249 Document doc = docs.get(i);
250 int num = (page - 1) * pageSize + i + 1;
251 xmlStrBuilder.append("<td class=\"hitNum\">" + num + ". " + "</td>");
252 xmlStrBuilder.append("<td class=\"hitLink\">");
253 String posStr = "";
254 String pageNumber = "";
255 Fieldable fPageNumber = doc.getFieldable("pageNumber");
256 if (fPageNumber != null) {
257 pageNumber = fPageNumber.stringValue();
258 posStr = posStr + "Page " + pageNumber + ", ";
259 }
260 String elementName = null;
261 String presElementName = "";
262 Fieldable fElementName = doc.getFieldable("elementName");
263 if (fElementName != null) {
264 elementName = fElementName.stringValue();
265 presElementName = getPresentationName(elementName);
266 }
267 String elementPagePosition = "";
268 Fieldable fElementPagePosition = doc.getFieldable("elementPagePosition");
269 if (fElementPagePosition != null) {
270 elementPagePosition = fElementPagePosition.stringValue();
271 posStr = posStr + presElementName + " " + elementPagePosition + ":";
272 }
273 String language = docMetadataRecord.getLanguage();
274 String getPageLink = baseUrl + "/query/GetPage?docId=" + docId + "&amp;page=" + pageNumber + normalizationStr + "&amp;highlightElem=" + elementName + "&amp;highlightElemPos=" + elementPagePosition + highlightQueryTypeStr + "&amp;highlightQuery=" + query + "&amp;language=" + language;
275 xmlStrBuilder.append("<a href=\"" + getPageLink + "\">" + posStr + "</a>");
276 xmlStrBuilder.append("</td>");
277 String xmlContentTokenized = null;
278 Fieldable fXmlContentTokenized = doc.getFieldable("xmlContentTokenized");
279 if (fXmlContentTokenized != null) {
280 xmlContentTokenized = fXmlContentTokenized.stringValue();
281 String highlightedXmlStr = highlight(xmlContentTokenized, highlightQueryType, query, language);
282 String highlightHtmlStr = pageTransformer.transform(highlightedXmlStr, docMetadataRecord, -1, "html"); // TODO performance: do not highlight each single node but highlight them all in one step
283 xmlStrBuilder.append("<td class=\"hitContent\">");
284 xmlStrBuilder.append(highlightHtmlStr);
285 xmlStrBuilder.append("</td>");
286 }
287 xmlStrBuilder.append("</tr>");
288 }
289 xmlStrBuilder.append("</table>");
290 xmlStrBuilder.append("</body>");
291 xmlStrBuilder.append("</html>");
292 return xmlStrBuilder.toString();
293 }
294
295 private String highlight(String xmlStr, String highlightQueryType, String highlightQuery, String language) throws ApplicationException {
296 String result = null;
297 try {
298 HighlightContentHandler highlightContentHandler = new HighlightContentHandler(null, -1, highlightQueryType, highlightQuery, language);
299 highlightContentHandler.setFirstPageBreakReachedMode(true);
300 XMLReader xmlParser = new SAXParser();
301 xmlParser.setContentHandler(highlightContentHandler);
302 StringReader stringReader = new StringReader(xmlStr);
303 InputSource inputSource = new InputSource(stringReader);
304 xmlParser.parse(inputSource);
305 result = highlightContentHandler.getResult().toString();
306 } catch (SAXException e) {
307 throw new ApplicationException(e);
308 } catch (IOException e) {
309 throw new ApplicationException(e);
310 }
311 return result;
312 }
313
314 private String getPresentationName(String elemName) {
315 String retStr = null;
316 if (elemName != null) {
317 if (elemName.equals("s")) {
318 retStr = "Sentence";
319 } else {
320 // first char to uppercase
321 char[] stringArray = elemName.toCharArray();
322 stringArray[0] = Character.toUpperCase(stringArray[0]);
323 retStr = new String(stringArray);
324 }
325 }
326 return retStr;
327 }
328
329 private String getBaseUrl(HttpServletRequest request) {
330 return getServerUrl(request) + request.getContextPath();
331 }
332
333 private String getServerUrl(HttpServletRequest request) {
334 if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) )
335 return request.getScheme() + "://" + request.getServerName();
336 else
337 return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort();
338 }
339
340 private boolean withLemmas(String[] outputOptions) {
341 boolean result = false;
342 for (int i=0; i< outputOptions.length; i++) {
343 String function = outputOptions[i];
344 if (function.equals("withLemmas"))
345 return true;
346 }
347 return result;
348 }
349
350 }