annotate software/mpdl-services/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
1 package de.mpg.mpiwg.berlin.mpdl.servlets.cms;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
2
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
3 import java.io.File;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
4 import java.io.IOException;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
5 import java.io.PrintWriter;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
6 import java.io.StringReader;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
7
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
8 import javax.servlet.ServletConfig;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
9 import javax.servlet.ServletContext;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
10 import javax.servlet.ServletException;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
11 import javax.servlet.http.HttpServlet;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
12 import javax.servlet.http.HttpServletRequest;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
13 import javax.servlet.http.HttpServletResponse;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
14
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
15 import org.apache.commons.io.FileUtils;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
16 import org.xml.sax.InputSource;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
17 import org.xml.sax.SAXException;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
18 import org.xml.sax.XMLReader;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
19
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
20 import com.sun.org.apache.xerces.internal.parsers.SAXParser;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
21
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
22 import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
23 import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
24 import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
25 import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
26 import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
27 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
28 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
29
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
30 public class GetPage extends HttpServlet {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
31 private static final long serialVersionUID = 1L;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
32 private PageTransformer pageTransformer;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
33
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
34 public GetPage() {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
35 super();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
36 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
37
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
38 public void init(ServletConfig config) throws ServletException {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
39 super.init(config);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
40 ServletContext context = getServletContext();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
41 pageTransformer = (PageTransformer) context.getAttribute("pageTransformer");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
42 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
43
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
44 protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
45 String result = "";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
46 request.setCharacterEncoding("utf-8");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
47 response.setCharacterEncoding("utf-8");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
48 String docId = request.getParameter("docId");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
49 String pageStr = request.getParameter("page");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
50 String normalization = request.getParameter("normalization");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
51 String highlightQuery = request.getParameter("highlightQuery");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
52 String highlightQueryType = request.getParameter("highlightQueryType");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
53 if (highlightQueryType == null)
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
54 highlightQueryType = "form";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
55 String highlightElem = request.getParameter("highlightElem");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
56 String highlightElemPosStr = request.getParameter("highlightElemPos");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
57 int highlightElemPos = -1;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
58 if (highlightElemPosStr != null)
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
59 highlightElemPos = Integer.parseInt(highlightElemPosStr);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
60 String mode = request.getParameter("mode");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
61 if (mode == null)
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
62 mode = "untokenized";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
63 String outputFormat = request.getParameter("outputFormat");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
64 if (outputFormat == null)
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
65 outputFormat = "html";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
66 String cssUrl = request.getParameter("cssUrl");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
67 String baseUrl = getBaseUrl(request);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
68 if (cssUrl == null) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
69 cssUrl = baseUrl + "/css/page.css";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
70 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
71 int page = 1;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
72 if (pageStr != null)
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
73 page = Integer.parseInt(pageStr);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
74 if (outputFormat.equals("xml"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
75 response.setContentType("text/xml");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
76 else if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
77 response.setContentType("text/html");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
78 // normalization
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
79 if (normalization == null || ! (normalization.equals("orig") || normalization.equals("reg") || normalization.equals("norm")))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
80 normalization = "norm";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
81 if (outputFormat.equals("xmlDisplay"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
82 normalization = "orig";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
83 PrintWriter out = response.getWriter();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
84 try {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
85 IndexHandler indexHandler = IndexHandler.getInstance();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
86 MetadataRecord mdRecord = indexHandler.getDocMetadata(docId);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
87 DocumentHandler docHandler = new DocumentHandler();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
88 String docDir = docHandler.getDocDir(docId);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
89 String docPageDir = docDir + "/" + "pages";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
90 String pageFileName = docPageDir + "/page-" + page + "-morph.xml";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
91 File pageFile = new File(pageFileName);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
92 if (page == 1 && ! (new File(docPageDir)).exists()) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
93 String docFileName = docHandler.getDocFullFileName(docId);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
94 pageFile = new File(docFileName); // when no page breaks are in the document then the whole document is the first page
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
95 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
96 if (! pageFile.exists()) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
97 out.print("There is no page: " + page + " in document");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
98 out.close();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
99 return;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
100 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
101 String pageHtmlFileName = docPageDir + "/page-" + page + ".html";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
102 File pageHtmlFile = new File(pageHtmlFileName);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
103 String fragmentMorphStr = FileUtils.readFileToString(pageFile, "utf-8");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
104 if (! pageHtmlFile.exists()) // TODO rausnehmen sobald alle Dokumente neu indexiert wurden
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
105 fragmentMorphStr = enrichWordsOrigRegNorm(fragmentMorphStr);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
106 if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay")) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
107 String schemaName = mdRecord.getSchemaName();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
108 String title = docId + ", Page: " + page;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
109 String xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
110 String cssShowWordFileName = "pageNormDict.css";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
111 if (outputFormat.equals("xmlDisplay"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
112 cssShowWordFileName = "pageOrig.css"; // xml display shows always the original text
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
113 else if (normalization.equals("orig") && mode.equals("untokenized"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
114 cssShowWordFileName = "pageOrig.css";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
115 else if (normalization.equals("orig") && mode.equals("tokenized"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
116 cssShowWordFileName = "pageOrigDict.css";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
117 else if (normalization.equals("reg") && mode.equals("untokenized"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
118 cssShowWordFileName = "pageReg.css";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
119 else if (normalization.equals("reg") && mode.equals("tokenized"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
120 cssShowWordFileName = "pageRegDict.css";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
121 else if (normalization.equals("norm") && mode.equals("untokenized"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
122 cssShowWordFileName = "pageNorm.css";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
123 String showWordCssUrl = baseUrl + "/css/" + cssShowWordFileName;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
124 String mainCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + cssUrl + "\"/>";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
125 String showWordCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + showWordCssUrl + "\"/>";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
126 String head = "<head>" + "<title>" + title + "</title>" + showWordCssLink + mainCssLink + "</head>";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
127 String namespace = "";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
128 String pageHtmlStr = null;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
129 if (pageHtmlFile.exists() && outputFormat.equals("html") && (highlightElem == null && highlightQuery == null)) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
130 pageHtmlStr = FileUtils.readFileToString(pageHtmlFile, "utf-8");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
131 } else {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
132 if (highlightElem != null || highlightQuery != null) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
133 String hiQueryType = "orig";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
134 if (highlightQueryType.equals("morph"))
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
135 hiQueryType = "morph";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
136 else
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
137 hiQueryType = normalization;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
138 String language = mdRecord.getLanguage();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
139 fragmentMorphStr = highlight(fragmentMorphStr, highlightElem, highlightElemPos, hiQueryType, highlightQuery, language);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
140 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
141 pageHtmlStr = pageTransformer.transform(fragmentMorphStr, mdRecord, page, outputFormat);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
142 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
143 if (schemaName != null && schemaName.equals("echo")) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
144 namespace = "xmlns:echo=\"http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/\" xmlns:de=\"http://www.mpiwg-berlin.mpg.de/ns/de/1.0/\" " +
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
145 "xmlns:dcterms=\"http://purl.org/dc/terms\" " + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\" " +
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
146 "xmlns:xlink=\"http://www.w3.org/1999/xlink\"";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
147 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
148 result = xmlHeader + "<html " + namespace + ">" + head + "<body>" + pageHtmlStr + "</body>" + "</html>";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
149 } else {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
150 String pageFileNameOrig = docPageDir + "/page-" + page + ".xml";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
151 File pageFileOrig = new File(pageFileNameOrig);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
152 if (pageFileOrig.exists())
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
153 result = FileUtils.readFileToString(pageFileOrig, "utf-8");
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
154 else
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
155 result = "";
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
156 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
157 out.print(result);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
158 out.close();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
159 } catch (ApplicationException e) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
160 throw new ServletException(e);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
161 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
162 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
163
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
164 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
165 doGet(request, response);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
166 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
167
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
168 private String getBaseUrl(HttpServletRequest request) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
169 return getServerUrl(request) + request.getContextPath();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
170 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
171
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
172 private String getServerUrl(HttpServletRequest request) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
173 if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) )
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
174 return request.getScheme() + "://" + request.getServerName();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
175 else
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
176 return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
177 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
178
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
179 private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
180 try {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
181 WordContentHandler wordContentHandler = new WordContentHandler();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
182 XMLReader xmlParser = new SAXParser();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
183 xmlParser.setContentHandler(wordContentHandler);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
184 StringReader strReader = new StringReader(xmlStr);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
185 InputSource inputSource = new InputSource(strReader);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
186 xmlParser.parse(inputSource);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
187 String result = wordContentHandler.getResult();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
188 return result;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
189 } catch (SAXException e) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
190 throw new ApplicationException(e);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
191 } catch (IOException e) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
192 throw new ApplicationException(e);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
193 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
194 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
195
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
196 private String highlight(String xmlStr, String highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
197 String result = null;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
198 try {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
199 HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
200 highlightContentHandler.setFirstPageBreakReachedMode(true);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
201 XMLReader xmlParser = new SAXParser();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
202 xmlParser.setContentHandler(highlightContentHandler);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
203 StringReader stringReader = new StringReader(xmlStr);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
204 InputSource inputSource = new InputSource(stringReader);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
205 xmlParser.parse(inputSource);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
206 result = highlightContentHandler.getResult().toString();
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
207 } catch (SAXException e) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
208 throw new ApplicationException(e);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
209 } catch (IOException e) {
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
210 throw new ApplicationException(e);
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
211 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
212 return result;
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
213 }
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
214
e845310098ba diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
215 }