Mercurial > hg > mpdl-group
comparison software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.java @ 25:e9fe3186670c default tip
letzter Stand eingecheckt
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 21 May 2013 10:19:32 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
23:e845310098ba | 25:e9fe3186670c |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.servlets.cms; | |
2 | |
3 import java.io.File; | |
4 import java.io.IOException; | |
5 import java.io.PrintWriter; | |
6 import java.io.StringReader; | |
7 | |
8 import javax.servlet.ServletConfig; | |
9 import javax.servlet.ServletContext; | |
10 import javax.servlet.ServletException; | |
11 import javax.servlet.http.HttpServlet; | |
12 import javax.servlet.http.HttpServletRequest; | |
13 import javax.servlet.http.HttpServletResponse; | |
14 | |
15 import org.apache.commons.io.FileUtils; | |
16 import org.xml.sax.InputSource; | |
17 import org.xml.sax.SAXException; | |
18 import org.xml.sax.XMLReader; | |
19 | |
20 import com.sun.org.apache.xerces.internal.parsers.SAXParser; | |
21 | |
22 import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; | |
23 import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; | |
24 import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; | |
25 import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler; | |
26 import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; | |
27 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
28 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; | |
29 | |
30 public class GetPage extends HttpServlet { | |
31 private static final long serialVersionUID = 1L; | |
32 private PageTransformer pageTransformer; | |
33 | |
34 public GetPage() { | |
35 super(); | |
36 } | |
37 | |
38 public void init(ServletConfig config) throws ServletException { | |
39 super.init(config); | |
40 ServletContext context = getServletContext(); | |
41 pageTransformer = (PageTransformer) context.getAttribute("pageTransformer"); | |
42 } | |
43 | |
44 protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { | |
45 String result = ""; | |
46 request.setCharacterEncoding("utf-8"); | |
47 response.setCharacterEncoding("utf-8"); | |
48 String docId = request.getParameter("docId"); | |
49 String pageStr = request.getParameter("page"); | |
50 String normalization = request.getParameter("normalization"); | |
51 String highlightQuery = request.getParameter("highlightQuery"); | |
52 String highlightQueryType = request.getParameter("highlightQueryType"); | |
53 if (highlightQueryType == null) | |
54 highlightQueryType = "form"; | |
55 String highlightElem = request.getParameter("highlightElem"); | |
56 String highlightElemPosStr = request.getParameter("highlightElemPos"); | |
57 int highlightElemPos = -1; | |
58 if (highlightElemPosStr != null) | |
59 highlightElemPos = Integer.parseInt(highlightElemPosStr); | |
60 String mode = request.getParameter("mode"); | |
61 if (mode == null) | |
62 mode = "untokenized"; | |
63 String outputFormat = request.getParameter("outputFormat"); | |
64 if (outputFormat == null) | |
65 outputFormat = "html"; | |
66 String cssUrl = request.getParameter("cssUrl"); | |
67 String baseUrl = getBaseUrl(request); | |
68 if (cssUrl == null) { | |
69 cssUrl = baseUrl + "/css/page.css"; | |
70 } | |
71 int page = 1; | |
72 if (pageStr != null) | |
73 page = Integer.parseInt(pageStr); | |
74 if (outputFormat.equals("xml")) | |
75 response.setContentType("text/xml"); | |
76 else if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay")) | |
77 response.setContentType("text/html"); | |
78 // normalization | |
79 if (normalization == null || ! (normalization.equals("orig") || normalization.equals("reg") || normalization.equals("norm"))) | |
80 normalization = "norm"; | |
81 if (outputFormat.equals("xmlDisplay")) | |
82 normalization = "orig"; | |
83 PrintWriter out = response.getWriter(); | |
84 try { | |
85 IndexHandler indexHandler = IndexHandler.getInstance(); | |
86 MetadataRecord mdRecord = indexHandler.getDocMetadata(docId); | |
87 DocumentHandler docHandler = new DocumentHandler(); | |
88 String docDir = docHandler.getDocDir(docId); | |
89 String docPageDir = docDir + "/" + "pages"; | |
90 String pageFileName = docPageDir + "/page-" + page + "-morph.xml"; | |
91 File pageFile = new File(pageFileName); | |
92 if (page == 1 && ! (new File(docPageDir)).exists()) { | |
93 String docFileName = docHandler.getDocFullFileName(docId); | |
94 pageFile = new File(docFileName); // when no page breaks are in the document then the whole document is the first page | |
95 } | |
96 if (! pageFile.exists()) { | |
97 out.print("There is no page: " + page + " in document"); | |
98 out.close(); | |
99 return; | |
100 } | |
101 String pageHtmlFileName = docPageDir + "/page-" + page + ".html"; | |
102 File pageHtmlFile = new File(pageHtmlFileName); | |
103 String fragmentMorphStr = FileUtils.readFileToString(pageFile, "utf-8"); | |
104 if (! pageHtmlFile.exists()) // TODO rausnehmen sobald alle Dokumente neu indexiert wurden | |
105 fragmentMorphStr = enrichWordsOrigRegNorm(fragmentMorphStr); | |
106 if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay")) { | |
107 String schemaName = mdRecord.getSchemaName(); | |
108 String title = docId + ", Page: " + page; | |
109 String xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"; | |
110 String cssShowWordFileName = "pageNormDict.css"; | |
111 if (outputFormat.equals("xmlDisplay")) | |
112 cssShowWordFileName = "pageOrig.css"; // xml display shows always the original text | |
113 else if (normalization.equals("orig") && mode.equals("untokenized")) | |
114 cssShowWordFileName = "pageOrig.css"; | |
115 else if (normalization.equals("orig") && mode.equals("tokenized")) | |
116 cssShowWordFileName = "pageOrigDict.css"; | |
117 else if (normalization.equals("reg") && mode.equals("untokenized")) | |
118 cssShowWordFileName = "pageReg.css"; | |
119 else if (normalization.equals("reg") && mode.equals("tokenized")) | |
120 cssShowWordFileName = "pageRegDict.css"; | |
121 else if (normalization.equals("norm") && mode.equals("untokenized")) | |
122 cssShowWordFileName = "pageNorm.css"; | |
123 String showWordCssUrl = baseUrl + "/css/" + cssShowWordFileName; | |
124 String mainCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + cssUrl + "\"/>"; | |
125 String showWordCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + showWordCssUrl + "\"/>"; | |
126 String head = "<head>" + "<title>" + title + "</title>" + showWordCssLink + mainCssLink + "</head>"; | |
127 String namespace = ""; | |
128 String pageHtmlStr = null; | |
129 if (pageHtmlFile.exists() && outputFormat.equals("html") && (highlightElem == null && highlightQuery == null)) { | |
130 pageHtmlStr = FileUtils.readFileToString(pageHtmlFile, "utf-8"); | |
131 } else { | |
132 if (highlightElem != null || highlightQuery != null) { | |
133 String hiQueryType = "orig"; | |
134 if (highlightQueryType.equals("morph")) | |
135 hiQueryType = "morph"; | |
136 else | |
137 hiQueryType = normalization; | |
138 String language = mdRecord.getLanguage(); | |
139 fragmentMorphStr = highlight(fragmentMorphStr, highlightElem, highlightElemPos, hiQueryType, highlightQuery, language); | |
140 } | |
141 pageHtmlStr = pageTransformer.transform(fragmentMorphStr, mdRecord, page, outputFormat); | |
142 } | |
143 if (schemaName != null && schemaName.equals("echo")) { | |
144 namespace = "xmlns:echo=\"http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/\" xmlns:de=\"http://www.mpiwg-berlin.mpg.de/ns/de/1.0/\" " + | |
145 "xmlns:dcterms=\"http://purl.org/dc/terms\" " + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\" " + | |
146 "xmlns:xlink=\"http://www.w3.org/1999/xlink\""; | |
147 } | |
148 result = xmlHeader + "<html " + namespace + ">" + head + "<body>" + pageHtmlStr + "</body>" + "</html>"; | |
149 } else { | |
150 String pageFileNameOrig = docPageDir + "/page-" + page + ".xml"; | |
151 File pageFileOrig = new File(pageFileNameOrig); | |
152 if (pageFileOrig.exists()) | |
153 result = FileUtils.readFileToString(pageFileOrig, "utf-8"); | |
154 else | |
155 result = ""; | |
156 } | |
157 out.print(result); | |
158 out.close(); | |
159 } catch (ApplicationException e) { | |
160 throw new ServletException(e); | |
161 } | |
162 } | |
163 | |
164 protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { | |
165 doGet(request, response); | |
166 } | |
167 | |
168 private String getBaseUrl(HttpServletRequest request) { | |
169 return getServerUrl(request) + request.getContextPath(); | |
170 } | |
171 | |
172 private String getServerUrl(HttpServletRequest request) { | |
173 if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) ) | |
174 return request.getScheme() + "://" + request.getServerName(); | |
175 else | |
176 return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); | |
177 } | |
178 | |
179 private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { | |
180 try { | |
181 WordContentHandler wordContentHandler = new WordContentHandler(); | |
182 XMLReader xmlParser = new SAXParser(); | |
183 xmlParser.setContentHandler(wordContentHandler); | |
184 StringReader strReader = new StringReader(xmlStr); | |
185 InputSource inputSource = new InputSource(strReader); | |
186 xmlParser.parse(inputSource); | |
187 String result = wordContentHandler.getResult(); | |
188 return result; | |
189 } catch (SAXException e) { | |
190 throw new ApplicationException(e); | |
191 } catch (IOException e) { | |
192 throw new ApplicationException(e); | |
193 } | |
194 } | |
195 | |
196 private String highlight(String xmlStr, String highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { | |
197 String result = null; | |
198 try { | |
199 HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language); | |
200 highlightContentHandler.setFirstPageBreakReachedMode(true); | |
201 XMLReader xmlParser = new SAXParser(); | |
202 xmlParser.setContentHandler(highlightContentHandler); | |
203 StringReader stringReader = new StringReader(xmlStr); | |
204 InputSource inputSource = new InputSource(stringReader); | |
205 xmlParser.parse(inputSource); | |
206 result = highlightContentHandler.getResult().toString(); | |
207 } catch (SAXException e) { | |
208 throw new ApplicationException(e); | |
209 } catch (IOException e) { | |
210 throw new ApplicationException(e); | |
211 } | |
212 return result; | |
213 } | |
214 | |
215 } |