Mercurial > hg > mpdl-group
view software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.java @ 25:e9fe3186670c default tip
letzter Stand eingecheckt
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 21 May 2013 10:19:32 +0200 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.servlets.cms; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.io.StringReader; import javax.servlet.ServletConfig; import javax.servlet.ServletContext; import javax.servlet.ServletException; import javax.servlet.http.HttpServlet; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import org.apache.commons.io.FileUtils; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import com.sun.org.apache.xerces.internal.parsers.SAXParser; import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler; import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; public class GetPage extends HttpServlet { private static final long serialVersionUID = 1L; private PageTransformer pageTransformer; public GetPage() { super(); } public void init(ServletConfig config) throws ServletException { super.init(config); ServletContext context = getServletContext(); pageTransformer = (PageTransformer) context.getAttribute("pageTransformer"); } protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { String result = ""; request.setCharacterEncoding("utf-8"); response.setCharacterEncoding("utf-8"); String docId = request.getParameter("docId"); String pageStr = request.getParameter("page"); String normalization = request.getParameter("normalization"); String highlightQuery = request.getParameter("highlightQuery"); String highlightQueryType = request.getParameter("highlightQueryType"); if (highlightQueryType == null) highlightQueryType = "form"; String highlightElem = request.getParameter("highlightElem"); String highlightElemPosStr = request.getParameter("highlightElemPos"); int highlightElemPos = -1; if (highlightElemPosStr != null) highlightElemPos = Integer.parseInt(highlightElemPosStr); String mode = request.getParameter("mode"); if (mode == null) mode = "untokenized"; String outputFormat = request.getParameter("outputFormat"); if (outputFormat == null) outputFormat = "html"; String cssUrl = request.getParameter("cssUrl"); String baseUrl = getBaseUrl(request); if (cssUrl == null) { cssUrl = baseUrl + "/css/page.css"; } int page = 1; if (pageStr != null) page = Integer.parseInt(pageStr); if (outputFormat.equals("xml")) response.setContentType("text/xml"); else if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay")) response.setContentType("text/html"); // normalization if (normalization == null || ! (normalization.equals("orig") || normalization.equals("reg") || normalization.equals("norm"))) normalization = "norm"; if (outputFormat.equals("xmlDisplay")) normalization = "orig"; PrintWriter out = response.getWriter(); try { IndexHandler indexHandler = IndexHandler.getInstance(); MetadataRecord mdRecord = indexHandler.getDocMetadata(docId); DocumentHandler docHandler = new DocumentHandler(); String docDir = docHandler.getDocDir(docId); String docPageDir = docDir + "/" + "pages"; String pageFileName = docPageDir + "/page-" + page + "-morph.xml"; File pageFile = new File(pageFileName); if (page == 1 && ! (new File(docPageDir)).exists()) { String docFileName = docHandler.getDocFullFileName(docId); pageFile = new File(docFileName); // when no page breaks are in the document then the whole document is the first page } if (! pageFile.exists()) { out.print("There is no page: " + page + " in document"); out.close(); return; } String pageHtmlFileName = docPageDir + "/page-" + page + ".html"; File pageHtmlFile = new File(pageHtmlFileName); String fragmentMorphStr = FileUtils.readFileToString(pageFile, "utf-8"); if (! pageHtmlFile.exists()) // TODO rausnehmen sobald alle Dokumente neu indexiert wurden fragmentMorphStr = enrichWordsOrigRegNorm(fragmentMorphStr); if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay")) { String schemaName = mdRecord.getSchemaName(); String title = docId + ", Page: " + page; String xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>"; String cssShowWordFileName = "pageNormDict.css"; if (outputFormat.equals("xmlDisplay")) cssShowWordFileName = "pageOrig.css"; // xml display shows always the original text else if (normalization.equals("orig") && mode.equals("untokenized")) cssShowWordFileName = "pageOrig.css"; else if (normalization.equals("orig") && mode.equals("tokenized")) cssShowWordFileName = "pageOrigDict.css"; else if (normalization.equals("reg") && mode.equals("untokenized")) cssShowWordFileName = "pageReg.css"; else if (normalization.equals("reg") && mode.equals("tokenized")) cssShowWordFileName = "pageRegDict.css"; else if (normalization.equals("norm") && mode.equals("untokenized")) cssShowWordFileName = "pageNorm.css"; String showWordCssUrl = baseUrl + "/css/" + cssShowWordFileName; String mainCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + cssUrl + "\"/>"; String showWordCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + showWordCssUrl + "\"/>"; String head = "<head>" + "<title>" + title + "</title>" + showWordCssLink + mainCssLink + "</head>"; String namespace = ""; String pageHtmlStr = null; if (pageHtmlFile.exists() && outputFormat.equals("html") && (highlightElem == null && highlightQuery == null)) { pageHtmlStr = FileUtils.readFileToString(pageHtmlFile, "utf-8"); } else { if (highlightElem != null || highlightQuery != null) { String hiQueryType = "orig"; if (highlightQueryType.equals("morph")) hiQueryType = "morph"; else hiQueryType = normalization; String language = mdRecord.getLanguage(); fragmentMorphStr = highlight(fragmentMorphStr, highlightElem, highlightElemPos, hiQueryType, highlightQuery, language); } pageHtmlStr = pageTransformer.transform(fragmentMorphStr, mdRecord, page, outputFormat); } if (schemaName != null && schemaName.equals("echo")) { namespace = "xmlns:echo=\"http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/\" xmlns:de=\"http://www.mpiwg-berlin.mpg.de/ns/de/1.0/\" " + "xmlns:dcterms=\"http://purl.org/dc/terms\" " + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\" " + "xmlns:xlink=\"http://www.w3.org/1999/xlink\""; } result = xmlHeader + "<html " + namespace + ">" + head + "<body>" + pageHtmlStr + "</body>" + "</html>"; } else { String pageFileNameOrig = docPageDir + "/page-" + page + ".xml"; File pageFileOrig = new File(pageFileNameOrig); if (pageFileOrig.exists()) result = FileUtils.readFileToString(pageFileOrig, "utf-8"); else result = ""; } out.print(result); out.close(); } catch (ApplicationException e) { throw new ServletException(e); } } protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException { doGet(request, response); } private String getBaseUrl(HttpServletRequest request) { return getServerUrl(request) + request.getContextPath(); } private String getServerUrl(HttpServletRequest request) { if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) ) return request.getScheme() + "://" + request.getServerName(); else return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort(); } private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { try { WordContentHandler wordContentHandler = new WordContentHandler(); XMLReader xmlParser = new SAXParser(); xmlParser.setContentHandler(wordContentHandler); StringReader strReader = new StringReader(xmlStr); InputSource inputSource = new InputSource(strReader); xmlParser.parse(inputSource); String result = wordContentHandler.getResult(); return result; } catch (SAXException e) { throw new ApplicationException(e); } catch (IOException e) { throw new ApplicationException(e); } } private String highlight(String xmlStr, String highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { String result = null; try { HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language); highlightContentHandler.setFirstPageBreakReachedMode(true); XMLReader xmlParser = new SAXParser(); xmlParser.setContentHandler(highlightContentHandler); StringReader stringReader = new StringReader(xmlStr); InputSource inputSource = new InputSource(stringReader); xmlParser.parse(inputSource); result = highlightContentHandler.getResult().toString(); } catch (SAXException e) { throw new ApplicationException(e); } catch (IOException e) { throw new ApplicationException(e); } return result; } }