mpdl-group: software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.java comparison

comparison software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.java @ 25:e9fe3186670c default tip

letzter Stand eingecheckt

author	Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date	Tue, 21 May 2013 10:19:32 +0200
parents
children

comparison

equal deleted inserted replaced

-:e845310098ba
+:e9fe3186670c
+package de.mpg.mpiwg.berlin.mpdl.servlets.cms;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.StringReader;
+import javax.servlet.ServletConfig;
+import javax.servlet.ServletContext;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.apache.commons.io.FileUtils;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler;
+import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord;
+import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler;
+import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler;
+import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler;
+/**
+ * Servlet that delivers one page of a document, either as an HTML page
+ * (output formats "html" and "xmlDisplay") or as the stored original page XML
+ * (any other output format, e.g. "xml").
+ *
+ * Request parameters read by this servlet:
+ *   docId              identifier of the document (required)
+ *   page               page number, defaults to 1
+ *   normalization      "orig", "reg" or "norm"; anything else is treated as "norm"
+ *   mode               "untokenized" (default) or "tokenized"
+ *   outputFormat       "html" (default), "xmlDisplay" or "xml"
+ *   cssUrl             main stylesheet, defaults to <baseUrl>/css/page.css
+ *   highlightQuery, highlightQueryType (default "form"), highlightElem,
+ *   highlightElemPos   optional highlighting instructions
+ */
+public class GetPage extends HttpServlet {
+  private static final long serialVersionUID = 1L;
+  private static final String ENCODING = "utf-8";
+
+  // Looked up from the servlet context attribute "pageTransformer" in init().
+  private PageTransformer pageTransformer;
+
+  public GetPage() {
+    super();
+  }
+
+  /**
+   * Initializes the servlet and fetches the shared PageTransformer from the
+   * servlet context attribute "pageTransformer".
+   */
+  @Override
+  public void init(ServletConfig config) throws ServletException {
+    super.init(config);
+    ServletContext context = getServletContext();
+    pageTransformer = (PageTransformer) context.getAttribute("pageTransformer");
+  }
+
+  /**
+   * Writes the requested page to the response.
+   *
+   * Responds with HTTP 400 when "docId" is missing or when "page" or
+   * "highlightElemPos" is not an integer. When the page file does not exist
+   * a plain message is written instead of the page.
+   *
+   * @throws ServletException wrapping any ApplicationException raised while
+   *         looking up, highlighting or transforming the page
+   * @throws IOException if reading a page file or writing the response fails
+   */
+  @Override
+  protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
+    request.setCharacterEncoding(ENCODING);
+    response.setCharacterEncoding(ENCODING);
+
+    // Validate client input before the writer is obtained, so that sendError()
+    // can still produce a proper error response.
+    String docId = request.getParameter("docId");
+    if (docId == null || docId.trim().length() == 0) {
+      response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameter \"docId\" is required");
+      return;
+    }
+    int page;
+    int highlightElemPos;
+    try {
+      page = parseIntParam(request.getParameter("page"), 1);
+      highlightElemPos = parseIntParam(request.getParameter("highlightElemPos"), -1);
+    } catch (NumberFormatException e) {
+      response.sendError(HttpServletResponse.SC_BAD_REQUEST, "Parameters \"page\" and \"highlightElemPos\" must be integers");
+      return;
+    }
+
+    String highlightQuery = request.getParameter("highlightQuery");
+    String highlightQueryType = paramOrDefault(request, "highlightQueryType", "form");
+    String highlightElem = request.getParameter("highlightElem");
+    String mode = paramOrDefault(request, "mode", "untokenized");
+    String outputFormat = paramOrDefault(request, "outputFormat", "html");
+    String baseUrl = getBaseUrl(request);
+    String cssUrl = paramOrDefault(request, "cssUrl", baseUrl + "/css/page.css");
+
+    boolean htmlOutput = outputFormat.equals("html") || outputFormat.equals("xmlDisplay");
+    if (outputFormat.equals("xml")) {
+      response.setContentType("text/xml");
+    } else if (htmlOutput) {
+      response.setContentType("text/html");
+    }
+    String normalization = resolveNormalization(request.getParameter("normalization"), outputFormat);
+
+    PrintWriter out = response.getWriter();
+    try {
+      IndexHandler indexHandler = IndexHandler.getInstance();
+      MetadataRecord mdRecord = indexHandler.getDocMetadata(docId);
+      DocumentHandler docHandler = new DocumentHandler();
+      String docPageDir = docHandler.getDocDir(docId) + "/" + "pages";
+      File pageFile = new File(docPageDir + "/page-" + page + "-morph.xml");
+      if (page == 1 && !new File(docPageDir).exists()) {
+        // when no page breaks are in the document then the whole document is the first page
+        pageFile = new File(docHandler.getDocFullFileName(docId));
+      }
+      if (!pageFile.exists()) {
+        out.print("There is no page: " + page + " in document");
+        out.close();
+        return;
+      }
+
+      String result;
+      if (htmlOutput) {
+        File pageHtmlFile = new File(docPageDir + "/page-" + page + ".html");
+        boolean highlighting = highlightElem != null || highlightQuery != null;
+        String pageHtmlStr;
+        if (pageHtmlFile.exists() && outputFormat.equals("html") && !highlighting) {
+          // A pre-rendered HTML page exists and nothing has to be highlighted: serve it as is.
+          pageHtmlStr = FileUtils.readFileToString(pageHtmlFile, ENCODING);
+        } else {
+          // The morph file is only read (and enriched) here, where it is actually needed.
+          String fragmentMorphStr = FileUtils.readFileToString(pageFile, ENCODING);
+          if (!pageHtmlFile.exists()) {
+            // TODO remove as soon as all documents have been re-indexed
+            fragmentMorphStr = enrichWordsOrigRegNorm(fragmentMorphStr);
+          }
+          if (highlighting) {
+            String hiQueryType = highlightQueryType.equals("morph") ? "morph" : normalization;
+            String language = mdRecord.getLanguage();
+            fragmentMorphStr = highlight(fragmentMorphStr, highlightElem, highlightElemPos, hiQueryType, highlightQuery, language);
+          }
+          pageHtmlStr = pageTransformer.transform(fragmentMorphStr, mdRecord, page, outputFormat);
+        }
+        String showWordCssUrl = baseUrl + "/css/" + selectShowWordCssFileName(outputFormat, normalization, mode);
+        String title = docId + ", Page: " + page;
+        result = buildHtmlDocument(title, showWordCssUrl, cssUrl, mdRecord.getSchemaName(), pageHtmlStr);
+      } else {
+        File pageFileOrig = new File(docPageDir + "/page-" + page + ".xml");
+        result = pageFileOrig.exists() ? FileUtils.readFileToString(pageFileOrig, ENCODING) : "";
+      }
+      out.print(result);
+      // Closed only on success: on failure the container must still be able to
+      // render its error page on the uncommitted response.
+      out.close();
+    } catch (ApplicationException e) {
+      throw new ServletException(e);
+    }
+  }
+
+  /** Handles POST requests exactly like GET requests. */
+  @Override
+  protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
+    doGet(request, response);
+  }
+
+  /** Returns the request parameter with the given name, or defaultValue when it is absent. */
+  private static String paramOrDefault(HttpServletRequest request, String name, String defaultValue) {
+    String value = request.getParameter(name);
+    return value != null ? value : defaultValue;
+  }
+
+  /**
+   * Parses an integer request parameter; null and blank values yield defaultValue.
+   *
+   * @throws NumberFormatException if the value is present but not an integer
+   */
+  private static int parseIntParam(String value, int defaultValue) {
+    if (value == null) {
+      return defaultValue;
+    }
+    String trimmed = value.trim();
+    if (trimmed.length() == 0) {
+      return defaultValue;
+    }
+    return Integer.parseInt(trimmed);
+  }
+
+  /**
+   * Returns the effective normalization: always "orig" for the "xmlDisplay"
+   * output format, otherwise the requested value if it is one of
+   * "orig", "reg" or "norm", else "norm".
+   */
+  private static String resolveNormalization(String requested, String outputFormat) {
+    if (outputFormat.equals("xmlDisplay")) {
+      return "orig";
+    }
+    if ("orig".equals(requested) || "reg".equals(requested) || "norm".equals(requested)) {
+      return requested;
+    }
+    return "norm";
+  }
+
+  /**
+   * Picks the stylesheet that controls which word form is displayed.
+   * Combinations not listed explicitly fall back to "pageNormDict.css".
+   */
+  private static String selectShowWordCssFileName(String outputFormat, String normalization, String mode) {
+    if (outputFormat.equals("xmlDisplay")) {
+      return "pageOrig.css"; // xml display shows always the original text
+    }
+    boolean untokenized = mode.equals("untokenized");
+    boolean tokenized = mode.equals("tokenized");
+    if (normalization.equals("orig")) {
+      if (untokenized) {
+        return "pageOrig.css";
+      }
+      if (tokenized) {
+        return "pageOrigDict.css";
+      }
+    } else if (normalization.equals("reg")) {
+      if (untokenized) {
+        return "pageReg.css";
+      }
+      if (tokenized) {
+        return "pageRegDict.css";
+      }
+    } else if (normalization.equals("norm") && untokenized) {
+      return "pageNorm.css";
+    }
+    return "pageNormDict.css";
+  }
+
+  /**
+   * Wraps the page body in a complete HTML document. The title and both
+   * stylesheet URLs originate from request data and are therefore escaped
+   * before they are placed into the markup; pageHtmlStr is inserted as is.
+   */
+  private static String buildHtmlDocument(String title, String showWordCssUrl, String cssUrl, String schemaName, String pageHtmlStr) {
+    String xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";
+    String mainCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + escapeXml(cssUrl) + "\"/>";
+    String showWordCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + escapeXml(showWordCssUrl) + "\"/>";
+    String head = "<head>" + "<title>" + escapeXml(title) + "</title>" + showWordCssLink + mainCssLink + "</head>";
+    String namespace = "";
+    if (schemaName != null && schemaName.equals("echo")) {
+      namespace = "xmlns:echo=\"http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/\" xmlns:de=\"http://www.mpiwg-berlin.mpg.de/ns/de/1.0/\" " +
+          "xmlns:dcterms=\"http://purl.org/dc/terms\" " + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\" " +
+          "xmlns:xlink=\"http://www.w3.org/1999/xlink\"";
+    }
+    return xmlHeader + "<html " + namespace + ">" + head + "<body>" + pageHtmlStr + "</body>" + "</html>";
+  }
+
+  /** Escapes the five XML special characters so text is safe in element content and attribute values. */
+  private static String escapeXml(String text) {
+    StringBuilder escaped = new StringBuilder(text.length() + 16);
+    for (int i = 0; i < text.length(); i++) {
+      char c = text.charAt(i);
+      switch (c) {
+        case '&':
+          escaped.append("&amp;");
+          break;
+        case '<':
+          escaped.append("&lt;");
+          break;
+        case '>':
+          escaped.append("&gt;");
+          break;
+        case '"':
+          escaped.append("&quot;");
+          break;
+        case '\'':
+          escaped.append("&#39;");
+          break;
+        default:
+          escaped.append(c);
+      }
+    }
+    return escaped.toString();
+  }
+
+  /** Returns scheme, host, optional port and context path of the request, without a trailing slash. */
+  private String getBaseUrl(HttpServletRequest request) {
+    return getServerUrl(request) + request.getContextPath();
+  }
+
+  /**
+   * Returns scheme and host of the request; the port is appended unless it is
+   * 80 or 443 (both are omitted regardless of the scheme).
+   */
+  private String getServerUrl(HttpServletRequest request) {
+    int port = request.getServerPort();
+    String serverUrl = request.getScheme() + "://" + request.getServerName();
+    if (port == 80 || port == 443) {
+      return serverUrl;
+    }
+    return serverUrl + ":" + port;
+  }
+
+  /**
+   * Runs the XML through a WordContentHandler and returns the handler's result.
+   *
+   * @throws ApplicationException if the XML cannot be parsed
+   */
+  private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException {
+    WordContentHandler wordContentHandler = new WordContentHandler();
+    parseXml(xmlStr, wordContentHandler);
+    return wordContentHandler.getResult();
+  }
+
+  /**
+   * Runs the XML through a HighlightContentHandler configured with the given
+   * highlighting parameters and returns the handler's result as a string.
+   *
+   * @throws ApplicationException if the XML cannot be parsed
+   */
+  private String highlight(String xmlStr, String highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException {
+    HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language);
+    highlightContentHandler.setFirstPageBreakReachedMode(true);
+    parseXml(xmlStr, highlightContentHandler);
+    return highlightContentHandler.getResult().toString();
+  }
+
+  /**
+   * SAX-parses the given XML string, feeding all events to the handler.
+   * SAX and I/O failures are wrapped in an ApplicationException.
+   */
+  private void parseXml(String xmlStr, org.xml.sax.ContentHandler handler) throws ApplicationException {
+    try {
+      XMLReader xmlParser = new SAXParser();
+      xmlParser.setContentHandler(handler);
+      xmlParser.parse(new InputSource(new StringReader(xmlStr)));
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+}

Mercurial > hg > mpdl-group

comparison software/mpdl-services-new/mpiwg-mpdl-cms-web/src/de/mpg/mpiwg/berlin/mpdl/servlets/cms/GetPage.java @ 25:e9fe3186670c default tip