Mercurial > hg > mpdl-group

package de.mpg.mpiwg.berlin.mpdl.servlets.cms;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;

import javax.servlet.ServletConfig;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.io.FileUtils;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import com.sun.org.apache.xerces.internal.parsers.SAXParser;

import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler;
import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord;
import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler;
import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler;
import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler;

public class GetPage extends HttpServlet {
  private static final long serialVersionUID = 1L;
  private PageTransformer pageTransformer;

  public GetPage() {
    super();
  }

  public void init(ServletConfig config) throws ServletException  {
    super.init(config);
    ServletContext context = getServletContext();
    pageTransformer = (PageTransformer) context.getAttribute("pageTransformer");
  }

  protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    String result = "";
    request.setCharacterEncoding("utf-8");
    response.setCharacterEncoding("utf-8");
    String docId = request.getParameter("docId");
    String pageStr = request.getParameter("page");
    String normalization = request.getParameter("normalization");
    String highlightQuery = request.getParameter("highlightQuery");
    String highlightQueryType = request.getParameter("highlightQueryType");
    if (highlightQueryType == null)
      highlightQueryType = "form";
    String highlightElem = request.getParameter("highlightElem");
    String highlightElemPosStr = request.getParameter("highlightElemPos");
    int highlightElemPos = -1;
    if (highlightElemPosStr != null)
      highlightElemPos = Integer.parseInt(highlightElemPosStr);
    String mode = request.getParameter("mode");
    if (mode == null)
      mode = "untokenized";
    String outputFormat = request.getParameter("outputFormat");
    if (outputFormat == null)
      outputFormat = "html";
    String cssUrl = request.getParameter("cssUrl");
    String baseUrl = getBaseUrl(request);
    if (cssUrl == null) {
      cssUrl = baseUrl + "/css/page.css";
    }
    int page = 1;
    if (pageStr != null)
      page = Integer.parseInt(pageStr);
    if (outputFormat.equals("xml"))
      response.setContentType("text/xml");
    else if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay"))
      response.setContentType("text/html");
    // normalization
    if (normalization == null || ! (normalization.equals("orig") || normalization.equals("reg") || normalization.equals("norm")))
      normalization = "norm";
    if (outputFormat.equals("xmlDisplay"))
      normalization = "orig";
    PrintWriter out = response.getWriter();
    try {
      IndexHandler indexHandler = IndexHandler.getInstance();
      MetadataRecord mdRecord = indexHandler.getDocMetadata(docId);
      DocumentHandler docHandler = new DocumentHandler();
      String docDir = docHandler.getDocDir(docId);
      String docPageDir = docDir + "/" + "pages";
      String pageFileName = docPageDir + "/page-" + page + "-morph.xml";
      File pageFile = new File(pageFileName);
      if (page == 1 && ! (new File(docPageDir)).exists()) {
        String docFileName = docHandler.getDocFullFileName(docId);
        pageFile = new File(docFileName);  // when no page breaks are in the document then the whole document is the first page
      }
      if (! pageFile.exists()) {
        out.print("There is no page: " + page + " in document");
        out.close();
        return;
      }
      String pageHtmlFileName = docPageDir + "/page-" + page + ".html";
      File pageHtmlFile = new File(pageHtmlFileName);
      String fragmentMorphStr = FileUtils.readFileToString(pageFile, "utf-8");
      if (! pageHtmlFile.exists())  // TODO rausnehmen sobald alle Dokumente neu indexiert wurden
        fragmentMorphStr = enrichWordsOrigRegNorm(fragmentMorphStr);
      if (outputFormat.equals("html") || outputFormat.equals("xmlDisplay")) {
        String schemaName = mdRecord.getSchemaName();
        String title = docId + ", Page: " + page;
        String xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";
        String cssShowWordFileName = "pageNormDict.css";
        if (outputFormat.equals("xmlDisplay"))
          cssShowWordFileName = "pageOrig.css"; // xml display shows always the original text
        else if (normalization.equals("orig") && mode.equals("untokenized"))
          cssShowWordFileName = "pageOrig.css";
        else if (normalization.equals("orig") && mode.equals("tokenized"))
          cssShowWordFileName = "pageOrigDict.css";
        else if (normalization.equals("reg") && mode.equals("untokenized"))
          cssShowWordFileName = "pageReg.css";
        else if (normalization.equals("reg") && mode.equals("tokenized"))
          cssShowWordFileName = "pageRegDict.css";
        else if (normalization.equals("norm") && mode.equals("untokenized"))
          cssShowWordFileName = "pageNorm.css";
        String showWordCssUrl = baseUrl + "/css/" + cssShowWordFileName;
        String mainCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + cssUrl + "\"/>";
        String showWordCssLink = "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + showWordCssUrl + "\"/>";
        String head = "<head>" + "<title>" + title + "</title>" + showWordCssLink + mainCssLink + "</head>";
        String namespace = "";
        String pageHtmlStr = null;
        if (pageHtmlFile.exists() && outputFormat.equals("html") && (highlightElem == null && highlightQuery == null)) {
          pageHtmlStr = FileUtils.readFileToString(pageHtmlFile, "utf-8");
        } else {
          if (highlightElem != null || highlightQuery != null) {
            String hiQueryType = "orig";
            if (highlightQueryType.equals("morph"))
              hiQueryType = "morph";
            else
              hiQueryType = normalization;
            String language = mdRecord.getLanguage();
            fragmentMorphStr = highlight(fragmentMorphStr, highlightElem, highlightElemPos, hiQueryType, highlightQuery, language);
          }
          pageHtmlStr = pageTransformer.transform(fragmentMorphStr, mdRecord, page, outputFormat);
        }
        if (schemaName != null && schemaName.equals("echo")) {
          namespace = "xmlns:echo=\"http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/\" xmlns:de=\"http://www.mpiwg-berlin.mpg.de/ns/de/1.0/\" " +
                  "xmlns:dcterms=\"http://purl.org/dc/terms\" " + "xmlns:xhtml=\"http://www.w3.org/1999/xhtml\" xmlns:mml=\"http://www.w3.org/1998/Math/MathML\" " +
                  "xmlns:xlink=\"http://www.w3.org/1999/xlink\"";
        }
        result = xmlHeader + "<html " + namespace + ">" + head + "<body>" + pageHtmlStr + "</body>" + "</html>";
      } else {
        String pageFileNameOrig = docPageDir + "/page-" + page + ".xml";
        File pageFileOrig = new File(pageFileNameOrig);
        if (pageFileOrig.exists())
          result = FileUtils.readFileToString(pageFileOrig, "utf-8");
        else
          result = "";
      }
      out.print(result);
      out.close();
    } catch (ApplicationException e) {
      throw new ServletException(e);
    }
  }

  protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
    doGet(request, response);
  }

  private String getBaseUrl(HttpServletRequest request) {
    return getServerUrl(request) + request.getContextPath();
  }

  private String getServerUrl(HttpServletRequest request) {
    if ( ( request.getServerPort() == 80 ) || ( request.getServerPort() == 443 ) )
      return request.getScheme() + "://" + request.getServerName();
    else
      return request.getScheme() + "://" + request.getServerName() + ":" + request.getServerPort();
  }

  private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException {
    try {
      WordContentHandler wordContentHandler = new WordContentHandler();
      XMLReader xmlParser = new SAXParser();
      xmlParser.setContentHandler(wordContentHandler);
      StringReader strReader = new StringReader(xmlStr);
      InputSource inputSource = new InputSource(strReader);
      xmlParser.parse(inputSource);
      String result = wordContentHandler.getResult();
      return result;
    } catch (SAXException e) {
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
  }

  private String highlight(String xmlStr, String highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException {
    String result = null;
    try {
      HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language);
      highlightContentHandler.setFirstPageBreakReachedMode(true);
      XMLReader xmlParser = new SAXParser();
      xmlParser.setContentHandler(highlightContentHandler);
      StringReader stringReader = new StringReader(xmlStr);
      InputSource inputSource = new InputSource(stringReader);
      xmlParser.parse(inputSource);
      result = highlightContentHandler.getResult().toString();
    } catch (SAXException e) {
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
    return result;
  }

}
author	Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date	Tue, 21 May 2013 10:19:32 +0200
parents
children