view software/mpdl-services/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.cms.document;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Hashtable;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.FileUtils;
import org.xhtmlrenderer.layout.SharedContext;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import org.xhtmlrenderer.util.XRRuntimeException;

import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.BaseFont;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants;
import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer;

public class PdfHandler {
  // Lazily created shared instance; see getInstance().
  private static PdfHandler instance;
  // Stylesheets linked from the head of every generated HTML document; see getHtmlHead().
  private static String CSS_SHOW_WORD_URL = "http://thrax-dev.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/pageOrig.css";
  private static String CSS_URL = "http://thrax-dev.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/page.css";
  // private static String CSS_DOCUVIEWER_URL = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView/template/docuviewer_css";
  // Renderer used to lay out the temporary HTML file and to write the PDF; init() replaces it with a fresh instance.
  private ITextRenderer renderer = new ITextRenderer();
  // Font files already handed to the renderer's font resolver (key and value are both the file name); see setFont().
  private Hashtable<String, String> fontFileNames;
  // Supplies document directory and file names for a docId.
  private DocumentHandler docHandler;
  // Transforms the content of a document's toc file to HTML; see getTocHtml().
  private TocTransformer tocTransformer;

  /**
   * Returns the shared handler, creating and initializing it on first use.
   *
   * The new handler is only published after init() has completed. If init()
   * fails, no half-initialized instance is kept and the next call tries again.
   * The method is synchronized so that concurrent first calls cannot create
   * two handlers.
   *
   * @return the initialized shared handler
   * @throws ApplicationException if the initialization fails
   */
  public static synchronized PdfHandler getInstance() throws ApplicationException {
    if (instance == null) {
      PdfHandler handler = new PdfHandler();
      handler.init();
      // publish only a fully initialized handler
      instance = handler;
    }
    return instance;
  }

  /**
   * (Re-)initializes this handler: creates a fresh renderer with the
   * PdfHandlerUserAgent as its user agent callback, registers all fonts and
   * creates the document handler and the toc transformer.
   *
   * @throws ApplicationException if a font could not be registered
   */
  public void init() throws ApplicationException {
    renderer = new ITextRenderer();
    SharedContext sharedContext = renderer.getSharedContext();
    // user agent to get a callback handle to the web access of images (getImageResource(url))
    PdfHandlerUserAgent userAgent = new PdfHandlerUserAgent();
    userAgent.setSharedContext(sharedContext);
    sharedContext.setUserAgentCallback(userAgent);
    fontFileNames = new Hashtable<String, String>();
    String[] fontFiles = {
      "Junicode-Regular.ttf",
      "Junicode-Bold.ttf",
      "Junicode-Italic.ttf",
      "Junicode-BoldItalic.ttf",  // if set then some not bold italic characters are shown bold (e.g. in Benedetti_1585.xml)
      "Sun-ExtA.ttf",             // chinese symbols
      "Sun-ExtB.ttf",             // chinese symbols
      "DejaVuSans.ttf"            // arabic symbols
    };
    // resolve all font paths first, then register them in the same order
    String[] fontPaths = new String[fontFiles.length];
    for (int i = 0; i < fontFiles.length; i++) {
      fontPaths[i] = Constants.getInstance().getDocumentsDir() + "/../fonts/" + fontFiles[i];
    }
    for (int i = 0; i < fontPaths.length; i++) {
      setFont(fontPaths[i]);
    }
    docHandler = new DocumentHandler();
    tocTransformer = new TocTransformer();
  }
  
  /**
   * Generates the PDF and/or the HTML version of a document from its
   * pre-generated page fragments.
   *
   * The files are written into the document directory as
   * "&lt;name&gt;.pdf" and "&lt;name&gt;.html" ("&lt;name&gt;-gen.html" if the
   * document itself is an html file). For the PDF a temporary file
   * "&lt;name&gt;-4Pdf.html" is written, rendered and deleted afterwards.
   * The first page and the table of contents are optional: when they are not
   * available they are left out.
   *
   * @param pdf      true if the PDF file should be generated
   * @param html     true if the HTML file should be generated
   * @param mdRecord metadata of the document; its docId must not be null
   * @throws ApplicationException if no docId is given or the generation fails
   */
  public void createFile(boolean pdf, boolean html, MetadataRecord mdRecord) throws ApplicationException {
    OutputStream osPdf = null;
    OutputStream osHtml = null;
    OutputStream osHtmlPdf = null;
    String docId = mdRecord.getDocId();
    String language = mdRecord.getLanguage();
    if (docId == null)
      throw new ApplicationException("Pdf/Html-Generation failed: no docId given in mdRecord");
    String docDir = docHandler.getDocDir(docId);
    String docFileName = docHandler.getDocFileName(docId);
    // split the file name at its last dot; a name without a dot has no extension
    int lastDot = docFileName.lastIndexOf(".");
    String docFileNameWithoutExtension = docFileName;
    String docIdExtension = "";
    if (lastDot != -1) {
      docFileNameWithoutExtension = docFileName.substring(0, lastDot);
      docIdExtension = docFileName.substring(lastDot + 1);
    }
    String destFileNamePdf = docDir + "/" + docFileNameWithoutExtension + ".pdf";
    String destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + ".html";
    if (docIdExtension.equals("html")) {
      // do not overwrite the html source document itself
      destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + "-gen.html";
    }
    String destFileNameHtmlPdfTmp = docDir + "/" + docFileNameWithoutExtension + "-4Pdf.html";
    try {
      // start document
      if (pdf) {
        osPdf = new FileOutputStream(new File(destFileNamePdf));
        osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp));
      }
      if (html)
        osHtml = new FileOutputStream(new File(destFileNameHtml));
      int countPages = mdRecord.getPageCount();
      // style page
      String pageStyleHtml = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;";
      // first page (null if it could not be fetched)
      String firstPageHtml = getFirstPageHtml(mdRecord);
      String mdRecordStr = getMdRecordString(mdRecord);
      String htmlHeadStr = getHtmlHead(null, mdRecordStr);
      String fontStyle = getFontStyle(language);
      if (pdf) {
        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtmlPdf);
        if (firstPageHtml != null) {
          write(firstPageHtml, osHtmlPdf);
        }
      }
      if (html) {
        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtml);
        if (firstPageHtml != null) {
          write("<div style=\"" + pageStyleHtml + "\">", osHtml);
          write(firstPageHtml, osHtml);
          write("</div>", osHtml);
        }
      }
      // table of content of document
      String htmlToc = getTocHtml(mdRecord);
      if (html && htmlToc != null) {
        write("<div style=\"" + pageStyleHtml + "\">", osHtml);
        write(htmlToc, osHtml);
        write("</div>", osHtml);
      }
      if (pdf && htmlToc != null) {
        write(htmlToc, osHtmlPdf);
      }
      // all pages of the document
      for (int i = 1; i <= countPages; i++) {
        String htmlPageFragment = getPageFragmentHtml(mdRecord, i);
        htmlPageFragment = "<div id=\"page" + i + "\" class=\"page\">" + htmlPageFragment + "</div>";
        if (html) {
          write("<div style=\"" + "clear:both; text-align:right; width:21.0cm; font-weight:bold;" + "\">", osHtml);
          write("</div>", osHtml);
          write("<div style=\"" + pageStyleHtml + "\">", osHtml);
          write(htmlPageFragment, osHtml);
          write("</div>", osHtml);
        }
        if (pdf) {
          write(htmlPageFragment, osHtmlPdf);
        }
      }
      if (html) {
        write("</body></html>", osHtml);
      }
      // create PDF document
      if (pdf) {
        write("</body></html>", osHtmlPdf);
        // the temporary file has to be complete before the renderer reads it
        osHtmlPdf.close();
        renderer.setDocument(new File(destFileNameHtmlPdfTmp));
        renderer.layout();  // takes the most time
        renderer.createPDF(osPdf);
      }
    } catch (Exception e) {
      // start over with a fresh renderer (presumably because the old one may be unusable after a failure)
      init();
      String message = e.getMessage();
      if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) {
        throw new ApplicationException("fetch image is not possible: " + message);
      }
      throw new ApplicationException(e);
    } finally {
      // only streams that were requested are open; each one is closed independently
      closeQuietly(osHtmlPdf);
      closeQuietly(osPdf);
      closeQuietly(osHtml);
      FileUtils.deleteQuietly(new File(destFileNameHtmlPdfTmp));
    }
  }

  /**
   * Closes the given stream if it was opened; a failure while closing is ignored
   * because the generation result has already been decided at that point.
   */
  private void closeQuietly(OutputStream out) {
    if (out == null)
      return;
    try {
      out.close();
    } catch (IOException ignored) {
      // nothing to recover here
    }
  }

  /**
   * Fetches the index page of the document from the ECHO document viewer and
   * returns the content of its body element wrapped into a div.
   *
   * @param mdRecord metadata of the document, supplies the echoId
   * @return the first page as html fragment or null if the request failed or
   *         the response contains no "&lt;body&gt;" ... "&lt;/body&gt;" pair
   */
  private String getFirstPageHtml(MetadataRecord mdRecord) {
    // Url to Echo viewer
    String docuViewUrl = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView?url=" + mdRecord.getEchoId() + "&viewMode=indexonly";
    String responseHtml;
    try {
      responseHtml = performGetRequest(docuViewUrl);
    } catch (ApplicationException e) {
      // the first page is optional: no page if the viewer is not reachable
      return null;
    }
    String bodyOpenTag = "<body>";
    int bodyStart = responseHtml.indexOf(bodyOpenTag);
    int bodyEnd = responseHtml.indexOf("</body>");
    if (bodyStart == -1 || bodyEnd == -1) {
      return null;
    }
    return "<div>" + responseHtml.substring(bodyStart + bodyOpenTag.length(), bodyEnd) + "</div>";
  }
  
  /**
   * Builds a title page from the metadata of the document: institute name,
   * creator, title and year (each only if present) and a link to the document
   * in the ECHO document viewer.
   *
   * @param mdRecord  metadata of the document
   * @param shortPage if true the additional vertical spacing (line breaks) is left out
   * @return the title page as html fragment
   */
  private String getFirstPageHtml(MetadataRecord mdRecord, boolean shortPage) {
    String author = mdRecord.getCreator();
    String title = mdRecord.getTitle();
    String year = mdRecord.getYear();
    StringBuilder page = new StringBuilder("<div class=\"firstPage\">");
    page.append("<h2 style=\"text-align:center\">" + "Max Planck Institute for the History of Science" + "</h2>");
    // unicode escape for the umlaut so that the text does not depend on the source file encoding
    page.append("<p style=\"text-align:center\">" + "Max-Planck-Institut f\u00fcr Wissenschaftsgeschichte" + "</p>");
    appendLineBreaks(page, 2);
    if (! shortPage) {
      appendLineBreaks(page, 4);
    }
    if (author != null) {
      page.append("<h2 style=\"text-align:center\">").append(author).append("</h2>");
    }
    if (title != null) {
      page.append("<h2 style=\"text-align:center\">").append(title).append("</h2>");
    }
    if (year != null) {
      page.append("<h2 style=\"text-align:center\">").append(year).append("</h2>");
    }
    if (! shortPage) {
      appendLineBreaks(page, 18);
    }
    appendLineBreaks(page, 4);
    // Url to Echo viewer
    String echoId = mdRecord.getEchoId();
    String echoLink;
    if (echoId == null) {
      echoLink = "<a href=\"http://echo.mpiwg-berlin.mpg.de\">" +  "http://echo.mpiwg-berlin.mpg.de" + "</a>";
    } else {
      String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView";
      String document = "?url=" + echoId;
      String urlDoc = urlDocuView + document;
      echoLink = "<a href=\"" + urlDoc + "\">" +  urlDocuView + " <br></br>" + document + "</a>";
    }
    page.append("<p style=\"font:11pt sans-serif;\">Document link: <br></br>").append(echoLink).append("</p>");
    page.append("</div>");
    return page.toString();
  }

  /** Appends the given number of empty line break elements to the buffer. */
  private void appendLineBreaks(StringBuilder buffer, int count) {
    for (int i = 0; i < count; i++) {
      buffer.append("<br></br>");
    }
  }
  
  /**
   * Reads the toc file of the document, transforms it to html and wraps the
   * result into a "tocPage" div with a heading.
   *
   * @param mdRecord metadata of the document, supplies the docId
   * @return the table of contents as html fragment or null if the
   *         transformation delivers null
   * @throws ApplicationException if the toc file could not be read
   */
  private String getTocHtml(MetadataRecord mdRecord) throws ApplicationException {
    try {
      File tocFile = new File(docHandler.getFullFileName(mdRecord.getDocId(), "toc"));
      String tocContent = FileUtils.readFileToString(tocFile, "utf-8");
      String tocBody = tocTransformer.transform(tocContent, "toc", "html");
      if (tocBody == null) {
        return null;
      }
      String heading = "<text style=\"font-weight:bold; font-size:20pt; margin-left:2%; \">Table of contents</text>";
      return "<div class=\"tocPage\">" + heading + tocBody + "</div>";
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
  }
  
  /**
   * Reads the html fragment of one page from the file
   * "pages/page-&lt;pageNumber&gt;.html" below the document directory.
   *
   * @param mdRecord   metadata of the document, supplies the docId
   * @param pageNumber number of the page
   * @return the content of the page file (read as utf-8)
   * @throws ApplicationException if the page file could not be read
   */
  private String getPageFragmentHtml(MetadataRecord mdRecord, int pageNumber) throws ApplicationException {
    try {
      String pagesDir = docHandler.getDocDir(mdRecord.getDocId()) + "/" + "pages";
      File pageFile = new File(pagesDir + "/page-" + pageNumber + ".html");
      return FileUtils.readFileToString(pageFile, "utf-8");
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
  }
  
  /**
   * Builds a short citation string "creator. title. year." from the metadata.
   * Missing or empty parts are left out together with their separator, so the
   * result never starts with ". ". If none of the parts is available the
   * docId is used instead.
   *
   * @param mdRecord metadata of the document, may be null
   * @return the citation string; empty if mdRecord is null or delivers nothing
   */
  private String getMdRecordString(MetadataRecord mdRecord) {
    // check before the first access to the record
    if (mdRecord == null)
      return "";
    String author = mdRecord.getCreator();
    String title = mdRecord.getTitle();
    String year = mdRecord.getYear();
    StringBuilder result = new StringBuilder();
    if (author != null && ! author.equals(""))
      result.append(author);
    if (title != null && ! title.equals("")) {
      if (result.length() > 0)
        result.append(". ");
      result.append(title);
    }
    if (year != null && ! year.equals("")) {
      if (result.length() > 0)
        result.append(". ");
      result.append(year).append(".");
    }
    if (result.length() == 0) {
      String docId = mdRecord.getDocId();
      // do not produce the text "null" for a missing docId
      if (docId != null)
        result.append(docId);
    }
    return result.toString();
  }
  
  /**
   * Builds the head element of the generated html document: an optional
   * inline style, the title and the links to the two stylesheets.
   *
   * @param stylePageStr css rules for an inline style element, left out if null
   * @param titleStr     text of the title element
   * @return the complete head element
   */
  private String getHtmlHead(String stylePageStr, String titleStr) {
    StringBuilder head = new StringBuilder("<head>");
    if (stylePageStr != null) {
      head.append("<style type=\"text/css\">").append(stylePageStr).append("</style>");
    }
    head.append("<title>").append(titleStr).append("</title>");
    String[] cssUrls = { CSS_SHOW_WORD_URL, CSS_URL };
    for (int i = 0; i < cssUrls.length; i++) {
      head.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"").append(cssUrls[i]).append("\"/>");
    }
    head.append("</head>");
    return head.toString();
  }

  /**
   * Delivers the css font declaration for the body of the generated document.
   * Arabic ("ar") uses DejaVu Sans, Chinese ("zh", "zho-Hant") uses the
   * Sun-Ext fonts, every other language uses Junicode. A missing (null)
   * language also gets the Junicode default.
   *
   * @param language language code of the document, may be null
   * @return css declarations for font size and font family
   */
  private String getFontStyle(String language) {
    String fontFamily = "Junicode";
    // constant first: a null language falls through to the default font
    if ("ar".equals(language))
      fontFamily = "DejaVu Sans";
    else if ("zh".equals(language) || "zho-Hant".equals(language))
      fontFamily = "Sun-ExtA, Sun-ExtB";
    return "font-size:11pt; font-family:" + fontFamily + ";";
  }
  
  /**
   * Writes the string utf-8 encoded to the stream and flushes the stream.
   *
   * @param str text to write
   * @param out destination stream
   * @throws ApplicationException if encoding or writing fails
   */
  private void write(String str, OutputStream out) throws ApplicationException {
    try {
      byte[] encoded = str.getBytes("utf-8");
      out.write(encoded);
      out.flush();
    } catch (IOException e) {
      // covers UnsupportedEncodingException and FileNotFoundException too (both are IOExceptions)
      throw new ApplicationException(e);
    }
  }

  /**
   * Registers a font file at the font resolver of the renderer, unless the
   * same file name was registered before.
   *
   * A failure is reported to the caller and the font is not recorded as
   * registered. The method does not call init() on failure: init() registers
   * the same fonts again, so a font that cannot be loaded would lead to an
   * endless recursion.
   *
   * @param fontFileName path of the font file
   * @throws ApplicationException if the font could not be added
   */
  private void setFont(String fontFileName) throws ApplicationException {
    if (fontFileNames.get(fontFileName) != null)
      return;
    try {
      ITextFontResolver fontResolver = renderer.getFontResolver();
      fontResolver.addFont(fontFileName, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);  // Identy_H is Unicode Horizontal; not_embedded means not embedded in the PDF doc
      // record the font only after it was added successfully
      fontFileNames.put(fontFileName, fontFileName);
    } catch (XRRuntimeException e) {
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw new ApplicationException(e);
    } catch (DocumentException e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Performs a http GET request and delivers the response body as string
   * (decoded as utf-8). The connection is released in every case, also when
   * the request fails.
   *
   * @param url url of the request
   * @return the response body
   * @throws ApplicationException if the request fails or delivers no body
   */
  private String performGetRequest(String url) throws ApplicationException {
    GetMethod method = new GetMethod(url);
    try {
      HttpClient httpClient = new HttpClient();
      httpClient.executeMethod(method);
      byte[] resultBytes = method.getResponseBody();
      if (resultBytes == null)
        throw new ApplicationException("no response body received from: " + url);
      return new String(resultBytes, "utf-8");
    } catch (HttpException e) {
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw new ApplicationException(e);
    } finally {
      // give the connection back even if the request or the decoding failed
      method.releaseConnection();
    }
  }
}