diff software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java @ 25:e9fe3186670c default tip

letzter Stand eingecheckt
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 21 May 2013 10:19:32 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java	Tue May 21 10:19:32 2013 +0200
@@ -0,0 +1,403 @@
+package de.mpg.mpiwg.berlin.mpdl.cms.document;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Hashtable;
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpException;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.io.FileUtils;
+import org.xhtmlrenderer.layout.SharedContext;
+import org.xhtmlrenderer.pdf.ITextFontResolver;
+import org.xhtmlrenderer.pdf.ITextRenderer;
+import org.xhtmlrenderer.util.XRRuntimeException;
+
+import com.lowagie.text.DocumentException;
+import com.lowagie.text.pdf.BaseFont;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants;
+import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer;
+import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer;
+
+public class PdfHandler {
+  // lazily created singleton, handed out by getInstance()
+  private static PdfHandler instance;
+  // stylesheets which are linked in the head of every generated HTML document (see getHtmlHead)
+  private static String CSS_SHOW_WORD_URL = "http://thrax.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/pageOrig.css";
+  private static String CSS_URL = "http://thrax.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/page.css";
+  // private static String CSS_DOCUVIEWER_URL = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView/template/docuviewer_css";
+  // renderer which lays out the temporary HTML file and writes the PDF; init() replaces it by a fresh instance
+  private ITextRenderer renderer = new ITextRenderer();
+  // font file names which were already handed to setFont (key and value are the same file name)
+  private Hashtable<String, String> fontFileNames;
+  // delivers the document directory and the file names for a docId
+  private DocumentHandler docHandler;
+  // transforms the toc file content to HTML
+  private TocTransformer tocTransformer;
+  // transforms the tokenized page XML to HTML
+  private PageTransformer pageTransformer;    
+
+  public static PdfHandler getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new PdfHandler();
+      instance.init();
+    }
+    return instance;
+  }
+
+  public void init() throws ApplicationException {
+    renderer = new ITextRenderer();
+    SharedContext rendererSharedContext = renderer.getSharedContext();
+    PdfHandlerUserAgent mpdlUserAgent = new PdfHandlerUserAgent();  // user agent to get a callback handle to the web access of images (getImageResource(url))
+    mpdlUserAgent.setSharedContext(rendererSharedContext);
+    rendererSharedContext.setUserAgentCallback(mpdlUserAgent);
+    fontFileNames = new Hashtable<String, String>();
+    String fontJunicodeFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Regular.ttf";
+    String fontJunicodeBoldFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Bold.ttf";
+    String fontJunicodeItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Italic.ttf";
+    String fontJunicodeBoldItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-BoldItalic.ttf";
+    String fontSunExtAFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtA.ttf";  // chinese symbols
+    String fontSunExtBFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtB.ttf";  // chinese symbols
+    String fontDejaVuFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/DejaVuSans.ttf";  // arabic symbols
+    setFont(fontJunicodeFileName);
+    setFont(fontJunicodeBoldFileName);
+    setFont(fontJunicodeItalicFileName);
+    setFont(fontJunicodeBoldItalicFileName);  // if set then some not bold italic characters are shown bold (e.g. in Benedetti_1585.xml)
+    setFont(fontSunExtAFileName);
+    setFont(fontSunExtBFileName);
+    setFont(fontDejaVuFileName);
+    docHandler = new DocumentHandler();
+    tocTransformer = new TocTransformer();
+    pageTransformer = new PageTransformer();    
+  }
+  
+  /**
+   * Generates the PDF and/or the HTML version of a document and stores it in the
+   * document directory beside the source file. Both versions consist of a first page,
+   * the table of contents (if there is one) and all pages of the document.
+   * For the PDF a temporary HTML file ("...-4Pdf.html") is written and rendered;
+   * it is deleted at the end in every case.
+   *
+   * @param pdf if true the file "&lt;name&gt;.pdf" is generated
+   * @param html if true the file "&lt;name&gt;.html" is generated ("&lt;name&gt;-gen.html" if the
+   *        source document itself is an html file)
+   * @param mdRecord metadata of the document; its docId must not be null
+   * @throws ApplicationException if the docId is missing or if reading, transforming,
+   *         writing or rendering fails
+   */
+  public void createFile(boolean pdf, boolean html, MetadataRecord mdRecord) throws ApplicationException {
+    OutputStream osPdf = null;
+    OutputStream osHtml = null;
+    OutputStream osHtmlPdf = null;
+    String docId = mdRecord.getDocId();
+    String language = mdRecord.getLanguage();
+    if (docId == null)
+      throw new ApplicationException("Pdf/Html-Generation failed: no docId given in mdRecord");
+    String docDir = docHandler.getDocDir(docId);
+    String docFileName = docHandler.getDocFileName(docId);
+    // a file name without a dot has no extension: then the whole name is the base name
+    int lastDot = docFileName.lastIndexOf(".");
+    String docFileNameWithoutExtension = lastDot < 0 ? docFileName : docFileName.substring(0, lastDot);
+    String docIdExtension = lastDot < 0 ? "" : docFileName.substring(lastDot + 1);
+    String destFileNamePdf = docDir + "/" + docFileNameWithoutExtension + ".pdf";
+    String destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + ".html";
+    if ("html".equals(docIdExtension)) {
+      // do not overwrite the source document
+      destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + "-gen.html";
+    }
+    String destFileNameHtmlPdfTmp = docDir + "/" + docFileNameWithoutExtension + "-4Pdf.html";
+    try {
+      // start document
+      if (pdf) {
+        osPdf = new FileOutputStream(new File(destFileNamePdf));
+        osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp));
+      }
+      if (html)
+        osHtml = new FileOutputStream(new File(destFileNameHtml));
+      int countPages = mdRecord.getPageCount();
+      // style page
+      String pageStyleHtml = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;";
+      // firstPage: null if the ECHO viewer delivers none; then it is generated from the metadata
+      String firstPageHtml = getFirstPageHtmlByEchodocuView(mdRecord);
+      String mdRecordStr = getMdRecordString(mdRecord);
+      String htmlHeadStr = getHtmlHead(null, mdRecordStr);
+      String fontStyle = getFontStyle(language);
+      if (pdf) {
+        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtmlPdf);
+        // first page
+        if (firstPageHtml == null)
+          firstPageHtml = getFirstPageHtml(mdRecord, false);  // long first page
+        write(firstPageHtml, osHtmlPdf);
+      }
+      if (html) {
+        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtml);
+        // first page
+        write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+        if (firstPageHtml == null)
+          firstPageHtml = getFirstPageHtml(mdRecord, true);  // short first page
+        write(firstPageHtml, osHtml);
+        write("</div>", osHtml);
+      }
+      // table of content of document
+      String htmlToc = getTocHtml(mdRecord);
+      if (html && htmlToc != null) {
+        write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+        write(htmlToc, osHtml);
+        write("</div>", osHtml);
+      }
+      if (pdf && htmlToc != null) {
+        write(htmlToc, osHtmlPdf);
+      }
+      // all pages of the document
+      for (int i = 1; i <= countPages; i++) {
+        String htmlPageFragment = getPageFragmentHtml(mdRecord, i, pageTransformer);
+        htmlPageFragment = "<div id=\"page" + i + "\" class=\"page\">" + htmlPageFragment + "</div>";
+        if (html) {
+          write("<div style=\"" + "clear:both; text-align:right; width:21.0cm; font-weight:bold;" + "\">", osHtml);
+          write("</div>", osHtml);
+          write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+          write(htmlPageFragment, osHtml);
+          write("</div>", osHtml);
+        }
+        if (pdf) {
+          write(htmlPageFragment, osHtmlPdf);
+        }
+      }
+      if (html) {
+        write("</body></html>", osHtml);
+      }
+      // create PDF document
+      if (pdf) {
+        write("</body></html>", osHtmlPdf);
+        osHtmlPdf.close();  // the temporary file has to be complete before the renderer reads it
+        renderer.setDocument(new File(destFileNameHtmlPdfTmp));
+        renderer.layout();  // takes the most time
+        renderer.createPDF(osPdf);
+      }
+    } catch (Exception e) {
+      init();  // start the next generation with a fresh renderer
+      String message = e.getMessage();
+      if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: " + message);
+      }
+      throw new ApplicationException(e);
+    } finally {
+      // each stream is null if its format was not requested; every stream is closed
+      // independently so that one failing close neither hides the others nor the delete
+      closeQuietly(osHtmlPdf);
+      closeQuietly(osPdf);
+      closeQuietly(osHtml);
+      FileUtils.deleteQuietly(new File(destFileNameHtmlPdfTmp));
+    }
+  }
+
+  /**
+   * Closes the given stream if it was opened at all; a failing close is ignored.
+   *
+   * @param out stream to close, may be null
+   */
+  private void closeQuietly(OutputStream out) {
+    if (out == null)
+      return;
+    try {
+      out.close();
+    } catch (IOException e) {
+      // nothing: closing is best effort, the generation result is already decided
+    }
+  }
+
+  private String getFirstPageHtmlByEchodocuView(MetadataRecord mdRecord) {
+    String firstPageHtml = null;
+    try {
+      // Url to Echo viewer
+      String echoId = mdRecord.getEchoId();
+      if (echoId == null)
+        return null;
+      String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView?url=" + echoId + "&viewMode=indexonly";
+      String wholeFirstPageHtml = performGetRequest(urlDocuView);
+      if (wholeFirstPageHtml != null & wholeFirstPageHtml.equals("XXXXUrlErrorXXXX"))
+        return null;
+      int from = wholeFirstPageHtml.indexOf("<body>");
+      int to = wholeFirstPageHtml.indexOf("</body>");
+      if (from != -1 && to != -1) {
+        firstPageHtml = "<div>" + wholeFirstPageHtml.substring(from + 6, to) + "</div>";
+      }
+    } catch (ApplicationException e) {
+      // nothing
+    }
+    return firstPageHtml;
+  }
+  
+  /**
+   * Generates the first page (institute name, author, title, year and a link to the
+   * document in the ECHO viewer) from the metadata.
+   * NOTE(review): author, title and year are inserted as they are; presumably they
+   * contain no markup characters - verify against the metadata source.
+   *
+   * @param mdRecord metadata of the document
+   * @param shortPage if true the vertical space which stretches the page to a
+   *        whole sheet is left out
+   * @return the first page as HTML fragment
+   */
+  private String getFirstPageHtml(MetadataRecord mdRecord, boolean shortPage) {
+    String author = mdRecord.getCreator();
+    String title = mdRecord.getTitle();
+    String year = mdRecord.getYear();
+    StringBuilder html = new StringBuilder("<div class=\"firstPage\">");
+    html.append("<h2 style=\"text-align:center\">Max Planck Institute for the History of Science</h2>");
+    // the u-umlaut is written as unicode escape so that it does not depend on the encoding of this file
+    html.append("<p style=\"text-align:center\">Max-Planck-Institut f\u00fcr Wissenschaftsgeschichte</p>");
+    appendLineBreaks(html, shortPage ? 2 : 6);
+    if (author != null) {
+      html.append("<h2 style=\"text-align:center\">").append(author).append("</h2>");
+    }
+    if (title != null) {
+      html.append("<h2 style=\"text-align:center\">").append(title).append("</h2>");
+    }
+    if (year != null) {
+      html.append("<h2 style=\"text-align:center\">").append(year).append("</h2>");
+    }
+    appendLineBreaks(html, shortPage ? 4 : 22);
+    // Url to Echo viewer
+    String echoId = mdRecord.getEchoId();
+    String echoLink;
+    if (echoId == null) {
+      // without echoId only the ECHO start page can be linked
+      echoLink = "<a href=\"http://echo.mpiwg-berlin.mpg.de\">http://echo.mpiwg-berlin.mpg.de</a>";
+    } else {
+      String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView";
+      String document = "?url=" + echoId;
+      // the link text is broken into two lines: viewer url and document parameter
+      echoLink = "<a href=\"" + urlDocuView + document + "\">" + urlDocuView + " <br></br>" + document + "</a>";
+    }
+    html.append("<p style=\"font:11pt sans-serif;\">Document link: <br></br>").append(echoLink).append("</p>");
+    html.append("</div>");
+    return html.toString();
+  }
+
+  /**
+   * Appends the given number of line breaks in the form the renderer needs ("<br></br>").
+   *
+   * @param html buffer to append to
+   * @param count number of line breaks
+   */
+  private void appendLineBreaks(StringBuilder html, int count) {
+    for (int i = 0; i < count; i++) {
+      html.append("<br></br>");
+    }
+  }
+  
+  /**
+   * Reads the toc file of the document and transforms it to the HTML of the
+   * table of contents page.
+   *
+   * @param mdRecord metadata of the document, delivers the docId
+   * @return the table of contents page as HTML fragment or null if the
+   *         transformation delivers nothing
+   * @throws ApplicationException if the toc file could not be read
+   */
+  private String getTocHtml(MetadataRecord mdRecord) throws ApplicationException {
+    try {
+      File tocFile = new File(docHandler.getFullFileName(mdRecord.getDocId(), "toc"));
+      String tocXml = FileUtils.readFileToString(tocFile, "utf-8");
+      String tocBody = tocTransformer.transform(tocXml, "toc", "html");
+      if (tocBody == null || tocBody.isEmpty())
+        return null;
+      String heading = "<text style=\"font-weight:bold; font-size:20pt; margin-left:2%; \">Table of contents</text>";
+      return "<div class=\"tocPage\">" + heading + tocBody + "</div>";
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  /**
+   * Reads the tokenized XML file of one page ("pages/page-N-morph.xml" in the
+   * document directory) and transforms it to HTML.
+   *
+   * @param mdRecord metadata of the document
+   * @param pageNumber number of the page
+   * @param pageTransformer transformer which is used for the page
+   * @return the page as HTML fragment
+   * @throws ApplicationException if the page file could not be read
+   */
+  private String getPageFragmentHtml(MetadataRecord mdRecord, int pageNumber, PageTransformer pageTransformer) throws ApplicationException {
+    try {
+      String pageFileName = docHandler.getDocDir(mdRecord.getDocId()) + "/pages/page-" + pageNumber + "-morph.xml";
+      String pageXml = FileUtils.readFileToString(new File(pageFileName), "utf-8");
+      // only orig word spans are build so that the HTML is not too huge for PDF generation
+      pageTransformer.setDisplayWordOptions("orig");
+      return pageTransformer.transform(pageXml, mdRecord, pageNumber, "html");
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  /**
+   * Builds the one line description "author. title. year." of a document; it is used
+   * as title of the generated HTML. Missing or empty parts are left out without
+   * leaving a separator behind. If no part is available the docId is delivered.
+   *
+   * @param mdRecord metadata of the document, may be null
+   * @return the description; the empty string if mdRecord is null
+   */
+  private String getMdRecordString(MetadataRecord mdRecord) {
+    // the check has to be done before the first access to the record
+    if (mdRecord == null)
+      return "";
+    String author = mdRecord.getCreator();
+    String title = mdRecord.getTitle();
+    String year = mdRecord.getYear();
+    StringBuilder mdRecordStr = new StringBuilder();
+    if (author != null && ! author.equals("")) {
+      mdRecordStr.append(author);
+    }
+    if (title != null && ! title.equals("")) {
+      if (mdRecordStr.length() > 0)
+        mdRecordStr.append(". ");  // separator only behind a preceding part
+      mdRecordStr.append(title);
+    }
+    if (year != null && ! year.equals("")) {
+      if (mdRecordStr.length() > 0)
+        mdRecordStr.append(". ");
+      mdRecordStr.append(year).append(".");
+    }
+    if (mdRecordStr.length() == 0) {
+      String docId = mdRecord.getDocId();
+      mdRecordStr.append(docId);
+    }
+    return mdRecordStr.toString();
+  }
+  
+  /**
+   * Builds the head element of the generated HTML: optional inline styles, the
+   * title and the links to the two stylesheets.
+   *
+   * @param stylePageStr CSS which is embedded as style element; left out if null
+   * @param titleStr content of the title element
+   * @return the complete head element
+   */
+  private String getHtmlHead(String stylePageStr, String titleStr) {
+    StringBuilder head = new StringBuilder("<head>");
+    if (stylePageStr != null) {
+      head.append("<style type=\"text/css\">").append(stylePageStr).append("</style>");
+    }
+    head.append("<title>").append(titleStr).append("</title>");
+    String[] cssUrls = { CSS_SHOW_WORD_URL, CSS_URL };
+    for (String cssUrl : cssUrls) {
+      head.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"").append(cssUrl).append("\"/>");
+    }
+    head.append("</head>");
+    return head.toString();
+  }
+
+  /**
+   * Delivers the CSS font declaration for the body of the generated document.
+   *
+   * @param language language code of the document, may be null
+   * @return font size 11pt with "DejaVu Sans" for "ar", "Sun-ExtA, Sun-ExtB" for
+   *         "zh" and "zho-Hant" and "Junicode" in all other cases
+   */
+  private String getFontStyle(String language) {
+    String fontFamily;
+    if ("ar".equals(language)) {
+      fontFamily = "DejaVu Sans";
+    } else if ("zh".equals(language) || "zho-Hant".equals(language)) {
+      fontFamily = "Sun-ExtA, Sun-ExtB";
+    } else {
+      fontFamily = "Junicode";
+    }
+    return "font-size:11pt; font-family:" + fontFamily + ";";
+  }
+  
+  /**
+   * Writes the string UTF-8 encoded to the stream and flushes it.
+   *
+   * @param str text to write
+   * @param out destination stream
+   * @throws ApplicationException if encoding or writing fails
+   */
+  private void write(String str, OutputStream out) throws ApplicationException {
+    try {
+      byte[] utf8Bytes = str.getBytes("utf-8");
+      out.write(utf8Bytes, 0, utf8Bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      // covers also UnsupportedEncodingException and FileNotFoundException (both are IOExceptions)
+      throw new ApplicationException(e);
+    }
+  }
+
+  /**
+   * Registers a font file at the font resolver of the renderer. A file which was
+   * already registered is skipped; the file name is only remembered after the
+   * registration has succeeded.
+   * On failure init() is not called from here: init() itself calls this method, so
+   * a font which cannot be loaded would lead to an endless recursion.
+   *
+   * @param fontFileName full file name of the font
+   * @throws ApplicationException if the font could not be loaded
+   */
+  private void setFont(String fontFileName) throws ApplicationException {
+    if (fontFileNames.get(fontFileName) != null)
+      return;  // already registered
+    try {
+      ITextFontResolver fontResolver = renderer.getFontResolver();
+      fontResolver.addFont(fontFileName, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);  // Identy_H is Unicode Horizontal; not_embedded means not embedded in the PDF doc
+      fontFileNames.put(fontFileName, fontFileName);
+    } catch (XRRuntimeException e) {
+      if (isDigilibServerError(e.getMessage())) {
+        throw new ApplicationException("fetch image is not possible: please try again later");
+      }
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      if (isDigilibServerError(e.getMessage())) {
+        throw new ApplicationException("fetch image is not possible: " + e.getMessage());
+      }
+      throw new ApplicationException(e);
+    } catch (DocumentException e) {
+      if (isDigilibServerError(e.getMessage())) {
+        throw new ApplicationException("fetch image is not possible: " + e.getMessage());
+      }
+      throw new ApplicationException(e);
+    }
+  }
+
+  /**
+   * Tests if an exception message contains both "digilib" and "500" (each not at
+   * the very beginning of the message).
+   *
+   * @param message exception message, may be null
+   * @return true if both texts are contained
+   */
+  private boolean isDigilibServerError(String message) {
+    return message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0;
+  }
+
+  /**
+   * Performs a HTTP GET request and delivers the response body as UTF-8 string.
+   * The connection is released in every case: after success, after an error
+   * status and after an exception.
+   *
+   * @param url url to fetch
+   * @return the response body (the empty string if the response has no body) or
+   *         the marker "XXXXUrlErrorXXXX" if the status code is 400 or higher
+   * @throws ApplicationException if the request fails
+   */
+  private String performGetRequest(String url) throws ApplicationException {
+    GetMethod method = new GetMethod(url);
+    try {
+      HttpClient httpClient = new HttpClient();
+      httpClient.executeMethod(method);
+      int statusCode = method.getStatusCode();
+      if (statusCode >= 400)
+        return "XXXXUrlErrorXXXX";
+      byte[] resultBytes = method.getResponseBody();
+      if (resultBytes == null)
+        return "";  // response without body
+      return new String(resultBytes, "utf-8");
+    } catch (HttpException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      method.releaseConnection();
+    }
+  }
+}