Mercurial > hg > mpdl-group
diff software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java @ 25:e9fe3186670c default tip
letzter Stand eingecheckt
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 21 May 2013 10:19:32 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java Tue May 21 10:19:32 2013 +0200 @@ -0,0 +1,403 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.document; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.Hashtable; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.io.FileUtils; +import org.xhtmlrenderer.layout.SharedContext; +import org.xhtmlrenderer.pdf.ITextFontResolver; +import org.xhtmlrenderer.pdf.ITextRenderer; +import org.xhtmlrenderer.util.XRRuntimeException; + +import com.lowagie.text.DocumentException; +import com.lowagie.text.pdf.BaseFont; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer; + +public class PdfHandler { + private static PdfHandler instance; + private static String CSS_SHOW_WORD_URL = "http://thrax.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/pageOrig.css"; + private static String CSS_URL = "http://thrax.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/page.css"; + // private static String CSS_DOCUVIEWER_URL = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView/template/docuviewer_css"; + private ITextRenderer renderer = new ITextRenderer(); + private Hashtable<String, String> fontFileNames; + private DocumentHandler docHandler; + private TocTransformer tocTransformer; + private PageTransformer pageTransformer; + + public static PdfHandler getInstance() throws ApplicationException { + if (instance == null) { + instance = new PdfHandler(); + instance.init(); + } + return instance; + } + + public void init() throws ApplicationException { + renderer = new ITextRenderer(); + SharedContext rendererSharedContext = renderer.getSharedContext(); + PdfHandlerUserAgent mpdlUserAgent = new PdfHandlerUserAgent(); // user agent to get a callback handle to the web access of images (getImageResource(url)) + mpdlUserAgent.setSharedContext(rendererSharedContext); + rendererSharedContext.setUserAgentCallback(mpdlUserAgent); + fontFileNames = new Hashtable<String, String>(); + String fontJunicodeFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Regular.ttf"; + String fontJunicodeBoldFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Bold.ttf"; + String fontJunicodeItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Italic.ttf"; + String fontJunicodeBoldItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-BoldItalic.ttf"; + String fontSunExtAFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtA.ttf"; // chinese symbols + String fontSunExtBFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtB.ttf"; // chinese symbols + String fontDejaVuFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/DejaVuSans.ttf"; // arabic symbols + setFont(fontJunicodeFileName); + setFont(fontJunicodeBoldFileName); + setFont(fontJunicodeItalicFileName); + setFont(fontJunicodeBoldItalicFileName); // if set then some not bold italic characters are shown bold (e.g. in Benedetti_1585.xml) + setFont(fontSunExtAFileName); + setFont(fontSunExtBFileName); + setFont(fontDejaVuFileName); + docHandler = new DocumentHandler(); + tocTransformer = new TocTransformer(); + pageTransformer = new PageTransformer(); + } + + public void createFile(boolean pdf, boolean html, MetadataRecord mdRecord) throws ApplicationException { + OutputStream osPdf = null; + OutputStream osHtml = null; + OutputStream osHtmlPdf = null; + String docId = mdRecord.getDocId(); + String language = mdRecord.getLanguage(); + if (docId == null) + throw new ApplicationException("Pdf/Html-Generation failed: no docId given in mdRecord"); + String docDir = docHandler.getDocDir(docId); + String docFileName = docHandler.getDocFileName(docId); + int lastDot = docFileName.lastIndexOf("."); + String docFileNameWithoutExtension = docFileName.substring(0, lastDot); + String docIdExtension = docFileName.substring(lastDot + 1); + String destFileNamePdf = docDir + "/" + docFileNameWithoutExtension + ".pdf"; + String destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + ".html"; + if (docIdExtension != null && docIdExtension.equals("html")) { + destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + "-gen.html"; + } + String destFileNameHtmlPdfTmp = docDir + "/" + docFileNameWithoutExtension + "-4Pdf.html"; + try { + // start document + if (pdf) { + osPdf = new FileOutputStream(new File(destFileNamePdf)); + osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp)); + } + if (html) + osHtml = new FileOutputStream(new File(destFileNameHtml)); + int countPages = mdRecord.getPageCount(); + // style page + String pageStyleHtml = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;"; + // firstPage + String firstPageHtml = getFirstPageHtmlByEchodocuView(mdRecord); + String mdRecordStr = getMdRecordString(mdRecord); + String htmlHeadStr = getHtmlHead(null, mdRecordStr); + String fontStyle = getFontStyle(language); + if(pdf) { + write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle + "\">", osHtmlPdf); + // first page + if (firstPageHtml == null) + firstPageHtml = getFirstPageHtml(mdRecord, false); // long first page + write(firstPageHtml, osHtmlPdf); + } + if (html) { + write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle + "\">", osHtml); + // first page + write("<div style=\"" + pageStyleHtml + "\">", osHtml); + if (firstPageHtml == null) + firstPageHtml = getFirstPageHtml(mdRecord, true); // short first page + write(firstPageHtml, osHtml); + write("</div>", osHtml); + } + // table of content of document + String htmlToc = getTocHtml(mdRecord); + if (html && htmlToc != null) { + write("<div style=\"" + pageStyleHtml + "\">", osHtml); + write(htmlToc, osHtml); + write("</div>", osHtml); + } + if(pdf && htmlToc != null) { + write(htmlToc, osHtmlPdf); + } + // all pages of the document + for(int i=1; i<=countPages; i++) { + String htmlPageFragment = getPageFragmentHtml(mdRecord, i, pageTransformer); + htmlPageFragment = "<div id=\"page" + i + "\" class=\"page\">" + htmlPageFragment + "</div>"; + if (html) { + write("<div style=\"" + "clear:both; text-align:right; width:21.0cm; font-weight:bold;" + "\">", osHtml); + write("</div>", osHtml); + write("<div style=\"" + pageStyleHtml + "\">", osHtml); + write(htmlPageFragment, osHtml); + write("</div>", osHtml); + } + if(pdf) { + write(htmlPageFragment, osHtmlPdf); + } + } + if (html) { + write("</body></html>", osHtml); + } + // create PDF document + if(pdf) { + write("</body></html>", osHtmlPdf); + osHtmlPdf.close(); + renderer.setDocument(new File(destFileNameHtmlPdfTmp)); + renderer.layout(); // takes the most time + renderer.createPDF(osPdf); + } + } catch (Exception e) { + init(); + String message = e.getMessage(); + if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: " + message); + } + throw new ApplicationException(e); + } finally { + try { + osHtmlPdf.close(); + osPdf.close(); + osHtml.close(); + FileUtils.deleteQuietly(new File(destFileNameHtmlPdfTmp)); + } catch (IOException e) { + // nothing + } + } + } + + private String getFirstPageHtmlByEchodocuView(MetadataRecord mdRecord) { + String firstPageHtml = null; + try { + // Url to Echo viewer + String echoId = mdRecord.getEchoId(); + if (echoId == null) + return null; + String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView?url=" + echoId + "&viewMode=indexonly"; + String wholeFirstPageHtml = performGetRequest(urlDocuView); + if (wholeFirstPageHtml != null & wholeFirstPageHtml.equals("XXXXUrlErrorXXXX")) + return null; + int from = wholeFirstPageHtml.indexOf("<body>"); + int to = wholeFirstPageHtml.indexOf("</body>"); + if (from != -1 && to != -1) { + firstPageHtml = "<div>" + wholeFirstPageHtml.substring(from + 6, to) + "</div>"; + } + } catch (ApplicationException e) { + // nothing + } + return firstPageHtml; + } + + private String getFirstPageHtml(MetadataRecord mdRecord, boolean shortPage) { + String author = mdRecord.getCreator(); + String title = mdRecord.getTitle(); + String year = mdRecord.getYear(); + String firstPageHtml = "<div class=\"firstPage\">"; + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + "Max Planck Institute for the History of Science" + "</h2>"; + firstPageHtml = firstPageHtml + "<p style=\"text-align:center\">" + "Max-Planck-Institut fŸr Wissenschaftsgeschichte" + "</p>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + if (! shortPage) { + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + } + if (author != null) { + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + author + "</h2>"; + } + if (title != null) { + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + title + "</h2>"; + } + if (year != null) { + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + year + "</h2>"; + } + if (! shortPage) { + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + } + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + // Url to Echo viewer + String echoId = mdRecord.getEchoId(); + String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView"; + String document = "?url=" + echoId; + String urlDoc = urlDocuView + document; + String echoLink = "<a href=\"" + urlDoc + "\">" + urlDocuView + " <br></br>" + document + "</a>"; + if (echoId == null) + echoLink = "<a href=\"http://echo.mpiwg-berlin.mpg.de\">" + "http://echo.mpiwg-berlin.mpg.de" + "</a>"; + firstPageHtml = firstPageHtml + "<p style=\"font:11pt sans-serif;\">Document link: <br></br>" + echoLink + "</p>"; + firstPageHtml = firstPageHtml + "</div>"; + return firstPageHtml; + } + + private String getTocHtml(MetadataRecord mdRecord) throws ApplicationException { + String htmlStr = null; + try { + String docId = mdRecord.getDocId(); + String tocFileName = docHandler.getFullFileName(docId, "toc"); + File tocFile = new File(tocFileName); + String tocStr = FileUtils.readFileToString(tocFile, "utf-8"); + String htmlToc = tocTransformer.transform(tocStr, "toc", "html"); + if (htmlToc != null && ! htmlToc.isEmpty()) { + htmlStr = "<div class=\"tocPage\">" + "<text style=\"font-weight:bold; font-size:20pt; margin-left:2%; \">Table of contents</text>" + htmlToc + "</div>"; + } + } catch (IOException e) { + throw new ApplicationException(e); + } + return htmlStr; + } + + private String getPageFragmentHtml(MetadataRecord mdRecord, int pageNumber, PageTransformer pageTransformer) throws ApplicationException { + String pageHtmlStrFragment = null; + try { + String docId = mdRecord.getDocId(); + String docDir = docHandler.getDocDir(docId); + String docPageTokenizedFileName = docDir + "/pages/page-" + pageNumber + "-morph.xml"; + File docPageTokenizedFile = new File(docPageTokenizedFileName); + String tokenizedXmlStr = FileUtils.readFileToString(docPageTokenizedFile, "utf-8"); + pageTransformer.setDisplayWordOptions("orig"); // only orig word spans are build so that the HTML is not too huge for PDF generation + pageHtmlStrFragment = pageTransformer.transform(tokenizedXmlStr, mdRecord, pageNumber, "html"); + } catch (IOException e) { + throw new ApplicationException(e); + } + return pageHtmlStrFragment; + } + + private String getMdRecordString(MetadataRecord mdRecord) { + String author = mdRecord.getCreator(); + String title = mdRecord.getTitle(); + String year = mdRecord.getYear(); + String mdRecordStr = ""; + if (mdRecord != null) { + if (author != null && ! author.equals("")) { + mdRecordStr = mdRecordStr + author; + } + if (title != null && ! title.equals("")) { + mdRecordStr = mdRecordStr + ". " + title; + } + if (year != null && ! year.equals("")) { + mdRecordStr = mdRecordStr + ". " + year + "."; + } + if (mdRecordStr.isEmpty()) { + String docId = mdRecord.getDocId(); + mdRecordStr = mdRecordStr + docId; + } + } + return mdRecordStr; + } + + private String getHtmlHead(String stylePageStr, String titleStr) { + String htmlStr = "<head>"; + if (stylePageStr != null) + htmlStr = htmlStr + "<style type=\"text/css\">" + stylePageStr + "</style>"; + htmlStr = htmlStr + "<title>" + titleStr + "</title>"; + htmlStr = htmlStr + "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + CSS_SHOW_WORD_URL + "\"/>"; + htmlStr = htmlStr + "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + CSS_URL + "\"/>"; + htmlStr = htmlStr + "</head>"; + return htmlStr; + } + + private String getFontStyle(String language) { + String fontFamily = "Junicode"; + if (language != null && language.equals("ar")) + fontFamily = "DejaVu Sans"; + else if (language != null && (language.equals("zh") || language.equals("zho-Hant"))) + fontFamily = "Sun-ExtA, Sun-ExtB"; + return "font-size:11pt; font-family:" + fontFamily + ";"; + } + + private void write(String str, OutputStream out) throws ApplicationException { + try { + byte[] bytes = str.getBytes("utf-8"); + out.write(bytes, 0, bytes.length); + out.flush(); + } catch (UnsupportedEncodingException e) { + throw new ApplicationException(e); + } catch (FileNotFoundException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private void setFont(String fontFileName) throws ApplicationException { + try { + String existingFontFileName = fontFileNames.get(fontFileName); + if (existingFontFileName == null) { + fontFileNames.put(fontFileName, fontFileName); + ITextFontResolver fontResolver = renderer.getFontResolver(); + fontResolver.addFont(fontFileName, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); // Identy_H is Unicode Horizontal; not_embedded means not embedded in the PDF doc + } + } catch (XRRuntimeException e) { + init(); + String message = e.getMessage(); + if (message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: please try again later"); + } + throw new ApplicationException(e); + } catch (IOException e) { + init(); + String message = e.getMessage(); + if (message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: " + message); + } + throw new ApplicationException(e); + } catch (DocumentException e) { + init(); + String message = e.getMessage(); + if (message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: " + message); + } + throw new ApplicationException(e); + } + } + + private String performGetRequest(String url) throws ApplicationException { + String resultStr = null; + try { + HttpClient httpClient = new HttpClient(); + GetMethod method = new GetMethod(url); + httpClient.executeMethod(method); + int statusCode = method.getStatusCode(); + if (statusCode >= 400) + return "XXXXUrlErrorXXXX"; + byte[] resultBytes = method.getResponseBody(); + resultStr = new String(resultBytes, "utf-8"); + method.releaseConnection(); + } catch (HttpException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + return resultStr; + } +}