Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.cms.document;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.Hashtable;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.FileUtils;
import org.xhtmlrenderer.layout.SharedContext;
import org.xhtmlrenderer.pdf.ITextFontResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import org.xhtmlrenderer.util.XRRuntimeException;

import com.lowagie.text.DocumentException;
import com.lowagie.text.pdf.BaseFont;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants;
import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer;

/**
 * Builds a PDF and/or a single-file HTML rendition of a document out of its
 * title page, its table of contents and its per-page HTML fragments
 * ({@code <docDir>/pages/page-<n>.html}).
 *
 * <p>The handler is used as a lazily created singleton, see {@link #getInstance()}.
 *
 * <p>NOTE(review): the single {@link ITextRenderer} is shared by all calls and nothing
 * in this class synchronizes access to it - confirm that callers serialize their calls.
 */
public class PdfHandler {
  private static PdfHandler instance;
  private static final String CSS_SHOW_WORD_URL = "http://thrax-dev.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/pageOrig.css";
  private static final String CSS_URL = "http://thrax-dev.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/page.css";
  // private static String CSS_DOCUVIEWER_URL = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView/template/docuviewer_css";

  /** Inline style of the framed box that wraps every page in the HTML rendition. */
  private static final String PAGE_STYLE_HTML = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;";

  private ITextRenderer renderer = new ITextRenderer();
  /** Font files already registered with the current renderer (file name mapped to itself). */
  private Hashtable<String, String> fontFileNames;
  private DocumentHandler docHandler;
  private TocTransformer tocTransformer;

  /**
   * Returns the shared handler, creating and initializing it on first use.
   *
   * <p>The instance is only published after {@link #init()} has succeeded, so a failed
   * initialization is retried on the next call instead of handing out a half-built handler.
   *
   * @return the shared, initialized handler
   * @throws ApplicationException if initialization fails
   */
  public static PdfHandler getInstance() throws ApplicationException {
    if (instance == null) {
      PdfHandler handler = new PdfHandler();
      handler.init();
      instance = handler;
    }
    return instance;
  }

  /**
   * (Re)creates the renderer, installs the user agent callback, registers all fonts from
   * {@code <documentsDir>/../fonts} and creates the document handler and TOC transformer.
   *
   * @throws ApplicationException if one of the fonts cannot be registered
   */
  public void init() throws ApplicationException {
    renderer = new ITextRenderer();
    SharedContext rendererSharedContext = renderer.getSharedContext();
    // user agent to get a callback handle to the web access of images (getImageResource(url))
    PdfHandlerUserAgent mpdlUserAgent = new PdfHandlerUserAgent();
    mpdlUserAgent.setSharedContext(rendererSharedContext);
    rendererSharedContext.setUserAgentCallback(mpdlUserAgent);
    fontFileNames = new Hashtable<String, String>();
    String fontsDir = Constants.getInstance().getDocumentsDir() + "/../fonts/";
    setFont(fontsDir + "Junicode-Regular.ttf");
    setFont(fontsDir + "Junicode-Bold.ttf");
    setFont(fontsDir + "Junicode-Italic.ttf");
    // if set then some not bold italic characters are shown bold (e.g. in Benedetti_1585.xml)
    setFont(fontsDir + "Junicode-BoldItalic.ttf");
    setFont(fontsDir + "Sun-ExtA.ttf"); // chinese symbols
    setFont(fontsDir + "Sun-ExtB.ttf"); // chinese symbols
    setFont(fontsDir + "DejaVuSans.ttf"); // arabic symbols
    docHandler = new DocumentHandler();
    tocTransformer = new TocTransformer();
  }

  /**
   * Writes the PDF and/or HTML rendition of the document next to the document file.
   *
   * <p>Output files are {@code <name>.pdf} and {@code <name>.html} ({@code <name>-gen.html}
   * if the document itself is an HTML file). For the PDF an intermediate
   * {@code <name>-4Pdf.html} is written, rendered and deleted again.
   *
   * @param pdf whether to create the PDF file
   * @param html whether to create the HTML file
   * @param mdRecord metadata of the document; must carry a docId
   * @throws ApplicationException if the record or its docId is missing, or if reading,
   *     writing or rendering fails
   */
  public void createFile(boolean pdf, boolean html, MetadataRecord mdRecord) throws ApplicationException {
    if (mdRecord == null) {
      throw new ApplicationException("Pdf/Html-Generation failed: no mdRecord given");
    }
    String docId = mdRecord.getDocId();
    String language = mdRecord.getLanguage();
    if (docId == null) {
      throw new ApplicationException("Pdf/Html-Generation failed: no docId given in mdRecord");
    }
    String docDir = docHandler.getDocDir(docId);
    String docFileName = docHandler.getDocFileName(docId);
    // a file name without any dot has no extension; substring(0, -1) would throw
    int lastDot = docFileName.lastIndexOf('.');
    String docFileNameWithoutExtension = (lastDot == -1) ? docFileName : docFileName.substring(0, lastDot);
    String docIdExtension = (lastDot == -1) ? "" : docFileName.substring(lastDot + 1);
    String destFileNamePdf = docDir + "/" + docFileNameWithoutExtension + ".pdf";
    String destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + ".html";
    if (docIdExtension.equals("html")) {
      // do not overwrite the source document itself
      destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + "-gen.html";
    }
    String destFileNameHtmlPdfTmp = docDir + "/" + docFileNameWithoutExtension + "-4Pdf.html";

    OutputStream osPdf = null;
    OutputStream osHtml = null;
    OutputStream osHtmlPdf = null;
    try {
      // start document
      if (pdf) {
        osPdf = new FileOutputStream(new File(destFileNamePdf));
        osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp));
      }
      if (html) {
        osHtml = new FileOutputStream(new File(destFileNameHtml));
      }
      int countPages = mdRecord.getPageCount();

      // first page: prefer the page served by the ECHO viewer; if that is not available
      // fall back to the locally generated title page instead of failing on a null string
      String firstPageHtml = getFirstPageHtml(mdRecord);
      if (firstPageHtml == null) {
        firstPageHtml = getFirstPageHtml(mdRecord, false);
      }
      String mdRecordStr = getMdRecordString(mdRecord);
      String htmlHeadStr = getHtmlHead(null, mdRecordStr);
      String fontStyle = getFontStyle(language);
      String documentStart = "<html>" + htmlHeadStr + "<body style=\"" + fontStyle + "\">";
      String pageBoxStart = "<div style=\"" + PAGE_STYLE_HTML + "\">";
      if (pdf) {
        write(documentStart, osHtmlPdf);
        write(firstPageHtml, osHtmlPdf);
      }
      if (html) {
        write(documentStart, osHtml);
        write(pageBoxStart, osHtml);
        write(firstPageHtml, osHtml);
        write("</div>", osHtml);
      }

      // table of content of document
      String htmlToc = getTocHtml(mdRecord);
      if (html && htmlToc != null) {
        write(pageBoxStart, osHtml);
        write(htmlToc, osHtml);
        write("</div>", osHtml);
      }
      if (pdf && htmlToc != null) {
        write(htmlToc, osHtmlPdf);
      }

      // all pages of the document
      for (int i = 1; i <= countPages; i++) {
        String htmlPageFragment = getPageFragmentHtml(mdRecord, i);
        htmlPageFragment = "<div id=\"page" + i + "\" class=\"page\">" + htmlPageFragment + "</div>";
        if (html) {
          write("<div style=\"" + "clear:both; text-align:right; width:21.0cm; font-weight:bold;" + "\">", osHtml);
          write("</div>", osHtml);
          write(pageBoxStart, osHtml);
          write(htmlPageFragment, osHtml);
          write("</div>", osHtml);
        }
        if (pdf) {
          write(htmlPageFragment, osHtmlPdf);
        }
      }
      if (html) {
        write("</body></html>", osHtml);
      }

      // create PDF document
      if (pdf) {
        write("</body></html>", osHtmlPdf);
        osHtmlPdf.close(); // the renderer reads the temporary file, so it has to be complete
        renderer.setDocument(new File(destFileNameHtmlPdfTmp));
        renderer.layout(); // takes the most time
        renderer.createPDF(osPdf);
      }
    } catch (Exception e) {
      // start over with a fresh renderer for the next call
      // (presumably because a failed run can leave the renderer unusable - TODO confirm)
      init();
      String message = e.getMessage();
      if (isDigilibServerError(message)) {
        throw new ApplicationException("fetch image is not possible: " + message);
      }
      throw new ApplicationException(e);
    } finally {
      // only the streams that were requested exist; each one is closed independently so
      // that a failing close can neither skip the others nor the temp file cleanup
      closeQuietly(osHtmlPdf);
      closeQuietly(osPdf);
      closeQuietly(osHtml);
      FileUtils.deleteQuietly(new File(destFileNameHtmlPdfTmp));
    }
  }

  /**
   * Fetches the index-only view of the document from the ECHO viewer and returns the
   * content of its {@code <body>} wrapped in a {@code <div>}.
   *
   * @param mdRecord metadata of the document
   * @return the first page markup, or {@code null} if the record has no echoId, the
   *     request fails or the response has no {@code <body>...</body>} section
   */
  private String getFirstPageHtml(MetadataRecord mdRecord) {
    String echoId = mdRecord.getEchoId();
    if (echoId == null) {
      return null;
    }
    String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView?url=" + echoId + "&viewMode=indexonly";
    String wholeFirstPageHtml;
    try {
      wholeFirstPageHtml = performGetRequest(urlDocuView);
    } catch (ApplicationException ignored) {
      // best effort: the caller falls back to the locally generated title page
      return null;
    }
    if (wholeFirstPageHtml == null) {
      return null;
    }
    int from = wholeFirstPageHtml.indexOf("<body>");
    int to = wholeFirstPageHtml.indexOf("</body>");
    if (from == -1 || to == -1 || to < from + 6) {
      return null;
    }
    return "<div>" + wholeFirstPageHtml.substring(from + 6, to) + "</div>";
  }

  /**
   * Generates a title page (institute name, author, title, year and a link to the
   * document in the ECHO viewer) without any remote access.
   *
   * @param mdRecord metadata of the document
   * @param shortPage if {@code true} the vertical spacing blocks are left out
   * @return the title page markup
   */
  private String getFirstPageHtml(MetadataRecord mdRecord, boolean shortPage) {
    String author = mdRecord.getCreator();
    String title = mdRecord.getTitle();
    String year = mdRecord.getYear();
    StringBuilder page = new StringBuilder("<div class=\"firstPage\">");
    page.append("<h2 style=\"text-align:center\">" + "Max Planck Institute for the History of Science" + "</h2>");
    // unicode escape keeps the umlaut independent of the source file encoding
    page.append("<p style=\"text-align:center\">" + "Max-Planck-Institut f\u00fcr Wissenschaftsgeschichte" + "</p>");
    appendLineBreaks(page, 2);
    if (!shortPage) {
      appendLineBreaks(page, 4);
    }
    if (author != null) {
      page.append("<h2 style=\"text-align:center\">").append(author).append("</h2>");
    }
    if (title != null) {
      page.append("<h2 style=\"text-align:center\">").append(title).append("</h2>");
    }
    if (year != null) {
      page.append("<h2 style=\"text-align:center\">").append(year).append("</h2>");
    }
    if (!shortPage) {
      appendLineBreaks(page, 18);
    }
    appendLineBreaks(page, 4);
    // Url to Echo viewer
    String echoId = mdRecord.getEchoId();
    String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView";
    String document = "?url=" + echoId;
    String urlDoc = urlDocuView + document;
    String echoLink = "<a href=\"" + urlDoc + "\">" + urlDocuView + " <br></br>" + document + "</a>";
    if (echoId == null) {
      echoLink = "<a href=\"http://echo.mpiwg-berlin.mpg.de\">" + "http://echo.mpiwg-berlin.mpg.de" + "</a>";
    }
    page.append("<p style=\"font:11pt sans-serif;\">Document link: <br></br>" + echoLink + "</p>");
    page.append("</div>");
    return page.toString();
  }

  /** Appends {@code count} empty line break elements to the given buffer. */
  private static void appendLineBreaks(StringBuilder target, int count) {
    for (int i = 0; i < count; i++) {
      target.append("<br></br>");
    }
  }

  /**
   * Reads the "toc" file of the document and transforms it to HTML.
   *
   * @param mdRecord metadata of the document
   * @return the table of contents markup, or {@code null} if the transformer returns null
   * @throws ApplicationException if the toc file cannot be read
   */
  private String getTocHtml(MetadataRecord mdRecord) throws ApplicationException {
    String htmlStr = null;
    try {
      String docId = mdRecord.getDocId();
      String tocFileName = docHandler.getFullFileName(docId, "toc");
      File tocFile = new File(tocFileName);
      String tocStr = FileUtils.readFileToString(tocFile, "utf-8");
      String htmlToc = tocTransformer.transform(tocStr, "toc", "html");
      if (htmlToc != null) {
        htmlStr = "<div class=\"tocPage\">" + "<text style=\"font-weight:bold; font-size:20pt; margin-left:2%; \">Table of contents</text>" + htmlToc + "</div>";
      }
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
    return htmlStr;
  }

  /**
   * Reads the HTML fragment of one page from {@code <docDir>/pages/page-<pageNumber>.html}.
   *
   * @param mdRecord metadata of the document
   * @param pageNumber number of the page as used in the fragment file name
   * @return the content of the fragment file
   * @throws ApplicationException if the fragment file cannot be read
   */
  private String getPageFragmentHtml(MetadataRecord mdRecord, int pageNumber) throws ApplicationException {
    try {
      String docId = mdRecord.getDocId();
      String docDir = docHandler.getDocDir(docId);
      String docPageDir = docDir + "/" + "pages";
      String pageFileName = docPageDir + "/page-" + pageNumber + ".html";
      return FileUtils.readFileToString(new File(pageFileName), "utf-8");
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Builds the document title of the form {@code "Author. Title. Year."} from the parts
   * that are present; if none is present the docId is used.
   *
   * @param mdRecord metadata of the document, may be {@code null}
   * @return the title string, empty if {@code mdRecord} is {@code null}
   */
  private String getMdRecordString(MetadataRecord mdRecord) {
    if (mdRecord == null) {
      return "";
    }
    StringBuilder result = new StringBuilder();
    appendTitlePart(result, mdRecord.getCreator());
    appendTitlePart(result, mdRecord.getTitle());
    String year = mdRecord.getYear();
    if (year != null && !year.equals("")) {
      appendTitlePart(result, year);
      result.append(".");
    }
    if (result.length() == 0) {
      result.append(mdRecord.getDocId());
    }
    return result.toString();
  }

  /** Appends a non-empty part, separated by ". " from whatever is already in the buffer. */
  private static void appendTitlePart(StringBuilder target, String part) {
    if (part == null || part.equals("")) {
      return;
    }
    if (target.length() > 0) {
      target.append(". ");
    }
    target.append(part);
  }

  /**
   * Builds the {@code <head>} element with an optional inline style, the title and the
   * links to the two stylesheets.
   *
   * @param stylePageStr CSS for an inline {@code <style>} element, or {@code null} for none
   * @param titleStr text of the {@code <title>} element
   * @return the head markup
   */
  private String getHtmlHead(String stylePageStr, String titleStr) {
    StringBuilder head = new StringBuilder("<head>");
    if (stylePageStr != null) {
      head.append("<style type=\"text/css\">").append(stylePageStr).append("</style>");
    }
    head.append("<title>").append(titleStr).append("</title>");
    head.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + CSS_SHOW_WORD_URL + "\"/>");
    head.append("<link rel=\"stylesheet\" type=\"text/css\" href=\"" + CSS_URL + "\"/>");
    head.append("</head>");
    return head.toString();
  }

  /**
   * Selects the font family for the document language: DejaVu Sans for "ar", the Sun-Ext
   * fonts for "zh" / "zho-Hant" and Junicode for everything else.
   *
   * @param language language code of the document, may be {@code null}
   * @return the inline CSS with font size and font family
   */
  private String getFontStyle(String language) {
    String fontFamily = "Junicode";
    // constant-first comparison: records without a language get the default font
    if ("ar".equals(language)) {
      fontFamily = "DejaVu Sans";
    } else if ("zh".equals(language) || "zho-Hant".equals(language)) {
      fontFamily = "Sun-ExtA, Sun-ExtB";
    }
    return "font-size:11pt; font-family:" + fontFamily + ";";
  }

  /**
   * Writes the string UTF-8 encoded to the stream and flushes it.
   *
   * @param str text to write
   * @param out stream to write to
   * @throws ApplicationException if encoding or writing fails
   */
  private void write(String str, OutputStream out) throws ApplicationException {
    try {
      byte[] bytes = str.getBytes("utf-8");
      out.write(bytes, 0, bytes.length);
      out.flush();
    } catch (UnsupportedEncodingException e) {
      throw new ApplicationException(e);
    } catch (FileNotFoundException e) {
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Closes the stream if there is one and ignores a failing close.
   *
   * @param out stream to close, may be {@code null}
   */
  private static void closeQuietly(OutputStream out) {
    if (out == null) {
      return;
    }
    try {
      out.close();
    } catch (IOException ignored) {
      // nothing useful can be done about a failing close during cleanup
    }
  }

  /**
   * Tells whether an exception message mentions both "digilib" and "500".
   *
   * @param message exception message, may be {@code null}
   * @return {@code true} if both markers occur in the message
   */
  private static boolean isDigilibServerError(String message) {
    return message != null && message.indexOf("digilib") >= 0 && message.indexOf("500") >= 0;
  }

  /**
   * Registers a font file with the renderer unless it has been registered already.
   *
   * <p>This method is called from {@link #init()}, so it must not call {@code init()}
   * itself on failure: a font file that cannot be loaded would recurse without end.
   *
   * @param fontFileName path of the font file
   * @throws ApplicationException if the font cannot be registered
   */
  private void setFont(String fontFileName) throws ApplicationException {
    try {
      String existingFontFileName = fontFileNames.get(fontFileName);
      if (existingFontFileName == null) {
        fontFileNames.put(fontFileName, fontFileName);
        ITextFontResolver fontResolver = renderer.getFontResolver();
        // Identity_H is Unicode Horizontal; not_embedded means not embedded in the PDF doc
        fontResolver.addFont(fontFileName, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
      }
    } catch (XRRuntimeException e) {
      if (isDigilibServerError(e.getMessage())) {
        throw new ApplicationException("fetch image is not possible: please try again later");
      }
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw fontRegistrationFailure(e);
    } catch (DocumentException e) {
      throw fontRegistrationFailure(e);
    }
  }

  /** Converts a font registration failure, keeping the message of the digilib special case. */
  private static ApplicationException fontRegistrationFailure(Exception e) {
    String message = e.getMessage();
    if (isDigilibServerError(message)) {
      return new ApplicationException("fetch image is not possible: " + message);
    }
    return new ApplicationException(e);
  }

  /**
   * Performs an HTTP GET and returns the response body decoded as UTF-8.
   *
   * @param url address to request
   * @return the response body, or {@code null} if the response has no body
   * @throws ApplicationException if the request fails
   */
  private String performGetRequest(String url) throws ApplicationException {
    GetMethod method = new GetMethod(url);
    try {
      HttpClient httpClient = new HttpClient();
      httpClient.executeMethod(method);
      byte[] resultBytes = method.getResponseBody();
      if (resultBytes == null) {
        return null;
      }
      return new String(resultBytes, "utf-8");
    } catch (HttpException e) {
      throw new ApplicationException(e);
    } catch (IOException e) {
      throw new ApplicationException(e);
    } finally {
      // hand the connection back on the failure paths as well
      method.releaseConnection();
    }
  }
}