Mercurial > hg > mpdl-group
diff software/mpdl-services/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/mpdl-services/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/PdfHandler.java Tue Nov 27 12:35:19 2012 +0100 @@ -0,0 +1,387 @@ +package de.mpg.mpiwg.berlin.mpdl.cms.document; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; +import java.util.Hashtable; + +import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.HttpException; +import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.io.FileUtils; +import org.xhtmlrenderer.layout.SharedContext; +import org.xhtmlrenderer.pdf.ITextFontResolver; +import org.xhtmlrenderer.pdf.ITextRenderer; +import org.xhtmlrenderer.util.XRRuntimeException; + +import com.lowagie.text.DocumentException; +import com.lowagie.text.pdf.BaseFont; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; +import de.mpg.mpiwg.berlin.mpdl.cms.transform.TocTransformer; + +public class PdfHandler { + private static PdfHandler instance; + private static String CSS_SHOW_WORD_URL = "http://thrax-dev.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/pageOrig.css"; + private static String CSS_URL = "http://thrax-dev.rz-berlin.mpg.de/mpiwg-mpdl-cms-web/css/page.css"; + // private static String CSS_DOCUVIEWER_URL = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView/template/docuviewer_css"; + private ITextRenderer renderer = new ITextRenderer(); + private Hashtable<String, String> fontFileNames; + private DocumentHandler docHandler; + private TocTransformer tocTransformer; + + public static PdfHandler getInstance() throws ApplicationException { + if (instance == null) { + instance = new PdfHandler(); + instance.init(); + } + return instance; + } + + public void init() throws ApplicationException { + renderer = new ITextRenderer(); + SharedContext rendererSharedContext = renderer.getSharedContext(); + PdfHandlerUserAgent mpdlUserAgent = new PdfHandlerUserAgent(); // user agent to get a callback handle to the web access of images (getImageResource(url)) + mpdlUserAgent.setSharedContext(rendererSharedContext); + rendererSharedContext.setUserAgentCallback(mpdlUserAgent); + fontFileNames = new Hashtable<String, String>(); + String fontJunicodeFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Regular.ttf"; + String fontJunicodeBoldFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Bold.ttf"; + String fontJunicodeItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-Italic.ttf"; + String fontJunicodeBoldItalicFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Junicode-BoldItalic.ttf"; + String fontSunExtAFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtA.ttf"; // chinese symbols + String fontSunExtBFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/Sun-ExtB.ttf"; // chinese symbols + String fontDejaVuFileName = Constants.getInstance().getDocumentsDir() + "/../fonts/DejaVuSans.ttf"; // arabic symbols + setFont(fontJunicodeFileName); + setFont(fontJunicodeBoldFileName); + setFont(fontJunicodeItalicFileName); + setFont(fontJunicodeBoldItalicFileName); // if set then some not bold italic characters are shown bold (e.g. in Benedetti_1585.xml) + setFont(fontSunExtAFileName); + setFont(fontSunExtBFileName); + setFont(fontDejaVuFileName); + docHandler = new DocumentHandler(); + tocTransformer = new TocTransformer(); + } + + public void createFile(boolean pdf, boolean html, MetadataRecord mdRecord) throws ApplicationException { + OutputStream osPdf = null; + OutputStream osHtml = null; + OutputStream osHtmlPdf = null; + String docId = mdRecord.getDocId(); + String language = mdRecord.getLanguage(); + if (docId == null) + throw new ApplicationException("Pdf/Html-Generation failed: no docId given in mdRecord"); + String docDir = docHandler.getDocDir(docId); + String docFileName = docHandler.getDocFileName(docId); + int lastDot = docFileName.lastIndexOf("."); + String docFileNameWithoutExtension = docFileName.substring(0, lastDot); + String docIdExtension = docFileName.substring(lastDot + 1); + String destFileNamePdf = docDir + "/" + docFileNameWithoutExtension + ".pdf"; + String destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + ".html"; + if (docIdExtension != null && docIdExtension.equals("html")) { + destFileNameHtml = docDir + "/" + docFileNameWithoutExtension + "-gen.html"; + } + String destFileNameHtmlPdfTmp = docDir + "/" + docFileNameWithoutExtension + "-4Pdf.html"; + try { + // start document + if (pdf) { + osPdf = new FileOutputStream(new File(destFileNamePdf)); + osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp)); + } + if (html) + osHtml = new FileOutputStream(new File(destFileNameHtml)); + int countPages = mdRecord.getPageCount(); + // style page + String pageStyleHtml = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;"; + // firstPage + String firstPageHtml = getFirstPageHtml(mdRecord); + String mdRecordStr = getMdRecordString(mdRecord); + String htmlHeadStr = getHtmlHead(null, mdRecordStr); + String fontStyle = getFontStyle(language); + if(pdf) { + write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle + "\">", osHtmlPdf); + // first page + write(firstPageHtml, osHtmlPdf); + } + if (html) { + write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle + "\">", osHtml); + // first page + write("<div style=\"" + pageStyleHtml + "\">", osHtml); + write(firstPageHtml, osHtml); + write("</div>", osHtml); + } + // table of content of document + String htmlToc = getTocHtml(mdRecord); + if (html && htmlToc != null) { + write("<div style=\"" + pageStyleHtml + "\">", osHtml); + write(htmlToc, osHtml); + write("</div>", osHtml); + } + if(pdf && htmlToc != null) { + write(htmlToc, osHtmlPdf); + } + // all pages of the document + for(int i=1; i<=countPages; i++) { + String htmlPageFragment = getPageFragmentHtml(mdRecord, i); + htmlPageFragment = "<div id=\"page" + i + "\" class=\"page\">" + htmlPageFragment + "</div>"; + if (html) { + write("<div style=\"" + "clear:both; text-align:right; width:21.0cm; font-weight:bold;" + "\">", osHtml); + write("</div>", osHtml); + write("<div style=\"" + pageStyleHtml + "\">", osHtml); + // String htmlPageFragmentWithImageUrl = htmlPageFragment.replaceAll("<img src=\".+?\"/>", "<img height=\"24\" width=\"24\" src=\"http://" + CMS_WEB_SERVER_PDF + "/" + CMS_WEB_APP_PDF + "/images/camera.png\"/>"); + write(htmlPageFragment, osHtml); + write("</div>", osHtml); + } + if(pdf) { + // String htmlPageFragmentWithImageUrl = htmlPageFragment.replaceAll("<img src=\".+?\"/>", "<img height=\"24\" width=\"24\" src=\"http://" + CMS_WEB_SERVER_PDF + "/" + CMS_WEB_APP_PDF + "/images/camera.png\"/>"); + write(htmlPageFragment, osHtmlPdf); + } + } + if (html) { + write("</body></html>", osHtml); + } + // create PDF document + if(pdf) { + write("</body></html>", osHtmlPdf); + osHtmlPdf.close(); + renderer.setDocument(new File(destFileNameHtmlPdfTmp)); + renderer.layout(); // takes the most time + renderer.createPDF(osPdf); + } + } catch (Exception e) { + init(); + String message = e.getMessage(); + if (message != null && message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: " + message); + } + throw new ApplicationException(e); + } finally { + try { + osHtmlPdf.close(); + osPdf.close(); + osHtml.close(); + FileUtils.deleteQuietly(new File(destFileNameHtmlPdfTmp)); + } catch (IOException e) { + // nothing + } + } + } + + private String getFirstPageHtml(MetadataRecord mdRecord) { + String firstPageHtml = null; + try { + // Url to Echo viewer + String echoId = mdRecord.getEchoId(); + String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView?url=" + echoId + "&viewMode=indexonly"; + String wholeFirstPageHtml = performGetRequest(urlDocuView); + int from = wholeFirstPageHtml.indexOf("<body>"); + int to = wholeFirstPageHtml.indexOf("</body>"); + if (from != -1 && to != -1) { + firstPageHtml = "<div>" + wholeFirstPageHtml.substring(from + 6, to) + "</div>"; + } + } catch (ApplicationException e) { + // nothing + } + return firstPageHtml; + } + + private String getFirstPageHtml(MetadataRecord mdRecord, boolean shortPage) { + String author = mdRecord.getCreator(); + String title = mdRecord.getTitle(); + String year = mdRecord.getYear(); + String firstPageHtml = "<div class=\"firstPage\">"; + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + "Max Planck Institute for the History of Science" + "</h2>"; + firstPageHtml = firstPageHtml + "<p style=\"text-align:center\">" + "Max-Planck-Institut fŸr Wissenschaftsgeschichte" + "</p>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + if (! shortPage) { + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + } + if (author != null) { + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + author + "</h2>"; + } + if (title != null) { + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + title + "</h2>"; + } + if (year != null) { + firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + year + "</h2>"; + } + if (! shortPage) { + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + } + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + firstPageHtml = firstPageHtml + "<br></br>"; + // Url to Echo viewer + String echoId = mdRecord.getEchoId(); + String urlDocuView = "http://echo.mpiwg-berlin.mpg.de/ECHOdocuView"; + String document = "?url=" + echoId; + String urlDoc = urlDocuView + document; + String echoLink = "<a href=\"" + urlDoc + "\">" + urlDocuView + " <br></br>" + document + "</a>"; + if (echoId == null) + echoLink = "<a href=\"http://echo.mpiwg-berlin.mpg.de\">" + "http://echo.mpiwg-berlin.mpg.de" + "</a>"; + firstPageHtml = firstPageHtml + "<p style=\"font:11pt sans-serif;\">Document link: <br></br>" + echoLink + "</p>"; + firstPageHtml = firstPageHtml + "</div>"; + return firstPageHtml; + } + + private String getTocHtml(MetadataRecord mdRecord) throws ApplicationException { + String htmlStr = null; + try { + String docId = mdRecord.getDocId(); + String tocFileName = docHandler.getFullFileName(docId, "toc"); + File tocFile = new File(tocFileName); + String tocStr = FileUtils.readFileToString(tocFile, "utf-8"); + String htmlToc = tocTransformer.transform(tocStr, "toc", "html"); + if (htmlToc != null) { + htmlStr = "<div class=\"tocPage\">" + "<text style=\"font-weight:bold; font-size:20pt; margin-left:2%; \">Table of contents</text>" + htmlToc + "</div>"; + } + } catch (IOException e) { + throw new ApplicationException(e); + } + return htmlStr; + } + + private String getPageFragmentHtml(MetadataRecord mdRecord, int pageNumber) throws ApplicationException { + String pageHtmlStrFragment = null; + try { + String docId = mdRecord.getDocId(); + String docDir = docHandler.getDocDir(docId); + String docPageDir = docDir + "/" + "pages"; + String pageFileName = docPageDir + "/page-" + pageNumber + ".html"; + File pageFile = new File(pageFileName); + pageHtmlStrFragment = FileUtils.readFileToString(pageFile, "utf-8"); + } catch (IOException e) { + throw new ApplicationException(e); + } + return pageHtmlStrFragment; + } + + private String getMdRecordString(MetadataRecord mdRecord) { + String author = mdRecord.getCreator(); + String title = mdRecord.getTitle(); + String year = mdRecord.getYear(); + String mdRecordStr = ""; + if (mdRecord != null) { + if (author != null && ! author.equals("")) + mdRecordStr = mdRecordStr + author; + if (title != null && ! title.equals("")) + mdRecordStr = mdRecordStr + ". " + title; + if (year != null && ! year.equals("")) + mdRecordStr = mdRecordStr + ". " + year + "."; + if (mdRecordStr.isEmpty()) { + String docId = mdRecord.getDocId(); + mdRecordStr = mdRecordStr + docId; + } + } + return mdRecordStr; + } + + private String getHtmlHead(String stylePageStr, String titleStr) { + String htmlStr = "<head>"; + if (stylePageStr != null) + htmlStr = htmlStr + "<style type=\"text/css\">" + stylePageStr + "</style>"; + htmlStr = htmlStr + "<title>" + titleStr + "</title>"; + htmlStr = htmlStr + "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + CSS_SHOW_WORD_URL + "\"/>"; + htmlStr = htmlStr + "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + CSS_URL + "\"/>"; + htmlStr = htmlStr + "</head>"; + return htmlStr; + } + + private String getFontStyle(String language) { + String fontFamily = "Junicode"; + if (language.equals("ar")) + fontFamily = "DejaVu Sans"; + else if (language.equals("zh") || language.equals("zho-Hant")) + fontFamily = "Sun-ExtA, Sun-ExtB"; + return "font-size:11pt; font-family:" + fontFamily + ";"; + } + + private void write(String str, OutputStream out) throws ApplicationException { + try { + byte[] bytes = str.getBytes("utf-8"); + out.write(bytes, 0, bytes.length); + out.flush(); + } catch (UnsupportedEncodingException e) { + throw new ApplicationException(e); + } catch (FileNotFoundException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + } + + private void setFont(String fontFileName) throws ApplicationException { + try { + String existingFontFileName = fontFileNames.get(fontFileName); + if (existingFontFileName == null) { + fontFileNames.put(fontFileName, fontFileName); + ITextFontResolver fontResolver = renderer.getFontResolver(); + fontResolver.addFont(fontFileName, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED); // Identy_H is Unicode Horizontal; not_embedded means not embedded in the PDF doc + } + } catch (XRRuntimeException e) { + init(); + String message = e.getMessage(); + if (message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: please try again later"); + } + throw new ApplicationException(e); + } catch (IOException e) { + init(); + String message = e.getMessage(); + if (message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: " + message); + } + throw new ApplicationException(e); + } catch (DocumentException e) { + init(); + String message = e.getMessage(); + if (message.indexOf("digilib") > 0 && message.indexOf("500") > 0) { + throw new ApplicationException("fetch image is not possible: " + message); + } + throw new ApplicationException(e); + } + } + + private String performGetRequest(String url) throws ApplicationException { + String resultStr = null; + try { + HttpClient httpClient = new HttpClient(); + GetMethod method = new GetMethod(url); + httpClient.executeMethod(method); + byte[] resultBytes = method.getResponseBody(); + resultStr = new String(resultBytes, "utf-8"); + method.releaseConnection(); + } catch (HttpException e) { + throw new ApplicationException(e); + } catch (IOException e) { + throw new ApplicationException(e); + } + return resultStr; + } +}