Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java @ 10:59ff47d1e237
TEI Unterstützung, Fehlerbehebungen, externe Objekte
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Fri, 11 Mar 2011 13:33:26 +0100 |
parents | 2396a569e446 |
children | 257f67be5c00 |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.client; import java.io.File; import java.io.FilenameFilter; import java.net.MalformedURLException; import java.net.URL; import java.util.Date; import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor; import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager; import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation; import de.mpg.mpiwg.berlin.mpdl.util.FileUtil; import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer; import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler; import de.mpg.mpiwg.berlin.mpdl.xmlrpc.FilenameFilterExtension; import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler; /** * Handler for eXist collections and documents (singleton). * Your local directory structure should look like this: * documents * archimedes * ar * yourDoc1.xml * ... * ... * zh * yourDoc1.xml * ... * echo * ar * yourDoc1.xml * ... * ... * zh * yourDoc1.xml * ... 
* */ public class DocumentHandler { private MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler; private ESciDocIngestor eSciDocIngestor; private String[] docBases = {"archimedes", "echo", "tei"}; private String[] languages = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"}; private String documentRootCollectionMorph = "/db/mpdl/documents/morph"; private String documentRootCollectionStandard = "/db/mpdl/documents/standard"; private String presentationRootCollection = "/db/mpdl/presentation"; private String schemaRootCollection = "/db/mpdl/schema"; private String localDocumentDirectory = "/Users/jwillenborg/texts/mpdl/documents"; private long beginOfOperation; private long endOfOperation; public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler) throws ApplicationException { this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler; } public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler, ESciDocIngestor eSciDocIngestor) throws ApplicationException { this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler; this.eSciDocIngestor = eSciDocIngestor; } public void doOperation(MpdlDocOperation docOperation) throws ApplicationException{ String operationName = docOperation.getName(); if (operationName.equals("create") || operationName.equals("update")) { createOrUpdate(docOperation); } else if (operationName.equals("delete")) { delete(docOperation); } else if (operationName.equals("updateExist")) { updateExist(docOperation); } else if (operationName.equals("deleteExist")) { deleteExist(docOperation); } else if (operationName.equals("importAllDocumentsLocallyExist")) { importAllDocumentsLocallyExist(); } else if (operationName.equals("generatePdfHtmlDocumentFiles")) { generatePdfHtmlDocumentFiles(); } } private void importAllDocumentsLocallyExist() throws ApplicationException { System.out.println("Start of DocumentHandler. 
This operation could be time consuming because documents are indexed on eXist (normal indexing times are 10 seconds for a document) ..."); beginOperation(); // deletePresentationCollection(); // createPresentationCollection(); // deleteSchemaCollection(); // createSchemaCollection(); deleteDocumentCollections(); createDocumentCollections(); saveDocumentFiles(); endOperation(); System.out.println("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" ); } private void createOrUpdate(MpdlDocOperation docOperation) throws ApplicationException { try { String operationName = docOperation.getName(); String language = docOperation.getLanguage(); String srcUrlStr = docOperation.getSrcUrl(); String eXistIdentifier = docOperation.getDestUrl(); String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; URL srcUrl = null; String protocol = null; if (srcUrlStr != null && ! srcUrlStr.equals("empty")) { srcUrl = new URL(srcUrlStr); protocol = srcUrl.getProtocol(); } SchemaHandler schemaHandler = new SchemaHandler(); boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); if (operationName.equals("create") && docExists) { throw new ApplicationException("Document:" + eXistIdentifier + " already exists. Please use another name or perform the operation \"Update\" of that document."); } if (operationName.equals("update") && ! docExists) { throw new ApplicationException("Document:" + eXistIdentifier + " does not exist. 
Please use a name that exists and perform the operation \"Update\" again or perform the operation \"Create\" of that document"); } // load file to local file system if (protocol.equals("file")) { docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server"); } else { docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server"); } FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName); // perform validations docOperation.setStatus("validate document: " + eXistIdentifier); schemaHandler.validate(destFileName, docOperation); // perform operation on eXist docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server"); RegularizationManager regManager = RegularizationManager.getInstance(); regManager.saveRegularizations(language, destFileName); docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); mpdlXmlRpcDocHandler.saveDocumentFile(docOperation); // save PDF and HTML versions of the document boolean includePdf = docOperation.includePdf(); if (includePdf) { docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier); MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document } // perform operation on eSciDoc eSciDocIngestor.execute(docOperation); } catch (MalformedURLException e) { throw new ApplicationException(e); } } private void delete(MpdlDocOperation docOperation) throws ApplicationException { String operationName = docOperation.getName(); String eXistIdentifier = docOperation.getDestUrl(); String fileName = docOperation.getFileName(); if (fileName == null || fileName.trim().equals("")) throw new ApplicationException("Your document file name is empty. Please specify a file name for your document."); if (! 
fileName.endsWith(".xml")) throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document."); boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); if (! docExists) { throw new ApplicationException("Document:" + eXistIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again."); } // perform operation on eXist docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); // delete file on local eXist file system: xml, pdf and html String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml" String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; FileUtil.getInstance().deleteFile(destFileNameXml); boolean includePdf = docOperation.includePdf(); if (includePdf) { String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf"; String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html"; FileUtil.getInstance().deleteFile(destFileNamePdf); FileUtil.getInstance().deleteFile(destFileNameHtml); } // delete document in eXist mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation); // perform operation on eSciDoc eSciDocIngestor.execute(docOperation); } private void updateExist(MpdlDocOperation docOperation) throws ApplicationException { try { String operationName = docOperation.getName(); String language = docOperation.getLanguage(); String srcUrlStr = docOperation.getSrcUrl(); String eXistIdentifier = docOperation.getDestUrl(); String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; URL srcUrl = null; String protocol = null; if (srcUrlStr != null && ! 
srcUrlStr.equals("empty")) { srcUrl = new URL(srcUrlStr); protocol = srcUrl.getProtocol(); } SchemaHandler schemaHandler = new SchemaHandler(); if (protocol.equals("file")) { docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server"); } else { docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server"); } // load file to local file system FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName); // validation docOperation.setStatus("validate document: " + eXistIdentifier); schemaHandler.validate(destFileName, docOperation); // save regularizations of the document docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server"); RegularizationManager regManager = RegularizationManager.getInstance(); regManager.saveRegularizations(language, destFileName); // perform operation on eXist docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); mpdlXmlRpcDocHandler.saveDocumentFile(docOperation); // save PDF and HTML versions of the document boolean includePdf = docOperation.includePdf(); if (includePdf) { docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier); MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document } } catch (MalformedURLException e) { throw new ApplicationException(e); } } private void deleteExist(MpdlDocOperation docOperation) throws ApplicationException { String operationName = docOperation.getName(); String eXistIdentifier = docOperation.getDestUrl(); String fileName = docOperation.getFileName(); if (fileName == null || fileName.trim().equals("")) throw new ApplicationException("Your document file name is empty. Please specify a file name for your document."); if (! 
fileName.endsWith(".xml")) throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document."); boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); if (! docExists) throw new ApplicationException("Document:" + eXistIdentifier + " does not exist."); // perform operation docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); // delete file on local file system: xml, pdf and html String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml" String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; FileUtil.getInstance().deleteFile(destFileNameXml); boolean includePdf = docOperation.includePdf(); if (includePdf) { String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf"; String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html"; FileUtil.getInstance().deleteFile(destFileNamePdf); FileUtil.getInstance().deleteFile(destFileNameHtml); } // delete document in eXist mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation); } private void deleteDocumentCollections() throws ApplicationException { mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionMorph); mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionStandard); } private void createDocumentCollections() throws ApplicationException { for (int i=0; i < docBases.length; i++) { String docBase = docBases[i]; for (int j=0; j < languages.length; j++) { String language = languages[j]; String documentCollectionMorph = documentRootCollectionMorph + "/" + docBase + "/" + language; mpdlXmlRpcDocHandler.createCollection(documentCollectionMorph); String documentCollectionStandard = documentRootCollectionStandard + "/" + docBase + "/" + language; 
mpdlXmlRpcDocHandler.createCollection(documentCollectionStandard); } } } private void saveDocumentFiles() throws ApplicationException { int counter = 0; for (int i=0; i < docBases.length; i++) { String docBase = docBases[i]; for (int j=0; j < languages.length; j++) { String language = languages[j]; String documentCollection = "/" + docBase + "/" + language; String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language; File localFileDir = new File(localFileDirStr); FilenameFilter filter = new FilenameFilterExtension("xml"); File[] files = localFileDir.listFiles(filter); System.out.println("Adding all documents in path: \"" + localFileDirStr + "\" to eXist collection: \"" + documentCollection + "\" ..."); if (files != null) { for (int k=0; k < files.length; k++) { File f = files[k]; String localFileNameWithoutPath = f.getName(); String fullLocalFileName = f.getPath(); String srcUrl = "file://" + fullLocalFileName; MpdlDocOperation docOperation = new MpdlDocOperation("updateExist", srcUrl, null, docBase, language, localFileNameWithoutPath); long begin = new Date().getTime(); doOperation(docOperation); long end = new Date().getTime(); System.out.println("Added document \"" + fullLocalFileName + "\" to eXist collection: \"" + documentCollection + "\" (" + (end - begin) + " ms)" ); counter++; } } } } System.out.println("Imported documents: " + counter); } private void generatePdfHtmlDocumentFiles() throws ApplicationException { int counter = 0; MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); for (int i=0; i < docBases.length; i++) { String docBase = docBases[i]; for (int j=0; j < languages.length; j++) { String language = languages[j]; String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language; File localFileDir = new File(localFileDirStr); FilenameFilter filter = new FilenameFilterExtension("xml"); File[] files = localFileDir.listFiles(filter); System.out.println("Generating Pdf/Html documents in path: \"" + 
localFileDirStr + "\" ..."); if (files != null) { for (int k=0; k < files.length; k++) { File f = files[k]; String localFileName = f.getName(); String fullLocalFileName = f.getPath(); String srcUrl = "file://" + fullLocalFileName; String localFileNameWithoutExtension = localFileName.substring(0, localFileName.length() - 4); // without ".xml" String fullLocalPdfFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents/" + docBase + "/" + language + "/" + localFileNameWithoutExtension + ".pdf"; File localPdfFile = new File(fullLocalPdfFileName); boolean pdfFileAlreadyExists = localPdfFile.exists(); // generate Pdf/Html file only if pdf file does not already exist if (! pdfFileAlreadyExists) { MpdlDocOperation docOperation = new MpdlDocOperation("generatePdf", srcUrl, null, docBase, language, localFileName); SchemaHandler schemaHandler = new SchemaHandler(); schemaHandler.validate(fullLocalFileName, docOperation); long begin = new Date().getTime(); MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document long end = new Date().getTime(); System.out.println("Generate Pdf/Html document for: \"" + fullLocalFileName + "\" (" + (end - begin) + " ms)" ); counter++; try { Thread.sleep(60000); // delay so that called servers (digilib, eXist) are not stressed too much } catch (InterruptedException e) { throw new ApplicationException(e); } } } } } } System.out.println("Generated documents: " + counter); } private void deletePresentationCollection() throws ApplicationException { mpdlXmlRpcDocHandler.deleteCollection(presentationRootCollection); } private void createPresentationCollection() throws ApplicationException { mpdlXmlRpcDocHandler.createCollection(presentationRootCollection); } private void deleteSchemaCollection() throws ApplicationException { mpdlXmlRpcDocHandler.deleteCollection(schemaRootCollection); } private void 
createSchemaCollection() throws ApplicationException { mpdlXmlRpcDocHandler.createCollection(schemaRootCollection); } private void beginOperation() { beginOfOperation = new Date().getTime(); } private void endOperation() { endOfOperation = new Date().getTime(); } }