Mercurial > hg > mpdl-group
diff software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children | fdbdcffe6b90 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java Wed Nov 24 17:24:23 2010 +0100 @@ -0,0 +1,370 @@ +package de.mpg.mpiwg.berlin.mpdl.client; + +import java.io.File; +import java.io.FilenameFilter; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Date; + +import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor; +import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord; +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; +import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager; +import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation; +import de.mpg.mpiwg.berlin.mpdl.util.FileUtil; +import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer; +import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler; +import de.mpg.mpiwg.berlin.mpdl.xmlrpc.FilenameFilterExtension; +import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler; + +/** + * Handler for eXist collections and documents (singleton). + * Your local directory structure should look like this: + * documents + * archimedes + * ar + * yourDoc1.xml + * ... + * ... + * zh + * yourDoc1.xml + * ... + * echo + * ar + * yourDoc1.xml + * ... + * ... + * zh + * yourDoc1.xml + * ... + * + */ +public class DocumentHandler { + private MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler; + private ESciDocIngestor eSciDocIngestor; + + private String[] docBases = {"archimedes", "echo"}; + private String[] languages = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"}; + private String documentRootCollectionMorph = "/db/mpdl/documents/morph"; + private String documentRootCollectionStandard = "/db/mpdl/documents/standard"; + private String presentationRootCollection = "/db/mpdl/presentation"; + private String schemaRootCollection = "/db/mpdl/schema"; + private String localDocumentDirectory = "/Users/jwillenborg/texts/mpdl/documents"; + + private long beginOfOperation; + private long endOfOperation; + + + public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler) throws ApplicationException { + this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler; + } + + public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler, ESciDocIngestor eSciDocIngestor) throws ApplicationException { + this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler; + this.eSciDocIngestor = eSciDocIngestor; + } + + public void doOperation(MpdlDocOperation docOperation) throws ApplicationException{ + String operationName = docOperation.getName(); + if (operationName.equals("create") || operationName.equals("update")) { + createOrUpdate(docOperation); + } else if (operationName.equals("delete")) { + delete(docOperation); + } else if (operationName.equals("updateExist")) { + updateExist(docOperation); + } else if (operationName.equals("deleteExist")) { + deleteExist(docOperation); + } else if (operationName.equals("importAllDocumentsLocallyExist")) { + importAllDocumentsLocallyExist(); + } else if (operationName.equals("generatePdfHtmlDocumentFiles")) { + generatePdfHtmlDocumentFiles(); + } + } + + private void importAllDocumentsLocallyExist() throws ApplicationException { + System.out.println("Start of DocumentHandler. This operation could be time consuming because documents are indexed on eXist (normal indexing times are 10 seconds for a document) ..."); + beginOperation(); + // deletePresentationCollection(); + // createPresentationCollection(); + // deleteSchemaCollection(); + // createSchemaCollection(); + + deleteDocumentCollections(); + createDocumentCollections(); + saveDocumentFiles(); + endOperation(); + System.out.println("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" ); + } + + private void createOrUpdate(MpdlDocOperation docOperation) throws ApplicationException { + try { + String operationName = docOperation.getName(); + String language = docOperation.getLanguage(); + String srcUrlStr = docOperation.getSrcUrl(); + String eXistIdentifier = docOperation.getDestUrl(); + String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; + URL srcUrl = null; + String protocol = null; + if (srcUrlStr != null && ! srcUrlStr.equals("empty")) { + srcUrl = new URL(srcUrlStr); + protocol = srcUrl.getProtocol(); + } + SchemaHandler schemaHandler = new SchemaHandler(); + boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); + if (operationName.equals("create") && docExists) { + throw new ApplicationException("Document:" + eXistIdentifier + " already exists. Please use another name or perform the operation \"Update\" of that document."); + } + if (operationName.equals("update") && ! docExists) { + throw new ApplicationException("Document:" + eXistIdentifier + " does not exist. Please use a name that exists and perform the operation \"Update\" again or perform the operation \"Create\" of that document"); + } + // load file to local file system + if (protocol.equals("file")) { + docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server"); + } else { + docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server"); + } + FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName); + // perform validations + docOperation.setStatus("validate document: " + eXistIdentifier); + schemaHandler.validate(destFileName, docOperation); + // perform operation on eXist + docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server"); + RegularizationManager regManager = RegularizationManager.getInstance(); + regManager.saveRegularizations(language, destFileName); + docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); + mpdlXmlRpcDocHandler.saveDocumentFile(docOperation); + // save PDF and HTML versions of the document + boolean includePdf = docOperation.includePdf(); + if (includePdf) { + docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier); + MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); + MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord + mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document + } + // perform operation on eSciDoc + eSciDocIngestor.execute(docOperation); + } catch (MalformedURLException e) { + throw new ApplicationException(e); + } + } + + private void delete(MpdlDocOperation docOperation) throws ApplicationException { + String operationName = docOperation.getName(); + String eXistIdentifier = docOperation.getDestUrl(); + String fileName = docOperation.getFileName(); + if (fileName == null || fileName.trim().equals("")) + throw new ApplicationException("Your document file name is empty. Please specify a file name for your document."); + if (! fileName.endsWith(".xml")) + throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document."); + boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); + if (! docExists) { + throw new ApplicationException("Document:" + eXistIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again."); + } + // perform operation on eXist + docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); + // delete file on local eXist file system: xml, pdf and html + String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml" + String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; + FileUtil.getInstance().deleteFile(destFileNameXml); + boolean includePdf = docOperation.includePdf(); + if (includePdf) { + String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf"; + String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html"; + FileUtil.getInstance().deleteFile(destFileNamePdf); + FileUtil.getInstance().deleteFile(destFileNameHtml); + } + // delete document in eXist + mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation); + // perform operation on eSciDoc + eSciDocIngestor.execute(docOperation); + } + + private void updateExist(MpdlDocOperation docOperation) throws ApplicationException { + try { + String operationName = docOperation.getName(); + String language = docOperation.getLanguage(); + String srcUrlStr = docOperation.getSrcUrl(); + String eXistIdentifier = docOperation.getDestUrl(); + String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; + URL srcUrl = null; + String protocol = null; + if (srcUrlStr != null && ! srcUrlStr.equals("empty")) { + srcUrl = new URL(srcUrlStr); + protocol = srcUrl.getProtocol(); + } + SchemaHandler schemaHandler = new SchemaHandler(); + if (protocol.equals("file")) { + docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server"); + } else { + docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server"); + } + // load file to local file system + FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName); + // validation + docOperation.setStatus("validate document: " + eXistIdentifier); + schemaHandler.validate(destFileName, docOperation); + // save regularizations of the document + docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server"); + RegularizationManager regManager = RegularizationManager.getInstance(); + regManager.saveRegularizations(language, destFileName); + // perform operation on eXist + docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); + mpdlXmlRpcDocHandler.saveDocumentFile(docOperation); + // save PDF and HTML versions of the document + boolean includePdf = docOperation.includePdf(); + if (includePdf) { + docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier); + MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); + MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord + mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document + } + } catch (MalformedURLException e) { + throw new ApplicationException(e); + } + } + + private void deleteExist(MpdlDocOperation docOperation) throws ApplicationException { + String operationName = docOperation.getName(); + String eXistIdentifier = docOperation.getDestUrl(); + String fileName = docOperation.getFileName(); + if (fileName == null || fileName.trim().equals("")) + throw new ApplicationException("Your document file name is empty. Please specify a file name for your document."); + if (! fileName.endsWith(".xml")) + throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document."); + boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); + if (! docExists) + throw new ApplicationException("Document:" + eXistIdentifier + " does not exist."); + // perform operation + docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); + // delete file on local file system: xml, pdf and html + String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml" + String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; + FileUtil.getInstance().deleteFile(destFileNameXml); + boolean includePdf = docOperation.includePdf(); + if (includePdf) { + String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf"; + String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html"; + FileUtil.getInstance().deleteFile(destFileNamePdf); + FileUtil.getInstance().deleteFile(destFileNameHtml); + } + // delete document in eXist + mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation); + } + + private void deleteDocumentCollections() throws ApplicationException { + mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionMorph); + mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionStandard); + } + + private void createDocumentCollections() throws ApplicationException { + for (int i=0; i < docBases.length; i++) { + String docBase = docBases[i]; + for (int j=0; j < languages.length; j++) { + String language = languages[j]; + String documentCollectionMorph = documentRootCollectionMorph + "/" + docBase + "/" + language; + mpdlXmlRpcDocHandler.createCollection(documentCollectionMorph); + String documentCollectionStandard = documentRootCollectionStandard + "/" + docBase + "/" + language; + mpdlXmlRpcDocHandler.createCollection(documentCollectionStandard); + } + } + } + + private void saveDocumentFiles() throws ApplicationException { + int counter = 0; + for (int i=0; i < docBases.length; i++) { + String docBase = docBases[i]; + for (int j=0; j < languages.length; j++) { + String language = languages[j]; + String documentCollection = "/" + docBase + "/" + language; + String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language; + File localFileDir = new File(localFileDirStr); + FilenameFilter filter = new FilenameFilterExtension("xml"); + File[] files = localFileDir.listFiles(filter); + System.out.println("Adding all documents in path: \"" + localFileDirStr + "\" to eXist collection: \"" + documentCollection + "\" ..."); + for (int k=0; k < files.length; k++) { + File f = files[k]; + String localFileNameWithoutPath = f.getName(); + String fullLocalFileName = f.getPath(); + String srcUrl = "file://" + fullLocalFileName; + MpdlDocOperation docOperation = new MpdlDocOperation("updateExist", srcUrl, null, docBase, language, localFileNameWithoutPath); + long begin = new Date().getTime(); + doOperation(docOperation); + long end = new Date().getTime(); + System.out.println("Added document \"" + fullLocalFileName + "\" to eXist collection: \"" + documentCollection + "\" (" + (end - begin) + " ms)" ); + counter++; + } + } + } + System.out.println("Imported documents: " + counter); + } + + private void generatePdfHtmlDocumentFiles() throws ApplicationException { + int counter = 0; + MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); + for (int i=0; i < docBases.length; i++) { + String docBase = docBases[i]; + for (int j=0; j < languages.length; j++) { + String language = languages[j]; + String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language; + File localFileDir = new File(localFileDirStr); + FilenameFilter filter = new FilenameFilterExtension("xml"); + File[] files = localFileDir.listFiles(filter); + System.out.println("Generating Pdf/Html documents in path: \"" + localFileDirStr + "\" ..."); + for (int k=0; k < files.length; k++) { + File f = files[k]; + String localFileName = f.getName(); + String fullLocalFileName = f.getPath(); + String srcUrl = "file://" + fullLocalFileName; + String localFileNameWithoutExtension = localFileName.substring(0, localFileName.length() - 4); // without ".xml" + String fullLocalPdfFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents/" + docBase + "/" + language + "/" + localFileNameWithoutExtension + ".pdf"; + File localPdfFile = new File(fullLocalPdfFileName); + boolean pdfFileAlreadyExists = localPdfFile.exists(); + // generate Pdf/Html file only if pdf file does not already exist + if (! pdfFileAlreadyExists) { + MpdlDocOperation docOperation = new MpdlDocOperation("generatePdf", srcUrl, null, docBase, language, localFileName); + SchemaHandler schemaHandler = new SchemaHandler(); + schemaHandler.validate(fullLocalFileName, docOperation); + long begin = new Date().getTime(); + MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord + mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document + long end = new Date().getTime(); + System.out.println("Generate Pdf/Html document for: \"" + fullLocalFileName + "\" (" + (end - begin) + " ms)" ); + counter++; + try { + Thread.sleep(60000); // delay so that called servers (digilib, eXist) are not stressed too much + } catch (InterruptedException e) { + throw new ApplicationException(e); + } + } + } + } + } + System.out.println("Generated documents: " + counter); + } + + private void deletePresentationCollection() throws ApplicationException { + mpdlXmlRpcDocHandler.deleteCollection(presentationRootCollection); + } + + private void createPresentationCollection() throws ApplicationException { + mpdlXmlRpcDocHandler.createCollection(presentationRootCollection); + } + + private void deleteSchemaCollection() throws ApplicationException { + mpdlXmlRpcDocHandler.deleteCollection(schemaRootCollection); + } + + private void createSchemaCollection() throws ApplicationException { + mpdlXmlRpcDocHandler.createCollection(schemaRootCollection); + } + + private void beginOperation() { + beginOfOperation = new Date().getTime(); + } + + private void endOperation() { + endOfOperation = new Date().getTime(); + } + +} \ No newline at end of file