diff software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children fdbdcffe6b90
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,370 @@
+package de.mpg.mpiwg.berlin.mpdl.client;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Date;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor;
+import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer;
+import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.FilenameFilterExtension;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler;
+
+/**
+ * Handler for eXist collections and documents.
+ * <p>
+ * Operations are dispatched by their name through {@link #doOperation(MpdlDocOperation)}.
+ * The operations "create", "update" and "delete" also call the eSciDoc ingestor and
+ * therefore need a handler that was created with the two-argument constructor.
+ * <p>
+ * Your local directory structure should look like this:
+ * <pre>
+ * documents
+ *   archimedes
+ *     ar
+ *       yourDoc1.xml
+ *       ...
+ *     ...
+ *     zh
+ *       yourDoc1.xml
+ *       ...
+ *   echo
+ *     ar
+ *       yourDoc1.xml
+ *       ...
+ *     ...
+ *     zh
+ *       yourDoc1.xml
+ *       ...
+ * </pre>
+ */
+public class DocumentHandler {
+  /** Pause after each generated Pdf/Html document, so that called servers (digilib, eXist) are not stressed too much. */
+  private static final long PDF_GENERATION_DELAY_MS = 60000;
+
+  private final MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler;
+  /** Is null when this handler was created with the one-argument constructor. */
+  private final ESciDocIngestor eSciDocIngestor;
+
+  private final String[] docBases = {"archimedes", "echo"};
+  private final String[] languages = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"};
+  private final String documentRootCollectionMorph = "/db/mpdl/documents/morph";
+  private final String documentRootCollectionStandard = "/db/mpdl/documents/standard";
+  private final String presentationRootCollection = "/db/mpdl/presentation";
+  private final String schemaRootCollection = "/db/mpdl/schema";
+  // NOTE(review): developer specific absolute path - consider making it configurable
+  private final String localDocumentDirectory = "/Users/jwillenborg/texts/mpdl/documents";
+  
+  private long beginOfOperation;
+  private long endOfOperation;
+  
+  
+  /**
+   * Creates a handler without eSciDoc ingestor. Such a handler can not perform
+   * the operations "create", "update" and "delete".
+   * 
+   * @param mpdlXmlRpcDocHandler handler which performs the operations on eXist
+   * @throws ApplicationException declared for callers, not thrown by this constructor itself
+   */
+  public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler) throws ApplicationException {
+    this(mpdlXmlRpcDocHandler, null);
+  }
+
+  /**
+   * Creates a handler which works on eXist and on eSciDoc.
+   * 
+   * @param mpdlXmlRpcDocHandler handler which performs the operations on eXist
+   * @param eSciDocIngestor ingestor which is executed after "create", "update" and "delete"
+   * @throws ApplicationException declared for callers, not thrown by this constructor itself
+   */
+  public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler, ESciDocIngestor eSciDocIngestor) throws ApplicationException {
+    this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler;
+    this.eSciDocIngestor = eSciDocIngestor;
+  }
+
+  /**
+   * Performs the given operation. The operation is selected by its name:
+   * "create", "update", "delete", "updateExist", "deleteExist",
+   * "importAllDocumentsLocallyExist" or "generatePdfHtmlDocumentFiles".
+   * An operation with any other name is ignored.
+   * 
+   * @param docOperation the operation to perform
+   * @throws ApplicationException if the operation fails
+   */
+  public void doOperation(MpdlDocOperation docOperation) throws ApplicationException{
+    String operationName = docOperation.getName();  
+    if (operationName.equals("create") || operationName.equals("update")) {
+      createOrUpdate(docOperation);
+    } else if (operationName.equals("delete")) {
+      delete(docOperation);
+    } else if (operationName.equals("updateExist")) {
+      updateExist(docOperation);
+    } else if (operationName.equals("deleteExist")) {
+      deleteExist(docOperation);
+    } else if (operationName.equals("importAllDocumentsLocallyExist")) {
+      importAllDocumentsLocallyExist();
+    } else if (operationName.equals("generatePdfHtmlDocumentFiles")) {
+      generatePdfHtmlDocumentFiles();
+    }
+  }
+  
+  /**
+   * Deletes the document collections on eXist, creates them again and imports all
+   * xml files of the local document directory. The needed time is printed to System.out.
+   */
+  private void importAllDocumentsLocallyExist() throws ApplicationException {
+    System.out.println("Start of DocumentHandler. This operation could be time consuming because documents are indexed on eXist (normal indexing times are 10 seconds for a document) ...");
+    beginOperation();
+    // disabled: the presentation and schema collections are not recreated
+    // deletePresentationCollection();
+    // createPresentationCollection();
+    // deleteSchemaCollection();
+    // createSchemaCollection();
+    
+    deleteDocumentCollections();
+    createDocumentCollections();
+    saveDocumentFiles();
+    endOperation();
+    System.out.println("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" );
+  }
+  
+  /**
+   * Creates or updates a document on eXist and afterwards executes the operation on eSciDoc.
+   * "create" fails if the document already exists, "update" fails if it does not exist.
+   */
+  private void createOrUpdate(MpdlDocOperation docOperation) throws ApplicationException {
+    String operationName = docOperation.getName();  
+    String eXistIdentifier = docOperation.getDestUrl();
+    // fail before anything is changed on eXist
+    checkESciDocIngestor(operationName);
+    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
+    if (operationName.equals("create") && docExists) {
+      throw new ApplicationException("Document:" + eXistIdentifier + " already exists. Please use another name or perform the operation \"Update\" of that document.");
+    }
+    if (operationName.equals("update") && ! docExists) {
+      throw new ApplicationException("Document:" + eXistIdentifier + " does not exist. Please use a name that exists and perform the operation \"Update\" again or perform the operation \"Create\" of that document");
+    }
+    // perform operation on eXist: load, validate, regularizations, save, Pdf/Html
+    updateExist(docOperation);
+    // perform operation on eSciDoc
+    eSciDocIngestor.execute(docOperation);
+  }
+  
+  /**
+   * Deletes a document on eXist (including its local xml, pdf and html files) and
+   * afterwards executes the operation on eSciDoc.
+   */
+  private void delete(MpdlDocOperation docOperation) throws ApplicationException {
+    String eXistIdentifier = docOperation.getDestUrl();
+    // fail before anything is changed on eXist
+    checkESciDocIngestor(docOperation.getName());
+    checkXmlFileName(docOperation.getFileName());
+    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
+    if (! docExists) {
+      throw new ApplicationException("Document:" + eXistIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again.");
+    }
+    // perform operation on eXist
+    removeFromExist(docOperation);
+    // perform operation on eSciDoc
+    eSciDocIngestor.execute(docOperation);
+  }
+  
+  /**
+   * Performs the eXist part of a create/update: loads the source file to the local
+   * eXist file system, validates it, saves its regularizations, saves the document
+   * on eXist and, if requested, generates the Pdf/Html versions.
+   * The status of the operation is set before each of these steps.
+   * 
+   * @throws ApplicationException if no source URL is specified, if the source URL
+   *   is malformed or if one of the steps fails
+   */
+  private void updateExist(MpdlDocOperation docOperation) throws ApplicationException {
+    try {
+      String operationName = docOperation.getName();  
+      String language = docOperation.getLanguage();  
+      String srcUrlStr = docOperation.getSrcUrl(); 
+      String eXistIdentifier = docOperation.getDestUrl();
+      String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
+      // without this check a missing source URL would end in a NullPointerException
+      if (srcUrlStr == null || srcUrlStr.equals("empty")) {
+        throw new ApplicationException("No source URL is specified for document:" + eXistIdentifier + ". Please specify the URL of your document file.");
+      }
+      URL srcUrl = new URL(srcUrlStr);
+      SchemaHandler schemaHandler = new SchemaHandler();
+      if (srcUrl.getProtocol().equals("file")) {
+        docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server");
+      } else {
+        docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server");
+      }
+      // load file to local file system
+      FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName);
+      //  validation
+      docOperation.setStatus("validate document: " + eXistIdentifier);
+      schemaHandler.validate(destFileName, docOperation);
+      // save regularizations of the document
+      docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server");
+      RegularizationManager regManager = RegularizationManager.getInstance();
+      regManager.saveRegularizations(language, destFileName);
+      // perform operation on eXist
+      docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
+      mpdlXmlRpcDocHandler.saveDocumentFile(docOperation);
+      // save PDF and HTML versions of the document
+      if (docOperation.includePdf()) {
+        docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier);
+        MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
+        MetadataRecord mdRecord = docOperation.getMdRecord();  // after validation, docOperation has a mdRecord
+        mpdlRenderer.createFile(true, true, "text", mdRecord);   // generate Pdf/Html document
+      }
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  /**
+   * Deletes a document on eXist (including its local xml, pdf and html files).
+   * eSciDoc is not touched.
+   */
+  private void deleteExist(MpdlDocOperation docOperation) throws ApplicationException {
+    String eXistIdentifier = docOperation.getDestUrl();
+    checkXmlFileName(docOperation.getFileName());
+    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
+    if (! docExists) {
+      throw new ApplicationException("Document:" + eXistIdentifier + " does not exist.");
+    }
+    // perform operation
+    removeFromExist(docOperation);
+  }
+  
+  /**
+   * Makes sure that an eSciDoc ingestor is available for the given operation.
+   * 
+   * @throws ApplicationException if this handler was created without an ingestor
+   */
+  private void checkESciDocIngestor(String operationName) throws ApplicationException {
+    if (eSciDocIngestor == null) {
+      throw new ApplicationException("Operation \"" + operationName + "\" needs an eSciDoc ingestor, but this DocumentHandler was created without one.");
+    }
+  }
+
+  /**
+   * Makes sure that the given document file name is not empty and ends with ".xml".
+   * 
+   * @throws ApplicationException if the file name is not acceptable
+   */
+  private void checkXmlFileName(String fileName) throws ApplicationException {
+    if (fileName == null || fileName.trim().equals("")) {
+      throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
+    }
+    if (! fileName.endsWith(".xml")) {
+      throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
+    }
+  }
+
+  /**
+   * Deletes the local files of the document (xml and, if requested, pdf and html)
+   * and then the document itself on eXist. Sets the status of the operation before.
+   */
+  private void removeFromExist(MpdlDocOperation docOperation) throws ApplicationException {
+    String operationName = docOperation.getName();  
+    String eXistIdentifier = docOperation.getDestUrl();
+    String documentsDir = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents";
+    docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
+    // delete file on local eXist file system: xml, pdf and html
+    FileUtil.getInstance().deleteFile(documentsDir + eXistIdentifier);
+    if (docOperation.includePdf()) {
+      // NOTE(review): assumes that the eXist identifier ends with ".xml" like the checked file name - confirm
+      String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4);  // without ".xml"
+      FileUtil.getInstance().deleteFile(documentsDir + eXistIdentifierWithoutExtension + ".pdf");
+      FileUtil.getInstance().deleteFile(documentsDir + eXistIdentifierWithoutExtension + ".html");
+    }
+    // delete document in eXist
+    mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation);
+  }
+
+  /** Deletes the morph and the standard document root collection on eXist. */
+  private void deleteDocumentCollections() throws ApplicationException {
+    mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionMorph);
+    mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionStandard);
+  }
+
+  /** Creates the morph and the standard collection for each document base and language on eXist. */
+  private void createDocumentCollections() throws ApplicationException {
+    for (int i=0; i < docBases.length; i++) {
+      String docBase = docBases[i];
+      for (int j=0; j < languages.length; j++) {
+        String language = languages[j];
+        String documentCollectionMorph = documentRootCollectionMorph + "/" + docBase + "/" + language;
+        mpdlXmlRpcDocHandler.createCollection(documentCollectionMorph);
+        String documentCollectionStandard = documentRootCollectionStandard + "/" + docBase + "/" + language;
+        mpdlXmlRpcDocHandler.createCollection(documentCollectionStandard);
+      }
+    }
+  }
+
+  /**
+   * Returns the xml files of the given local directory. If the directory does not
+   * exist or can not be read, a message is printed and an empty array is returned.
+   */
+  private File[] listXmlFiles(String localFileDirStr) {
+    FilenameFilter filter = new FilenameFilterExtension("xml");
+    File[] files = new File(localFileDirStr).listFiles(filter);
+    if (files == null) {
+      // listFiles delivers null (not an empty array) in this case
+      System.out.println("Path: \"" + localFileDirStr + "\" does not exist or could not be read: no documents found there");
+      return new File[0];
+    }
+    return files;
+  }
+
+  /**
+   * Imports all xml files of the local document directory (for each document base
+   * and language) with the operation "updateExist". Progress is printed to System.out.
+   */
+  private void saveDocumentFiles() throws ApplicationException {
+    int counter = 0;
+    for (int i=0; i < docBases.length; i++) {
+      String docBase = docBases[i];
+      for (int j=0; j < languages.length; j++) {
+        String language = languages[j];
+        String documentCollection = "/" + docBase + "/" + language;
+        String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language;
+        System.out.println("Adding all documents in path: \"" + localFileDirStr + "\" to eXist collection: \"" + documentCollection + "\" ...");
+        File[] files = listXmlFiles(localFileDirStr);
+        for (int k=0; k < files.length; k++) {
+          File f = files[k];
+          String localFileNameWithoutPath = f.getName();
+          String fullLocalFileName = f.getPath();
+          String srcUrl = "file://" + fullLocalFileName;
+          MpdlDocOperation docOperation = new MpdlDocOperation("updateExist", srcUrl, null, docBase, language, localFileNameWithoutPath); 
+          long begin = System.currentTimeMillis();
+          doOperation(docOperation);
+          long end = System.currentTimeMillis();
+          System.out.println("Added document \"" + fullLocalFileName + "\" to eXist collection: \"" + documentCollection + "\" (" + (end - begin) + " ms)" );
+          counter++;
+        }
+      }
+    }
+    System.out.println("Imported documents: " + counter);
+  }
+
+  /**
+   * Generates the Pdf/Html versions for all xml files of the local document directory
+   * which do not have a pdf file yet. After each generated document the thread
+   * sleeps for a while. Progress is printed to System.out.
+   * 
+   * @throws ApplicationException if validation or generation fails or if the
+   *   thread is interrupted while sleeping
+   */
+  private void generatePdfHtmlDocumentFiles() throws ApplicationException {
+    int counter = 0;
+    MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
+    for (int i=0; i < docBases.length; i++) {
+      String docBase = docBases[i];
+      for (int j=0; j < languages.length; j++) {
+        String language = languages[j];
+        String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language;
+        System.out.println("Generating Pdf/Html documents in path: \"" + localFileDirStr + "\" ...");
+        File[] files = listXmlFiles(localFileDirStr);
+        for (int k=0; k < files.length; k++) {
+          File f = files[k];
+          String localFileName = f.getName();
+          String fullLocalFileName = f.getPath();
+          String srcUrl = "file://" + fullLocalFileName;
+          // NOTE(review): assumes that the filter only delivers names with a 4 character suffix (".xml") - confirm
+          String localFileNameWithoutExtension = localFileName.substring(0, localFileName.length() - 4);  // without ".xml"
+          String fullLocalPdfFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents/" + docBase + "/" + language + "/" + localFileNameWithoutExtension + ".pdf";
+          File localPdfFile = new File(fullLocalPdfFileName);
+          // generate Pdf/Html file only if pdf file does not already exist
+          if (! localPdfFile.exists()) {
+            MpdlDocOperation docOperation = new MpdlDocOperation("generatePdf", srcUrl, null, docBase, language, localFileName); 
+            SchemaHandler schemaHandler = new SchemaHandler();
+            schemaHandler.validate(fullLocalFileName, docOperation);
+            long begin = System.currentTimeMillis();
+            MetadataRecord mdRecord = docOperation.getMdRecord();  // after validation, docOperation has a mdRecord
+            mpdlRenderer.createFile(true, true, "text", mdRecord);  // generate Pdf/Html document
+            long end = System.currentTimeMillis();
+            System.out.println("Generate Pdf/Html document for: \"" + fullLocalFileName + "\" (" + (end - begin) + " ms)" );
+            counter++;
+            try {
+              Thread.sleep(PDF_GENERATION_DELAY_MS);  // delay so that called servers (digilib, eXist) are not stressed too much
+            } catch (InterruptedException e) {
+              // keep the interrupt visible for the callers of this thread
+              Thread.currentThread().interrupt();
+              throw new ApplicationException(e);
+            }
+          }
+        }
+      }
+    }
+    System.out.println("Generated documents: " + counter);
+  }
+
+  /** Deletes the presentation root collection on eXist (currently not called). */
+  private void deletePresentationCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.deleteCollection(presentationRootCollection);
+  }
+
+  /** Creates the presentation root collection on eXist (currently not called). */
+  private void createPresentationCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.createCollection(presentationRootCollection);
+  }
+
+  /** Deletes the schema root collection on eXist (currently not called). */
+  private void deleteSchemaCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.deleteCollection(schemaRootCollection);
+  }
+
+  /** Creates the schema root collection on eXist (currently not called). */
+  private void createSchemaCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.createCollection(schemaRootCollection);
+  }
+
+  /** Remembers the current time as begin of the running operation. */
+  private void beginOperation() {
+    beginOfOperation = System.currentTimeMillis();
+  }
+
+  /** Remembers the current time as end of the running operation. */
+  private void endOperation() {
+    endOfOperation = System.currentTimeMillis();
+  }
+
+}
\ No newline at end of file