view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children fdbdcffe6b90
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.client;

import java.io.File;
import java.io.FilenameFilter;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Date;

import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor;
import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager;
import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer;
import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;
import de.mpg.mpiwg.berlin.mpdl.xmlrpc.FilenameFilterExtension;
import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler;

/**
 * Handler for eXist collections and documents.
 *
 * <p>Operations are dispatched by name through {@link #doOperation(MpdlDocOperation)}.
 * The operations "create", "update" and "delete" act on eXist and afterwards on eSciDoc,
 * so they need a handler that was created with an {@link ESciDocIngestor}. The operations
 * "updateExist" and "deleteExist" act on eXist only.
 *
 * <p>The batch operations ("importAllDocumentsLocallyExist" and
 * "generatePdfHtmlDocumentFiles") read XML files from the local document directory
 * (see {@code LOCAL_DOCUMENT_DIRECTORY}). Your local directory structure should look like this:
 * <pre>
 * documents
 *   archimedes
 *     ar
 *       yourDoc1.xml
 *       ...
 *     ...
 *     zh
 *       yourDoc1.xml
 *       ...
 *   echo
 *     ar
 *       yourDoc1.xml
 *       ...
 *     ...
 *     zh
 *       yourDoc1.xml
 *       ...
 * </pre>
 * Language directories that are missing or unreadable are reported on standard output
 * and skipped.
 */
public class DocumentHandler {
  /** Document bases; each one is a first-level directory / collection. */
  private static final String[] DOC_BASES = {"archimedes", "echo"};
  /** Language codes; each one is a second-level directory / collection. */
  private static final String[] LANGUAGES = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"};
  private static final String DOCUMENT_ROOT_COLLECTION_MORPH = "/db/mpdl/documents/morph";
  private static final String DOCUMENT_ROOT_COLLECTION_STANDARD = "/db/mpdl/documents/standard";
  private static final String PRESENTATION_ROOT_COLLECTION = "/db/mpdl/presentation";
  private static final String SCHEMA_ROOT_COLLECTION = "/db/mpdl/schema";
  /** Root of the local directory tree that the batch operations read from. */
  private static final String LOCAL_DOCUMENT_DIRECTORY = "/Users/jwillenborg/texts/mpdl/documents";
  private static final String XML_SUFFIX = ".xml";
  /** Pause after each generated Pdf/Html document, in milliseconds. */
  private static final long PDF_GENERATION_DELAY_MS = 60000;

  private final MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler;
  /** May be null: then only the eXist-only operations can be performed. */
  private final ESciDocIngestor eSciDocIngestor;

  /**
   * Creates a handler without an eSciDoc ingestor. With such a handler the operations
   * "create", "update" and "delete" fail with an {@link ApplicationException}.
   *
   * @param mpdlXmlRpcDocHandler XML-RPC handler used for all eXist calls
   * @throws ApplicationException declared for callers; not thrown by this constructor itself
   */
  public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler) throws ApplicationException {
    this(mpdlXmlRpcDocHandler, null);
  }

  /**
   * Creates a handler that can perform all operations.
   *
   * @param mpdlXmlRpcDocHandler XML-RPC handler used for all eXist calls
   * @param eSciDocIngestor ingestor executed after the eXist part of "create", "update" and "delete"
   * @throws ApplicationException declared for callers; not thrown by this constructor itself
   */
  public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler, ESciDocIngestor eSciDocIngestor) throws ApplicationException {
    this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler;
    this.eSciDocIngestor = eSciDocIngestor;
  }

  /**
   * Performs the operation named by {@code docOperation.getName()}.
   * Names other than the ones handled below are ignored.
   *
   * @param docOperation the operation to perform
   * @throws ApplicationException if the operation fails
   */
  public void doOperation(MpdlDocOperation docOperation) throws ApplicationException{
    String operationName = docOperation.getName();
    if (operationName.equals("create") || operationName.equals("update")) {
      createOrUpdate(docOperation);
    } else if (operationName.equals("delete")) {
      delete(docOperation);
    } else if (operationName.equals("updateExist")) {
      updateExist(docOperation);
    } else if (operationName.equals("deleteExist")) {
      deleteExist(docOperation);
    } else if (operationName.equals("importAllDocumentsLocallyExist")) {
      importAllDocumentsLocallyExist();
    } else if (operationName.equals("generatePdfHtmlDocumentFiles")) {
      generatePdfHtmlDocumentFiles();
    }
  }

  /**
   * Deletes and recreates the document collections on eXist and imports all XML files
   * found below the local document directory. The elapsed time is printed at the end.
   */
  private void importAllDocumentsLocallyExist() throws ApplicationException {
    System.out.println("Start of DocumentHandler. This operation could be time consuming because documents are indexed on eXist (normal indexing times are 10 seconds for a document) ...");
    long beginOfOperation = System.currentTimeMillis();
    // The presentation and schema collections are left untouched here; the helpers
    // delete/createPresentationCollection and delete/createSchemaCollection exist
    // for the case that they have to be rebuilt as well.
    deleteDocumentCollections();
    createDocumentCollections();
    saveDocumentFiles();
    long endOfOperation = System.currentTimeMillis();
    System.out.println("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" );
  }

  /**
   * Creates or updates a document on eXist and then executes the eSciDoc ingestor.
   * "create" requires that the document does not exist yet, "update" that it does.
   */
  private void createOrUpdate(MpdlDocOperation docOperation) throws ApplicationException {
    String operationName = docOperation.getName();
    // fail before anything is changed on eXist instead of crashing after it
    requireESciDocIngestor(operationName);
    String eXistIdentifier = docOperation.getDestUrl();
    URL srcUrl = parseSourceUrl(docOperation.getSrcUrl());
    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
    if (operationName.equals("create") && docExists) {
      throw new ApplicationException("Document:" + eXistIdentifier + " already exists. Please use another name or perform the operation \"Update\" of that document.");
    }
    if (operationName.equals("update") && ! docExists) {
      throw new ApplicationException("Document:" + eXistIdentifier + " does not exist. Please use a name that exists and perform the operation \"Update\" again or perform the operation \"Create\" of that document");
    }
    storeOnExist(docOperation, srcUrl);
    // perform operation on eSciDoc
    eSciDocIngestor.execute(docOperation);
  }

  /**
   * Deletes a document on eXist (including its local xml/pdf/html files) and then
   * executes the eSciDoc ingestor.
   */
  private void delete(MpdlDocOperation docOperation) throws ApplicationException {
    // fail before anything is deleted on eXist instead of crashing after it
    requireESciDocIngestor(docOperation.getName());
    String eXistIdentifier = docOperation.getDestUrl();
    requireXmlFileName(docOperation.getFileName());
    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
    if (! docExists) {
      throw new ApplicationException("Document:" + eXistIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again.");
    }
    removeFromExist(docOperation);
    // perform operation on eSciDoc
    eSciDocIngestor.execute(docOperation);
  }

  /** Saves a document on eXist only; no existence check and no eSciDoc step. */
  private void updateExist(MpdlDocOperation docOperation) throws ApplicationException {
    URL srcUrl = parseSourceUrl(docOperation.getSrcUrl());
    storeOnExist(docOperation, srcUrl);
  }

  /** Deletes a document on eXist only (including its local xml/pdf/html files). */
  private void deleteExist(MpdlDocOperation docOperation) throws ApplicationException {
    String eXistIdentifier = docOperation.getDestUrl();
    requireXmlFileName(docOperation.getFileName());
    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
    if (! docExists) {
      throw new ApplicationException("Document:" + eXistIdentifier + " does not exist.");
    }
    removeFromExist(docOperation);
  }

  /**
   * Shared eXist part of "create", "update" and "updateExist": copies the source file to the
   * local eXist data directory, validates it, saves its regularizations, saves the document
   * on eXist and, if requested by the operation, generates the Pdf/Html versions.
   *
   * @param srcUrl parsed source URL, or null if the operation has no source URL
   * @throws ApplicationException if there is no source URL or one of the steps fails
   */
  private void storeOnExist(MpdlDocOperation docOperation, URL srcUrl) throws ApplicationException {
    String operationName = docOperation.getName();
    String language = docOperation.getLanguage();
    String srcUrlStr = docOperation.getSrcUrl();
    String eXistIdentifier = docOperation.getDestUrl();
    String destFileName = localExistFileName(eXistIdentifier);
    if (srcUrl == null) {
      throw new ApplicationException("Document:" + eXistIdentifier + " has no source URL. Please specify the URL of the document file.");
    }
    SchemaHandler schemaHandler = new SchemaHandler();
    // load file to local file system
    if (srcUrl.getProtocol().equals("file")) {
      docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server");
    } else {
      docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server");
    }
    FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName);
    // perform validations
    docOperation.setStatus("validate document: " + eXistIdentifier);
    schemaHandler.validate(destFileName, docOperation);
    // save regularizations of the document
    docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server");
    RegularizationManager regManager = RegularizationManager.getInstance();
    regManager.saveRegularizations(language, destFileName);
    // perform operation on eXist
    docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
    mpdlXmlRpcDocHandler.saveDocumentFile(docOperation);
    // save PDF and HTML versions of the document
    if (docOperation.includePdf()) {
      docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier);
      MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
      MetadataRecord mdRecord = docOperation.getMdRecord();  // after validation, docOperation has a mdRecord
      mpdlRenderer.createFile(true, true, "text", mdRecord);  // generate Pdf/Html document
    }
  }

  /**
   * Shared eXist part of "delete" and "deleteExist": deletes the local xml file (and the
   * pdf/html files if the operation includes Pdf) and then the document on eXist.
   */
  private void removeFromExist(MpdlDocOperation docOperation) throws ApplicationException {
    String eXistIdentifier = docOperation.getDestUrl();
    docOperation.setStatus(docOperation.getName() + " document: " + eXistIdentifier + " on eXist server");
    // delete file on local eXist file system: xml, pdf and html
    String eXistIdentifierWithoutExtension = withoutXmlSuffix(eXistIdentifier);
    FileUtil.getInstance().deleteFile(localExistFileName(eXistIdentifier));
    if (docOperation.includePdf()) {
      FileUtil.getInstance().deleteFile(localExistFileName(eXistIdentifierWithoutExtension + ".pdf"));
      FileUtil.getInstance().deleteFile(localExistFileName(eXistIdentifierWithoutExtension + ".html"));
    }
    // delete document in eXist
    mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation);
  }

  /** Deletes the two root document collections (morph and standard) on eXist. */
  private void deleteDocumentCollections() throws ApplicationException {
    mpdlXmlRpcDocHandler.deleteCollection(DOCUMENT_ROOT_COLLECTION_MORPH);
    mpdlXmlRpcDocHandler.deleteCollection(DOCUMENT_ROOT_COLLECTION_STANDARD);
  }

  /** Creates the morph and standard collection for every doc base / language pair. */
  private void createDocumentCollections() throws ApplicationException {
    for (String docBase : DOC_BASES) {
      for (String language : LANGUAGES) {
        String subCollection = "/" + docBase + "/" + language;
        mpdlXmlRpcDocHandler.createCollection(DOCUMENT_ROOT_COLLECTION_MORPH + subCollection);
        mpdlXmlRpcDocHandler.createCollection(DOCUMENT_ROOT_COLLECTION_STANDARD + subCollection);
      }
    }
  }

  /**
   * Runs an "updateExist" operation for every XML file below the local document directory
   * and prints the time needed per document and the number of imported documents.
   */
  private void saveDocumentFiles() throws ApplicationException {
    int counter = 0;
    for (String docBase : DOC_BASES) {
      for (String language : LANGUAGES) {
        String documentCollection = "/" + docBase + "/" + language;
        String localFileDirStr = LOCAL_DOCUMENT_DIRECTORY + "/" + docBase + "/" + language;
        System.out.println("Adding all documents in path: \"" + localFileDirStr + "\" to eXist collection: \"" + documentCollection + "\" ...");
        for (File f : listXmlFiles(localFileDirStr)) {
          String localFileNameWithoutPath = f.getName();
          String fullLocalFileName = f.getPath();
          String srcUrl = "file://" + fullLocalFileName;
          MpdlDocOperation docOperation = new MpdlDocOperation("updateExist", srcUrl, null, docBase, language, localFileNameWithoutPath);
          long begin = System.currentTimeMillis();
          doOperation(docOperation);
          long end = System.currentTimeMillis();
          System.out.println("Added document \"" + fullLocalFileName + "\" to eXist collection: \"" + documentCollection + "\" (" + (end - begin) + " ms)" );
          counter++;
        }
      }
    }
    System.out.println("Imported documents: " + counter);
  }

  /**
   * Generates the Pdf/Html versions for every XML file below the local document directory
   * whose pdf file does not exist yet in the eXist data directory. After each generated
   * document the thread sleeps so that the called servers are not stressed too much.
   */
  private void generatePdfHtmlDocumentFiles() throws ApplicationException {
    int counter = 0;
    MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
    for (String docBase : DOC_BASES) {
      for (String language : LANGUAGES) {
        String localFileDirStr = LOCAL_DOCUMENT_DIRECTORY + "/" + docBase + "/" + language;
        System.out.println("Generating Pdf/Html documents in path: \"" + localFileDirStr + "\" ...");
        for (File f : listXmlFiles(localFileDirStr)) {
          String localFileName = f.getName();
          String fullLocalFileName = f.getPath();
          String srcUrl = "file://" + fullLocalFileName;
          String localFileNameWithoutExtension = withoutXmlSuffix(localFileName);
          String fullLocalPdfFileName = localExistFileName("/" + docBase + "/" + language + "/" + localFileNameWithoutExtension + ".pdf");
          // generate Pdf/Html file only if pdf file does not already exist
          if (new File(fullLocalPdfFileName).exists()) {
            continue;
          }
          MpdlDocOperation docOperation = new MpdlDocOperation("generatePdf", srcUrl, null, docBase, language, localFileName);
          SchemaHandler schemaHandler = new SchemaHandler();
          schemaHandler.validate(fullLocalFileName, docOperation);
          long begin = System.currentTimeMillis();
          MetadataRecord mdRecord = docOperation.getMdRecord();  // after validation, docOperation has a mdRecord
          mpdlRenderer.createFile(true, true, "text", mdRecord);  // generate Pdf/Html document
          long end = System.currentTimeMillis();
          System.out.println("Generate Pdf/Html document for: \"" + fullLocalFileName + "\" (" + (end - begin) + " ms)" );
          counter++;
          try {
            Thread.sleep(PDF_GENERATION_DELAY_MS);  // delay so that called servers (digilib, eXist) are not stressed too much
          } catch (InterruptedException e) {
            // keep the interrupt visible to the code that runs this thread
            Thread.currentThread().interrupt();
            throw new ApplicationException(e);
          }
        }
      }
    }
    System.out.println("Generated documents: " + counter);
  }

  /** Currently unused; counterpart of {@link #createPresentationCollection()}. */
  private void deletePresentationCollection() throws ApplicationException {
    mpdlXmlRpcDocHandler.deleteCollection(PRESENTATION_ROOT_COLLECTION);
  }

  /** Currently unused; counterpart of {@link #deletePresentationCollection()}. */
  private void createPresentationCollection() throws ApplicationException {
    mpdlXmlRpcDocHandler.createCollection(PRESENTATION_ROOT_COLLECTION);
  }

  /** Currently unused; counterpart of {@link #createSchemaCollection()}. */
  private void deleteSchemaCollection() throws ApplicationException {
    mpdlXmlRpcDocHandler.deleteCollection(SCHEMA_ROOT_COLLECTION);
  }

  /** Currently unused; counterpart of {@link #deleteSchemaCollection()}. */
  private void createSchemaCollection() throws ApplicationException {
    mpdlXmlRpcDocHandler.createCollection(SCHEMA_ROOT_COLLECTION);
  }

  /**
   * Ensures that this handler has an eSciDoc ingestor.
   *
   * @param operationName name of the operation that needs the ingestor (used in the message)
   * @throws ApplicationException if the handler was created without an ingestor
   */
  private void requireESciDocIngestor(String operationName) throws ApplicationException {
    if (eSciDocIngestor == null) {
      throw new ApplicationException("Operation \"" + operationName + "\" needs an eSciDoc ingestor but this DocumentHandler was created without one.");
    }
  }

  /**
   * Ensures that the given document file name is not empty and ends with ".xml".
   *
   * @throws ApplicationException if the file name is null, blank or has another suffix
   */
  private static void requireXmlFileName(String fileName) throws ApplicationException {
    if (fileName == null || fileName.trim().equals("")) {
      throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
    }
    if (! fileName.endsWith(XML_SUFFIX)) {
      throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
    }
  }

  /**
   * Parses the source URL of an operation.
   *
   * @param srcUrlStr source URL string; null and the placeholder "empty" mean "no source"
   * @return the parsed URL, or null if there is no source
   * @throws ApplicationException if the string is not a well-formed URL
   */
  private static URL parseSourceUrl(String srcUrlStr) throws ApplicationException {
    if (srcUrlStr == null || srcUrlStr.equals("empty")) {
      return null;
    }
    try {
      return new URL(srcUrlStr);
    } catch (MalformedURLException e) {
      throw new ApplicationException(e);
    }
  }

  /**
   * Lists the files accepted by {@code FilenameFilterExtension("xml")} in a local directory.
   *
   * @return the files; an empty array (after a note on standard output) if the directory
   *         cannot be listed, so that callers can always iterate over the result
   */
  private static File[] listXmlFiles(String localFileDirStr) {
    File localFileDir = new File(localFileDirStr);
    FilenameFilter filter = new FilenameFilterExtension("xml");
    File[] files = localFileDir.listFiles(filter);
    if (files == null) {
      // listFiles returns null if the path is not a directory or an I/O error occurs
      System.out.println("Path: \"" + localFileDirStr + "\" is not a readable directory and is skipped");
      return new File[0];
    }
    return files;
  }

  /** Returns the file name in the local eXist data directory for a path below "/documents". */
  private static String localExistFileName(String eXistIdentifier) {
    return MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
  }

  /**
   * Cuts the last four characters (the length of ".xml") off the given name.
   * The name is not checked; callers pass names that are expected to end with ".xml".
   */
  private static String withoutXmlSuffix(String name) {
    return name.substring(0, name.length() - XML_SUFFIX.length());
  }

}