view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocIngestor.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.escidoc;

import java.util.ArrayList;
import java.util.Date;

import javax.xml.namespace.NamespaceContext;

import org.w3c.dom.Node;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;

/**
 * Mirrors document operations (create, update, delete) onto an eSciDoc server
 * through an {@link ESciDocRestSession}.
 *
 * <p>A document is represented on the eSciDoc side as one container which holds
 * one "fulltext" item plus one "image" item per page.
 */
public class ESciDocIngestor {
  /** Doc base name that selects the Archimedes container and content category. */
  private static final String DOC_BASE_ARCHIMEDES = "archimedes";

  /** Base URL of the eXist page viewer; the eXist document identifier is appended. */
  private static final String EXIST_VIEWER_BASE_URL = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=";

  /** Base URL of the ECHO image viewer; image directory and file name are appended. */
  private static final String ECHO_IMAGE_VIEWER_BASE_URL = "http://echo.mpiwg-berlin.mpg.de/zogilib?fn=";

  /**
   * Upper bound on the number of page items created per document.
   * TODO: this cap was hard-coded in the original loop (marked TODO there);
   * remove it once all pages of a document should be ingested.
   */
  private static final int MAX_PAGE_ITEMS = 10;

  private final ESciDocRestSession eSciDocSession;

  /**
   * @param eSciDocSession session used for all requests to the eSciDoc server
   */
  public ESciDocIngestor(ESciDocRestSession eSciDocSession) {
    this.eSciDocSession = eSciDocSession;
  }

  /**
   * Performs the given document operation on the eSciDoc server and stores the
   * resulting eSciDoc URL in the operation.
   *
   * @param docOperation operation whose name is "create", "update" or "delete"
   * @return URL built from the configured eSciDoc host, port and the id of the affected container
   * @throws ApplicationException if the operation name is not supported or the operation fails
   */
  public String execute(MpdlDocOperation docOperation) throws ApplicationException {
    String operationName = docOperation.getName();
    String performedContainerId;
    if ("create".equals(operationName)) {
      performedContainerId = createDocument(docOperation);
    } else if ("update".equals(operationName)) {
      performedContainerId = updateDocument(docOperation);
    } else if ("delete".equals(operationName)) {
      performedContainerId = deleteDocument(docOperation);
    } else {
      // fail loudly instead of building a URL that ends in the literal "null"
      throw new ApplicationException("eSciDoc: Unsupported document operation: " + operationName);
    }
    String performedESciDocUrl = "http://" + MpdlConstants.MPDL_ESCIDOC_HOST_NAME + ":" + MpdlConstants.MPDL_ESCIDOC_PORT + performedContainerId;
    docOperation.setESciDocDestUrl(performedESciDocUrl);
    return performedESciDocUrl;
  }

  /**
   * Creates a new document container with its fulltext item and page items.
   *
   * @return id of the newly created container
   */
  private String createDocument(MpdlDocOperation docOperation) throws ApplicationException {
    String pid = eSciDocSession.getPid();
    String eXistIdentifier = docOperation.getDestUrl();  // e.g. /echo/la/Benedetti_1585.xml
    String docBase = docOperation.getDocBase();
    String docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ECHO_CONTAINER_ID;
    if (DOC_BASE_ARCHIMEDES.equals(docBase)) {
      docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID;
    }
    docOperation.setStatus("create document: " + eXistIdentifier + " on eSciDoc server");
    // upload the file to the eSciDoc stage area
    String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
    String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName);
    Node docNode = docOperation.getDocNode();
    MetadataRecord mdRecord = docOperation.getMdRecord();
    if (mdRecord != null) {
      mdRecord.setMediaType("fulltext");
    }
    // create document container for all items
    Container newContainer = eSciDocSession.createContainerInContainer(pid, mdRecord, docBaseContainerId);
    String newContainerId = newContainer.getId();
    eSciDocSession.submitContainer(newContainerId, newContainer.getLastModificationDate(), "create document");
    // create the fulltext item
    ArrayList<Component> components = createFulltextComponents(docBase, eXistIdentifier, eSciDocStageAreaUrl);
    Item fulltextItem = eSciDocSession.createItemInContainer(newContainerId, pid, mdRecord, components);
    eSciDocSession.submitItem(fulltextItem.getId(), fulltextItem.getLastModificationDate(), "create document");
    // page items: for each page create one item
    SchemaHandler schemaHandler = new SchemaHandler();
    ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase);
    createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, newContainerId);
    return newContainerId;
  }

  /**
   * Updates an existing document container: replaces its image items and
   * updates its fulltext item.
   *
   * @return id of the updated container
   * @throws ApplicationException if no container exists for the document or the
   *         container has no fulltext item
   */
  private String updateDocument(MpdlDocOperation docOperation) throws ApplicationException {
    String docBase = docOperation.getDocBase();
    String eXistIdentifier = docOperation.getDestUrl();
    String pid = eSciDocSession.getPid();
    String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier);
    if (documentContainerId == null) {
      throw new ApplicationException("Document:" + eXistIdentifier + " does not exist.");
    }
    docOperation.setStatus("update document: " + eXistIdentifier + " on eSciDoc server");
    // first: upload file to eSciDoc stage area
    String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
    String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName);
    Node docNode = docOperation.getDocNode();
    MetadataRecord mdRecord = docOperation.getMdRecord();
    if (mdRecord != null) {
      mdRecord.setMediaType("fulltext");
    }
    // second: delete all image members of the container (page image reference items)
    deleteContainerItems(docOperation, documentContainerId, createMediaTypeFilter("image"));
    // third: update the fulltext item
    String fulltextItemsXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(documentContainerId, createMediaTypeFilter("fulltext"));
    String fulltextItemId = eSciDocSession.getFirstItemId(fulltextItemsXmlStr);
    if (fulltextItemId == null || fulltextItemId.trim().equals("")) {
      throw new ApplicationException("Update of document is not possible: there is no fulltext item in the document container.");
    }
    Date fulltextItemVersionDate = eSciDocSession.getVersionDate(fulltextItemsXmlStr);
    ArrayList<Component> components = createFulltextComponents(docBase, eXistIdentifier, eSciDocStageAreaUrl);
    eSciDocSession.updateItem(fulltextItemId, fulltextItemVersionDate, pid, mdRecord, components);
    // fourth: page items: for each page create one item
    SchemaHandler schemaHandler = new SchemaHandler();
    ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase);
    createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, documentContainerId);
    return documentContainerId;
  }

  /**
   * Deletes the container that belongs to the document of the given operation.
   *
   * @return id of the deleted container
   * @throws ApplicationException if no container exists for the document
   */
  private String deleteDocument(MpdlDocOperation docOperation) throws ApplicationException {
    String eXistIdentifier = docOperation.getDestUrl();  // e.g. /echo/la/bla.xml
    String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier);
    if (documentContainerId == null) {
      throw new ApplicationException("eSciDoc: Deletion of eSciDoc container is not possible. There is no eSciDoc container for your document: " + eXistIdentifier);
    }
    deleteContainer(docOperation, documentContainerId);
    return documentContainerId;
  }

  /**
   * Deletes all member items of a container and then the container itself.
   *
   * @param docOperation operation that receives status updates
   * @param containerId id of the container to delete
   * @throws ApplicationException if a request to the eSciDoc server fails
   */
  public void deleteContainer(MpdlDocOperation docOperation, String containerId) throws ApplicationException {
    String eXistIdentifier = docOperation.getDestUrl();  // e.g. /echo/la/bla.xml
    docOperation.setStatus("delete document: " + eXistIdentifier + " on eSciDoc server");
    // first: delete all members
    deleteContainerItems(docOperation, containerId, null);
    // second: delete container itself
    eSciDocSession.deleteContainer(containerId);
  }

  /**
   * Removes the member items selected by the filter from the container and
   * deletes each of them.
   *
   * @param docOperation operation that receives a status update per deleted item
   * @param containerId id of the container whose members are deleted
   * @param filter filter passed to the member query; {@link #deleteContainer} passes
   *        {@code null} here (presumably meaning "all members" - confirm against the session)
   * @throws ApplicationException if a request to the eSciDoc server fails
   */
  public void deleteContainerItems(MpdlDocOperation docOperation, String containerId, String filter) throws ApplicationException {
    String operationName = docOperation.getName();
    String eXistIdentifier = docOperation.getDestUrl();
    NamespaceContext nsContext = ESciDocRestSession.getNsContext();
    XmlUtil xmlUtil = XmlUtil.getInstance();
    String containerXmlStr = eSciDocSession.getContainer(containerId);
    Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr);
    String membersXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(containerId, filter);
    ArrayList<String> itemMemberIds = xmlUtil.evaluateToStringArray(membersXmlStr, "//escidocItem:item/@xlink:href", nsContext);
    if (itemMemberIds == null) {
      return;
    }
    // detach the items from the container first, then delete them one by one
    eSciDocSession.removeMembers(containerId, lastModificationDate, itemMemberIds);
    int itemCount = itemMemberIds.size();
    for (int i = 0; i < itemCount; i++) {
      int itemNumber = i + 1;
      docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (delete " + itemNumber + " of " + itemCount + " fulltext and image reference items)");
      eSciDocSession.deleteItem(itemMemberIds.get(i));
    }
  }

  /**
   * Creates one image item per page file name and adds the created items as
   * members to the container. At most {@link #MAX_PAGE_ITEMS} items are created.
   * Does nothing if the metadata record, the file name list or the container id
   * is {@code null}.
   */
  private void createPageItemsInContainer(MpdlDocOperation docOperation, MetadataRecord mdRecord, ArrayList<String> pbFileNamesArrayStr, String containerId) throws ApplicationException {
    if (mdRecord == null || pbFileNamesArrayStr == null || containerId == null) {
      return;
    }
    String operationName = docOperation.getName();
    String eXistIdentifier = docOperation.getDestUrl();
    String docBase = docOperation.getDocBase();
    SchemaHandler schemaHandler = new SchemaHandler();
    String pageImgDir = schemaHandler.getPageImgDir(mdRecord);
    String contentCategory = getFulltextContentCategory(docBase);
    ArrayList<String> memberIds = new ArrayList<String>();
    // never read past the end of the list: documents may have fewer pages than the cap
    int pageCount = Math.min(MAX_PAGE_ITEMS, pbFileNamesArrayStr.size());
    for (int i = 0; i < pageCount; i++) {
      // a pid is requested from the session for every page item, as in the original loop
      String pid = eSciDocSession.getPid();
      int pageNumber = i + 1;
      docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (create " + pageNumber + " of " + pageCount + " image references)");
      String fileName = StringUtilEscapeChars.deresolveXmlEntities(pbFileNamesArrayStr.get(i));
      MetadataRecord mdRecordImage = new MetadataRecord();
      mdRecordImage.setIdentifier(fileName);
      mdRecordImage.setTitle("Page: " + pageNumber);
      mdRecordImage.setMediaType("image");
      // "&amp;" is kept pre-escaped; presumably the URLs end up inside an XML request body - confirm
      String imageEchoViewerUrl = ECHO_IMAGE_VIEWER_BASE_URL + pageImgDir + "/" + fileName + "&amp;pn=" + pageNumber;
      String imageExistViewerUrl = EXIST_VIEWER_BASE_URL + eXistIdentifier + "&amp;mode=image" + "&amp;pn=" + pageNumber;
      String fulltextExistViewerUrl = EXIST_VIEWER_BASE_URL + eXistIdentifier + "&amp;mode=text" + "&amp;pn=" + pageNumber;
      ArrayList<Component> components = new ArrayList<Component>();
      components.add(new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageEchoViewerUrl, "external-url"));
      components.add(new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageExistViewerUrl, "external-url"));
      components.add(new Component("valid", "public", contentCategory, "text/html", fulltextExistViewerUrl, "external-url"));
      Item item = eSciDocSession.createItem(pid, mdRecordImage, components);
      String itemId = item.getId();
      eSciDocSession.submitItem(itemId, item.getLastModificationDate(), "create document");
      // the member id is the part of the item id after the first ':'
      if (itemId != null) {
        int index = itemId.indexOf(":");
        if (index > 0) {
          memberIds.add(itemId.substring(index + 1));
        }
      }
    }
    String containerXmlStr = eSciDocSession.getContainer(containerId);
    Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr);
    eSciDocSession.addMembers(containerId, lastModificationDate, memberIds);
  }

  /** Returns the fulltext content category for the given doc base (ECHO unless the doc base is "archimedes"). */
  private String getFulltextContentCategory(String docBase) {
    if (DOC_BASE_ARCHIMEDES.equals(docBase)) {
      return "fulltext XML - Archimedes";
    }
    return "fulltext XML - ECHO";
  }

  /** Builds the two components of a fulltext item: the uploaded XML file and the eXist text viewer URL. */
  private ArrayList<Component> createFulltextComponents(String docBase, String eXistIdentifier, String eSciDocStageAreaUrl) throws ApplicationException {
    String contentCategory = getFulltextContentCategory(docBase);
    // "&amp;" is kept pre-escaped; presumably the URL ends up inside an XML request body - confirm
    String existViewerUrl = EXIST_VIEWER_BASE_URL + eXistIdentifier + "&amp;mode=text";
    ArrayList<Component> components = new ArrayList<Component>();
    components.add(new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed"));
    components.add(new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url"));
    return components;
  }

  /** Builds the filter element that selects container members by the mediaType of their metadata record. */
  private String createMediaTypeFilter(String mediaType) {
    return "<filter name=\"/md-records/md-record/metadata/mediaType\">" + mediaType + "</filter>";
  }
}