Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocIngestor.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.escidoc; import java.util.ArrayList; import java.util.Date; import javax.xml.namespace.NamespaceContext; import org.w3c.dom.Node; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation; import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars; import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil; import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler; public class ESciDocIngestor { private ESciDocRestSession eSciDocSession; public ESciDocIngestor(ESciDocRestSession eSciDocSession) { this.eSciDocSession = eSciDocSession; } public String execute(MpdlDocOperation docOperation) throws ApplicationException { String performedContainerId = null; String operationName = docOperation.getName(); if (operationName.equals("create")) { performedContainerId = createDocument(docOperation); } else if (operationName.equals("update")) { performedContainerId = updateDocument(docOperation); } else if (operationName.equals("delete")) { performedContainerId = deleteDocument(docOperation); } String performedESciDocUrl = "http://" + MpdlConstants.MPDL_ESCIDOC_HOST_NAME + ":" + MpdlConstants.MPDL_ESCIDOC_PORT + performedContainerId; docOperation.setESciDocDestUrl(performedESciDocUrl); return performedESciDocUrl; } private String createDocument(MpdlDocOperation docOperation) throws ApplicationException { String pid = eSciDocSession.getPid(); String docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ECHO_CONTAINER_ID; String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/Benedetti_1585.xml String docBase = docOperation.getDocBase(); if (docBase != null && docBase.equals("archimedes")) docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID; docOperation.setStatus("create document: " + eXistIdentifier + " on eSciDoc server"); String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; // upload the file to the eSciDoc stage area String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName); Node docNode = docOperation.getDocNode(); MetadataRecord mdRecord = docOperation.getMdRecord(); if (mdRecord != null) { mdRecord.setMediaType("fulltext"); } // create document container for all items Container newContainer = eSciDocSession.createContainerInContainer(pid, mdRecord, docBaseContainerId); String newContainerId = newContainer.getId(); Date lastModificationDate = newContainer.getLastModificationDate(); eSciDocSession.submitContainer(newContainerId, lastModificationDate, "create document"); // create the fulltext item String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=text"; ArrayList<Component> components = new ArrayList<Component>(); String contentCategory = "fulltext XML - ECHO"; if (docBase != null && docBase.equals("archimedes")) contentCategory = "fulltext XML - Archimedes"; Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed"); Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url"); components.add(componentXmlFulltext); components.add(componentExistViewer); Item fulltextItem = eSciDocSession.createItemInContainer(newContainerId, pid, mdRecord, components); String fulltextItemId = fulltextItem.getId(); Date fulltextItemLastModificationDate = fulltextItem.getLastModificationDate(); eSciDocSession.submitItem(fulltextItemId, fulltextItemLastModificationDate, "create document"); // page items: for each page create one item SchemaHandler schemaHandler = new SchemaHandler(); ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase); createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, newContainerId); return newContainerId; } private String updateDocument(MpdlDocOperation docOperation) throws ApplicationException { String docBase = docOperation.getDocBase(); String eXistIdentifier = docOperation.getDestUrl(); String pid = eSciDocSession.getPid(); String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier); if (documentContainerId == null) throw new ApplicationException("Document:" + eXistIdentifier + " does not exist."); docOperation.setStatus("update document: " + eXistIdentifier + " on eSciDoc server"); // first: upload file to eSciDoc stage area and validate it String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName); // RelaxNG schema validation Node docNode = docOperation.getDocNode(); MetadataRecord mdRecord = docOperation.getMdRecord(); if (mdRecord != null) { mdRecord.setMediaType("fulltext"); } // second: delete all members of the container (page image reference items) String deleteFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "image" + "</filter>"; // filter to find items of type image deleteContainerItems(docOperation, documentContainerId, deleteFilter); // third: update the fulltext item String fulltextItemFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "fulltext" + "</filter>"; // filter to find items of type fulltext String fulltextItemsXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(documentContainerId, fulltextItemFilter); String fulltextItemId = eSciDocSession.getFirstItemId(fulltextItemsXmlStr); if (fulltextItemId == null || fulltextItemId.trim().equals("")) throw new ApplicationException("Update of document is not possible: there is no fulltext item in the document container."); Date fulltextItemVersionDate = eSciDocSession.getVersionDate(fulltextItemsXmlStr); ArrayList<Component> components = new ArrayList<Component>(); String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=text"; String contentCategory = "fulltext XML - ECHO"; if (docBase != null && docBase.equals("archimedes")) contentCategory = "fulltext XML - Archimedes"; Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed"); Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url"); components.add(componentXmlFulltext); components.add(componentExistViewer); eSciDocSession.updateItem(fulltextItemId, fulltextItemVersionDate, pid, mdRecord, components); // fourth: page items: for each page create one item SchemaHandler schemaHandler = new SchemaHandler(); ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase); createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, documentContainerId); return documentContainerId; } private String deleteDocument(MpdlDocOperation docOperation) throws ApplicationException { String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/bla.xml String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier); if (documentContainerId == null) throw new ApplicationException("eSciDoc: Deletion of eSciDoc container is not possible. There is no eSciDoc container for your document: " + eXistIdentifier); deleteContainer(docOperation, documentContainerId); return documentContainerId; } public void deleteContainer(MpdlDocOperation docOperation, String containerId) throws ApplicationException { String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/bla.xml docOperation.setStatus("delete document: " + eXistIdentifier + " on eSciDoc server"); // first: delete all members deleteContainerItems(docOperation, containerId, null); // second: delete container itself eSciDocSession.deleteContainer(containerId); } public void deleteContainerItems(MpdlDocOperation docOperation, String containerId, String filter) throws ApplicationException { String operationName = docOperation.getName(); String eXistIdentifier = docOperation.getDestUrl(); NamespaceContext nsContext = ESciDocRestSession.getNsContext(); XmlUtil xmlUtil = XmlUtil.getInstance(); String containerXmlStr = eSciDocSession.getContainer(containerId); Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr); String membersXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(containerId, filter); ArrayList<String> itemMemberIds = xmlUtil.evaluateToStringArray(membersXmlStr, "//escidocItem:item/@xlink:href", nsContext); if (itemMemberIds != null) { eSciDocSession.removeMembers(containerId, lastModificationDate, itemMemberIds); for (int i=0; i< itemMemberIds.size(); i++) { String itemId = itemMemberIds.get(i); int pageNumber = i + 1; docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (delete " + pageNumber + " of " + itemMemberIds.size() + " fulltext and image reference items)"); eSciDocSession.deleteItem(itemId); } } } private void createPageItemsInContainer(MpdlDocOperation docOperation, MetadataRecord mdRecord, ArrayList<String> pbFileNamesArrayStr, String containerId) throws ApplicationException { if (mdRecord == null || pbFileNamesArrayStr == null || containerId == null) return; String operationName = docOperation.getName(); String eXistIdentifier = docOperation.getDestUrl(); ArrayList<String> memberIds = new ArrayList<String>(); SchemaHandler schemaHandler = new SchemaHandler(); String pageImgDir = schemaHandler.getPageImgDir(mdRecord); String docBase = docOperation.getDocBase(); for (int i=0; i< 10; i++) { // TODO // for (int i=0; i< pbFileNamesArrayStr.size(); i++) { String pid = eSciDocSession.getPid(); int pageNumber = i + 1; docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (create " + pageNumber + " of " + pbFileNamesArrayStr.size() + " image references)"); String fileName = pbFileNamesArrayStr.get(i); fileName = StringUtilEscapeChars.deresolveXmlEntities(fileName); MetadataRecord mdRecordImage = new MetadataRecord(); mdRecordImage.setIdentifier(fileName); mdRecordImage.setTitle("Page: " + pageNumber); mdRecordImage.setMediaType("image"); ArrayList<Component> components = new ArrayList<Component>(); String imageEchoViewerUrl = "http://echo.mpiwg-berlin.mpg.de/zogilib?fn=" + pageImgDir + "/" + fileName + "&pn=" + pageNumber; String imageExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=image" + "&pn=" + pageNumber; String fulltextExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=text" + "&pn=" + pageNumber; Component componentImageEchoViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageEchoViewerUrl, "external-url"); Component componentImageExistViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageExistViewerUrl, "external-url"); String contentCategory = "fulltext XML - ECHO"; if (docBase != null && docBase.equals("archimedes")) contentCategory = "fulltext XML - Archimedes"; Component componentFulltextExistViewer = new Component("valid", "public", contentCategory, "text/html", fulltextExistViewerUrl, "external-url"); components.add(componentImageEchoViewer); components.add(componentImageExistViewer); components.add(componentFulltextExistViewer); Item item = eSciDocSession.createItem(pid, mdRecordImage, components); String itemId = item.getId(); Date lastModificationDate = item.getLastModificationDate(); eSciDocSession.submitItem(itemId, lastModificationDate, "create document"); String memberId = null; if (itemId != null) { int index = itemId.indexOf(":"); if (index > 0) { memberId = itemId.substring(index + 1); memberIds.add(memberId); } } } String containerXmlStr = eSciDocSession.getContainer(containerId); Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr); eSciDocSession.addMembers(containerId, lastModificationDate, memberIds); } }