comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocIngestor.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 package de.mpg.mpiwg.berlin.mpdl.escidoc;
2
3 import java.util.ArrayList;
4 import java.util.Date;
5
6 import javax.xml.namespace.NamespaceContext;
7
8 import org.w3c.dom.Node;
9
10 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
11 import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
12 import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
13 import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
14 import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
15 import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;
16
17 public class ESciDocIngestor {
18 private ESciDocRestSession eSciDocSession;
19
20 public ESciDocIngestor(ESciDocRestSession eSciDocSession) {
21 this.eSciDocSession = eSciDocSession;
22 }
23
24 public String execute(MpdlDocOperation docOperation) throws ApplicationException {
25 String performedContainerId = null;
26 String operationName = docOperation.getName();
27 if (operationName.equals("create")) {
28 performedContainerId = createDocument(docOperation);
29 } else if (operationName.equals("update")) {
30 performedContainerId = updateDocument(docOperation);
31 } else if (operationName.equals("delete")) {
32 performedContainerId = deleteDocument(docOperation);
33 }
34 String performedESciDocUrl = "http://" + MpdlConstants.MPDL_ESCIDOC_HOST_NAME + ":" + MpdlConstants.MPDL_ESCIDOC_PORT + performedContainerId;
35 docOperation.setESciDocDestUrl(performedESciDocUrl);
36 return performedESciDocUrl;
37 }
38
39 private String createDocument(MpdlDocOperation docOperation) throws ApplicationException {
40 String pid = eSciDocSession.getPid();
41 String docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ECHO_CONTAINER_ID;
42 String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/Benedetti_1585.xml
43 String docBase = docOperation.getDocBase();
44 if (docBase != null && docBase.equals("archimedes"))
45 docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID;
46 docOperation.setStatus("create document: " + eXistIdentifier + " on eSciDoc server");
47 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
48 // upload the file to the eSciDoc stage area
49 String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName);
50 Node docNode = docOperation.getDocNode();
51 MetadataRecord mdRecord = docOperation.getMdRecord();
52 if (mdRecord != null) {
53 mdRecord.setMediaType("fulltext");
54 }
55 // create document container for all items
56 Container newContainer = eSciDocSession.createContainerInContainer(pid, mdRecord, docBaseContainerId);
57 String newContainerId = newContainer.getId();
58 Date lastModificationDate = newContainer.getLastModificationDate();
59 eSciDocSession.submitContainer(newContainerId, lastModificationDate, "create document");
60 // create the fulltext item
61 String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=text";
62 ArrayList<Component> components = new ArrayList<Component>();
63 String contentCategory = "fulltext XML - ECHO";
64 if (docBase != null && docBase.equals("archimedes"))
65 contentCategory = "fulltext XML - Archimedes";
66 Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed");
67 Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url");
68 components.add(componentXmlFulltext);
69 components.add(componentExistViewer);
70 Item fulltextItem = eSciDocSession.createItemInContainer(newContainerId, pid, mdRecord, components);
71 String fulltextItemId = fulltextItem.getId();
72 Date fulltextItemLastModificationDate = fulltextItem.getLastModificationDate();
73 eSciDocSession.submitItem(fulltextItemId, fulltextItemLastModificationDate, "create document");
74 // page items: for each page create one item
75 SchemaHandler schemaHandler = new SchemaHandler();
76 ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase);
77 createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, newContainerId);
78 return newContainerId;
79 }
80
81 private String updateDocument(MpdlDocOperation docOperation) throws ApplicationException {
82 String docBase = docOperation.getDocBase();
83 String eXistIdentifier = docOperation.getDestUrl();
84 String pid = eSciDocSession.getPid();
85 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
86 String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier);
87 if (documentContainerId == null)
88 throw new ApplicationException("Document:" + eXistIdentifier + " does not exist.");
89 docOperation.setStatus("update document: " + eXistIdentifier + " on eSciDoc server");
90 // first: upload file to eSciDoc stage area and validate it
91 String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName);
92 // RelaxNG schema validation
93 Node docNode = docOperation.getDocNode();
94 MetadataRecord mdRecord = docOperation.getMdRecord();
95 if (mdRecord != null) {
96 mdRecord.setMediaType("fulltext");
97 }
98 // second: delete all members of the container (page image reference items)
99 String deleteFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "image" + "</filter>"; // filter to find items of type image
100 deleteContainerItems(docOperation, documentContainerId, deleteFilter);
101 // third: update the fulltext item
102 String fulltextItemFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "fulltext" + "</filter>"; // filter to find items of type fulltext
103 String fulltextItemsXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(documentContainerId, fulltextItemFilter);
104 String fulltextItemId = eSciDocSession.getFirstItemId(fulltextItemsXmlStr);
105 if (fulltextItemId == null || fulltextItemId.trim().equals(""))
106 throw new ApplicationException("Update of document is not possible: there is no fulltext item in the document container.");
107 Date fulltextItemVersionDate = eSciDocSession.getVersionDate(fulltextItemsXmlStr);
108 ArrayList<Component> components = new ArrayList<Component>();
109 String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=text";
110 String contentCategory = "fulltext XML - ECHO";
111 if (docBase != null && docBase.equals("archimedes"))
112 contentCategory = "fulltext XML - Archimedes";
113 Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed");
114 Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url");
115 components.add(componentXmlFulltext);
116 components.add(componentExistViewer);
117 eSciDocSession.updateItem(fulltextItemId, fulltextItemVersionDate, pid, mdRecord, components);
118 // fourth: page items: for each page create one item
119 SchemaHandler schemaHandler = new SchemaHandler();
120 ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase);
121 createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, documentContainerId);
122 return documentContainerId;
123 }
124
125 private String deleteDocument(MpdlDocOperation docOperation) throws ApplicationException {
126 String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/bla.xml
127 String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier);
128 if (documentContainerId == null)
129 throw new ApplicationException("eSciDoc: Deletion of eSciDoc container is not possible. There is no eSciDoc container for your document: " + eXistIdentifier);
130 deleteContainer(docOperation, documentContainerId);
131 return documentContainerId;
132 }
133
134 public void deleteContainer(MpdlDocOperation docOperation, String containerId) throws ApplicationException {
135 String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/bla.xml
136 docOperation.setStatus("delete document: " + eXistIdentifier + " on eSciDoc server");
137 // first: delete all members
138 deleteContainerItems(docOperation, containerId, null);
139 // second: delete container itself
140 eSciDocSession.deleteContainer(containerId);
141 }
142
143 public void deleteContainerItems(MpdlDocOperation docOperation, String containerId, String filter) throws ApplicationException {
144 String operationName = docOperation.getName();
145 String eXistIdentifier = docOperation.getDestUrl();
146 NamespaceContext nsContext = ESciDocRestSession.getNsContext();
147 XmlUtil xmlUtil = XmlUtil.getInstance();
148 String containerXmlStr = eSciDocSession.getContainer(containerId);
149 Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr);
150 String membersXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(containerId, filter);
151 ArrayList<String> itemMemberIds = xmlUtil.evaluateToStringArray(membersXmlStr, "//escidocItem:item/@xlink:href", nsContext);
152 if (itemMemberIds != null) {
153 eSciDocSession.removeMembers(containerId, lastModificationDate, itemMemberIds);
154 for (int i=0; i< itemMemberIds.size(); i++) {
155 String itemId = itemMemberIds.get(i);
156 int pageNumber = i + 1;
157 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (delete " + pageNumber + " of " + itemMemberIds.size() + " fulltext and image reference items)");
158 eSciDocSession.deleteItem(itemId);
159 }
160 }
161 }
162
163 private void createPageItemsInContainer(MpdlDocOperation docOperation, MetadataRecord mdRecord, ArrayList<String> pbFileNamesArrayStr, String containerId) throws ApplicationException {
164 if (mdRecord == null || pbFileNamesArrayStr == null || containerId == null)
165 return;
166 String operationName = docOperation.getName();
167 String eXistIdentifier = docOperation.getDestUrl();
168 ArrayList<String> memberIds = new ArrayList<String>();
169 SchemaHandler schemaHandler = new SchemaHandler();
170 String pageImgDir = schemaHandler.getPageImgDir(mdRecord);
171 String docBase = docOperation.getDocBase();
172 for (int i=0; i< 10; i++) { // TODO
173 // for (int i=0; i< pbFileNamesArrayStr.size(); i++) {
174 String pid = eSciDocSession.getPid();
175 int pageNumber = i + 1;
176 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (create " + pageNumber + " of " + pbFileNamesArrayStr.size() + " image references)");
177 String fileName = pbFileNamesArrayStr.get(i);
178 fileName = StringUtilEscapeChars.deresolveXmlEntities(fileName);
179 MetadataRecord mdRecordImage = new MetadataRecord();
180 mdRecordImage.setIdentifier(fileName);
181 mdRecordImage.setTitle("Page: " + pageNumber);
182 mdRecordImage.setMediaType("image");
183 ArrayList<Component> components = new ArrayList<Component>();
184 String imageEchoViewerUrl = "http://echo.mpiwg-berlin.mpg.de/zogilib?fn=" + pageImgDir + "/" + fileName + "&amp;pn=" + pageNumber;
185 String imageExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=image" + "&amp;pn=" + pageNumber;
186 String fulltextExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=text" + "&amp;pn=" + pageNumber;
187 Component componentImageEchoViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageEchoViewerUrl, "external-url");
188 Component componentImageExistViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageExistViewerUrl, "external-url");
189 String contentCategory = "fulltext XML - ECHO";
190 if (docBase != null && docBase.equals("archimedes"))
191 contentCategory = "fulltext XML - Archimedes";
192 Component componentFulltextExistViewer = new Component("valid", "public", contentCategory, "text/html", fulltextExistViewerUrl, "external-url");
193 components.add(componentImageEchoViewer);
194 components.add(componentImageExistViewer);
195 components.add(componentFulltextExistViewer);
196 Item item = eSciDocSession.createItem(pid, mdRecordImage, components);
197 String itemId = item.getId();
198 Date lastModificationDate = item.getLastModificationDate();
199 eSciDocSession.submitItem(itemId, lastModificationDate, "create document");
200 String memberId = null;
201 if (itemId != null) {
202 int index = itemId.indexOf(":");
203 if (index > 0) {
204 memberId = itemId.substring(index + 1);
205 memberIds.add(memberId);
206 }
207 }
208 }
209 String containerXmlStr = eSciDocSession.getContainer(containerId);
210 Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr);
211 eSciDocSession.addMembers(containerId, lastModificationDate, memberIds);
212 }
213 }