Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocIngestor.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.escidoc; | |
2 | |
3 import java.util.ArrayList; | |
4 import java.util.Date; | |
5 | |
6 import javax.xml.namespace.NamespaceContext; | |
7 | |
8 import org.w3c.dom.Node; | |
9 | |
10 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
11 import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; | |
12 import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation; | |
13 import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars; | |
14 import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil; | |
15 import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler; | |
16 | |
17 public class ESciDocIngestor { | |
18 private ESciDocRestSession eSciDocSession; | |
19 | |
20 public ESciDocIngestor(ESciDocRestSession eSciDocSession) { | |
21 this.eSciDocSession = eSciDocSession; | |
22 } | |
23 | |
24 public String execute(MpdlDocOperation docOperation) throws ApplicationException { | |
25 String performedContainerId = null; | |
26 String operationName = docOperation.getName(); | |
27 if (operationName.equals("create")) { | |
28 performedContainerId = createDocument(docOperation); | |
29 } else if (operationName.equals("update")) { | |
30 performedContainerId = updateDocument(docOperation); | |
31 } else if (operationName.equals("delete")) { | |
32 performedContainerId = deleteDocument(docOperation); | |
33 } | |
34 String performedESciDocUrl = "http://" + MpdlConstants.MPDL_ESCIDOC_HOST_NAME + ":" + MpdlConstants.MPDL_ESCIDOC_PORT + performedContainerId; | |
35 docOperation.setESciDocDestUrl(performedESciDocUrl); | |
36 return performedESciDocUrl; | |
37 } | |
38 | |
39 private String createDocument(MpdlDocOperation docOperation) throws ApplicationException { | |
40 String pid = eSciDocSession.getPid(); | |
41 String docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ECHO_CONTAINER_ID; | |
42 String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/Benedetti_1585.xml | |
43 String docBase = docOperation.getDocBase(); | |
44 if (docBase != null && docBase.equals("archimedes")) | |
45 docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID; | |
46 docOperation.setStatus("create document: " + eXistIdentifier + " on eSciDoc server"); | |
47 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; | |
48 // upload the file to the eSciDoc stage area | |
49 String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName); | |
50 Node docNode = docOperation.getDocNode(); | |
51 MetadataRecord mdRecord = docOperation.getMdRecord(); | |
52 if (mdRecord != null) { | |
53 mdRecord.setMediaType("fulltext"); | |
54 } | |
55 // create document container for all items | |
56 Container newContainer = eSciDocSession.createContainerInContainer(pid, mdRecord, docBaseContainerId); | |
57 String newContainerId = newContainer.getId(); | |
58 Date lastModificationDate = newContainer.getLastModificationDate(); | |
59 eSciDocSession.submitContainer(newContainerId, lastModificationDate, "create document"); | |
60 // create the fulltext item | |
61 String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=text"; | |
62 ArrayList<Component> components = new ArrayList<Component>(); | |
63 String contentCategory = "fulltext XML - ECHO"; | |
64 if (docBase != null && docBase.equals("archimedes")) | |
65 contentCategory = "fulltext XML - Archimedes"; | |
66 Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed"); | |
67 Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url"); | |
68 components.add(componentXmlFulltext); | |
69 components.add(componentExistViewer); | |
70 Item fulltextItem = eSciDocSession.createItemInContainer(newContainerId, pid, mdRecord, components); | |
71 String fulltextItemId = fulltextItem.getId(); | |
72 Date fulltextItemLastModificationDate = fulltextItem.getLastModificationDate(); | |
73 eSciDocSession.submitItem(fulltextItemId, fulltextItemLastModificationDate, "create document"); | |
74 // page items: for each page create one item | |
75 SchemaHandler schemaHandler = new SchemaHandler(); | |
76 ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase); | |
77 createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, newContainerId); | |
78 return newContainerId; | |
79 } | |
80 | |
81 private String updateDocument(MpdlDocOperation docOperation) throws ApplicationException { | |
82 String docBase = docOperation.getDocBase(); | |
83 String eXistIdentifier = docOperation.getDestUrl(); | |
84 String pid = eSciDocSession.getPid(); | |
85 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; | |
86 String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier); | |
87 if (documentContainerId == null) | |
88 throw new ApplicationException("Document:" + eXistIdentifier + " does not exist."); | |
89 docOperation.setStatus("update document: " + eXistIdentifier + " on eSciDoc server"); | |
90 // first: upload file to eSciDoc stage area and validate it | |
91 String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName); | |
92 // RelaxNG schema validation | |
93 Node docNode = docOperation.getDocNode(); | |
94 MetadataRecord mdRecord = docOperation.getMdRecord(); | |
95 if (mdRecord != null) { | |
96 mdRecord.setMediaType("fulltext"); | |
97 } | |
98 // second: delete all members of the container (page image reference items) | |
99 String deleteFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "image" + "</filter>"; // filter to find items of type image | |
100 deleteContainerItems(docOperation, documentContainerId, deleteFilter); | |
101 // third: update the fulltext item | |
102 String fulltextItemFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "fulltext" + "</filter>"; // filter to find items of type fulltext | |
103 String fulltextItemsXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(documentContainerId, fulltextItemFilter); | |
104 String fulltextItemId = eSciDocSession.getFirstItemId(fulltextItemsXmlStr); | |
105 if (fulltextItemId == null || fulltextItemId.trim().equals("")) | |
106 throw new ApplicationException("Update of document is not possible: there is no fulltext item in the document container."); | |
107 Date fulltextItemVersionDate = eSciDocSession.getVersionDate(fulltextItemsXmlStr); | |
108 ArrayList<Component> components = new ArrayList<Component>(); | |
109 String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=text"; | |
110 String contentCategory = "fulltext XML - ECHO"; | |
111 if (docBase != null && docBase.equals("archimedes")) | |
112 contentCategory = "fulltext XML - Archimedes"; | |
113 Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed"); | |
114 Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url"); | |
115 components.add(componentXmlFulltext); | |
116 components.add(componentExistViewer); | |
117 eSciDocSession.updateItem(fulltextItemId, fulltextItemVersionDate, pid, mdRecord, components); | |
118 // fourth: page items: for each page create one item | |
119 SchemaHandler schemaHandler = new SchemaHandler(); | |
120 ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase); | |
121 createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, documentContainerId); | |
122 return documentContainerId; | |
123 } | |
124 | |
125 private String deleteDocument(MpdlDocOperation docOperation) throws ApplicationException { | |
126 String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/bla.xml | |
127 String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier); | |
128 if (documentContainerId == null) | |
129 throw new ApplicationException("eSciDoc: Deletion of eSciDoc container is not possible. There is no eSciDoc container for your document: " + eXistIdentifier); | |
130 deleteContainer(docOperation, documentContainerId); | |
131 return documentContainerId; | |
132 } | |
133 | |
134 public void deleteContainer(MpdlDocOperation docOperation, String containerId) throws ApplicationException { | |
135 String eXistIdentifier = docOperation.getDestUrl(); // e.g. /echo/la/bla.xml | |
136 docOperation.setStatus("delete document: " + eXistIdentifier + " on eSciDoc server"); | |
137 // first: delete all members | |
138 deleteContainerItems(docOperation, containerId, null); | |
139 // second: delete container itself | |
140 eSciDocSession.deleteContainer(containerId); | |
141 } | |
142 | |
143 public void deleteContainerItems(MpdlDocOperation docOperation, String containerId, String filter) throws ApplicationException { | |
144 String operationName = docOperation.getName(); | |
145 String eXistIdentifier = docOperation.getDestUrl(); | |
146 NamespaceContext nsContext = ESciDocRestSession.getNsContext(); | |
147 XmlUtil xmlUtil = XmlUtil.getInstance(); | |
148 String containerXmlStr = eSciDocSession.getContainer(containerId); | |
149 Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr); | |
150 String membersXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(containerId, filter); | |
151 ArrayList<String> itemMemberIds = xmlUtil.evaluateToStringArray(membersXmlStr, "//escidocItem:item/@xlink:href", nsContext); | |
152 if (itemMemberIds != null) { | |
153 eSciDocSession.removeMembers(containerId, lastModificationDate, itemMemberIds); | |
154 for (int i=0; i< itemMemberIds.size(); i++) { | |
155 String itemId = itemMemberIds.get(i); | |
156 int pageNumber = i + 1; | |
157 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (delete " + pageNumber + " of " + itemMemberIds.size() + " fulltext and image reference items)"); | |
158 eSciDocSession.deleteItem(itemId); | |
159 } | |
160 } | |
161 } | |
162 | |
163 private void createPageItemsInContainer(MpdlDocOperation docOperation, MetadataRecord mdRecord, ArrayList<String> pbFileNamesArrayStr, String containerId) throws ApplicationException { | |
164 if (mdRecord == null || pbFileNamesArrayStr == null || containerId == null) | |
165 return; | |
166 String operationName = docOperation.getName(); | |
167 String eXistIdentifier = docOperation.getDestUrl(); | |
168 ArrayList<String> memberIds = new ArrayList<String>(); | |
169 SchemaHandler schemaHandler = new SchemaHandler(); | |
170 String pageImgDir = schemaHandler.getPageImgDir(mdRecord); | |
171 String docBase = docOperation.getDocBase(); | |
172 for (int i=0; i< 10; i++) { // TODO | |
173 // for (int i=0; i< pbFileNamesArrayStr.size(); i++) { | |
174 String pid = eSciDocSession.getPid(); | |
175 int pageNumber = i + 1; | |
176 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (create " + pageNumber + " of " + pbFileNamesArrayStr.size() + " image references)"); | |
177 String fileName = pbFileNamesArrayStr.get(i); | |
178 fileName = StringUtilEscapeChars.deresolveXmlEntities(fileName); | |
179 MetadataRecord mdRecordImage = new MetadataRecord(); | |
180 mdRecordImage.setIdentifier(fileName); | |
181 mdRecordImage.setTitle("Page: " + pageNumber); | |
182 mdRecordImage.setMediaType("image"); | |
183 ArrayList<Component> components = new ArrayList<Component>(); | |
184 String imageEchoViewerUrl = "http://echo.mpiwg-berlin.mpg.de/zogilib?fn=" + pageImgDir + "/" + fileName + "&pn=" + pageNumber; | |
185 String imageExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=image" + "&pn=" + pageNumber; | |
186 String fulltextExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&mode=text" + "&pn=" + pageNumber; | |
187 Component componentImageEchoViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageEchoViewerUrl, "external-url"); | |
188 Component componentImageExistViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageExistViewerUrl, "external-url"); | |
189 String contentCategory = "fulltext XML - ECHO"; | |
190 if (docBase != null && docBase.equals("archimedes")) | |
191 contentCategory = "fulltext XML - Archimedes"; | |
192 Component componentFulltextExistViewer = new Component("valid", "public", contentCategory, "text/html", fulltextExistViewerUrl, "external-url"); | |
193 components.add(componentImageEchoViewer); | |
194 components.add(componentImageExistViewer); | |
195 components.add(componentFulltextExistViewer); | |
196 Item item = eSciDocSession.createItem(pid, mdRecordImage, components); | |
197 String itemId = item.getId(); | |
198 Date lastModificationDate = item.getLastModificationDate(); | |
199 eSciDocSession.submitItem(itemId, lastModificationDate, "create document"); | |
200 String memberId = null; | |
201 if (itemId != null) { | |
202 int index = itemId.indexOf(":"); | |
203 if (index > 0) { | |
204 memberId = itemId.substring(index + 1); | |
205 memberIds.add(memberId); | |
206 } | |
207 } | |
208 } | |
209 String containerXmlStr = eSciDocSession.getContainer(containerId); | |
210 Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr); | |
211 eSciDocSession.addMembers(containerId, lastModificationDate, memberIds); | |
212 } | |
213 } |