Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children | fdbdcffe6b90 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.client; | |
2 | |
3 import java.io.File; | |
4 import java.io.FilenameFilter; | |
5 import java.net.MalformedURLException; | |
6 import java.net.URL; | |
7 import java.util.Date; | |
8 | |
9 import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor; | |
10 import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord; | |
11 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
12 import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants; | |
13 import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager; | |
14 import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation; | |
15 import de.mpg.mpiwg.berlin.mpdl.util.FileUtil; | |
16 import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer; | |
17 import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler; | |
18 import de.mpg.mpiwg.berlin.mpdl.xmlrpc.FilenameFilterExtension; | |
19 import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler; | |
20 | |
21 /** | |
22 * Handler for eXist collections and documents (singleton). | |
23 * Your local directory structure should look like this: | |
24 * documents | |
25 * archimedes | |
26 * ar | |
27 * yourDoc1.xml | |
28 * ... | |
29 * ... | |
30 * zh | |
31 * yourDoc1.xml | |
32 * ... | |
33 * echo | |
34 * ar | |
35 * yourDoc1.xml | |
36 * ... | |
37 * ... | |
38 * zh | |
39 * yourDoc1.xml | |
40 * ... | |
41 * | |
42 */ | |
43 public class DocumentHandler { | |
44 private MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler; | |
45 private ESciDocIngestor eSciDocIngestor; | |
46 | |
47 private String[] docBases = {"archimedes", "echo"}; | |
48 private String[] languages = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"}; | |
49 private String documentRootCollectionMorph = "/db/mpdl/documents/morph"; | |
50 private String documentRootCollectionStandard = "/db/mpdl/documents/standard"; | |
51 private String presentationRootCollection = "/db/mpdl/presentation"; | |
52 private String schemaRootCollection = "/db/mpdl/schema"; | |
53 private String localDocumentDirectory = "/Users/jwillenborg/texts/mpdl/documents"; | |
54 | |
55 private long beginOfOperation; | |
56 private long endOfOperation; | |
57 | |
58 | |
59 public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler) throws ApplicationException { | |
60 this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler; | |
61 } | |
62 | |
63 public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler, ESciDocIngestor eSciDocIngestor) throws ApplicationException { | |
64 this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler; | |
65 this.eSciDocIngestor = eSciDocIngestor; | |
66 } | |
67 | |
68 public void doOperation(MpdlDocOperation docOperation) throws ApplicationException{ | |
69 String operationName = docOperation.getName(); | |
70 if (operationName.equals("create") || operationName.equals("update")) { | |
71 createOrUpdate(docOperation); | |
72 } else if (operationName.equals("delete")) { | |
73 delete(docOperation); | |
74 } else if (operationName.equals("updateExist")) { | |
75 updateExist(docOperation); | |
76 } else if (operationName.equals("deleteExist")) { | |
77 deleteExist(docOperation); | |
78 } else if (operationName.equals("importAllDocumentsLocallyExist")) { | |
79 importAllDocumentsLocallyExist(); | |
80 } else if (operationName.equals("generatePdfHtmlDocumentFiles")) { | |
81 generatePdfHtmlDocumentFiles(); | |
82 } | |
83 } | |
84 | |
85 private void importAllDocumentsLocallyExist() throws ApplicationException { | |
86 System.out.println("Start of DocumentHandler. This operation could be time consuming because documents are indexed on eXist (normal indexing times are 10 seconds for a document) ..."); | |
87 beginOperation(); | |
88 // deletePresentationCollection(); | |
89 // createPresentationCollection(); | |
90 // deleteSchemaCollection(); | |
91 // createSchemaCollection(); | |
92 | |
93 deleteDocumentCollections(); | |
94 createDocumentCollections(); | |
95 saveDocumentFiles(); | |
96 endOperation(); | |
97 System.out.println("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" ); | |
98 } | |
99 | |
100 private void createOrUpdate(MpdlDocOperation docOperation) throws ApplicationException { | |
101 try { | |
102 String operationName = docOperation.getName(); | |
103 String language = docOperation.getLanguage(); | |
104 String srcUrlStr = docOperation.getSrcUrl(); | |
105 String eXistIdentifier = docOperation.getDestUrl(); | |
106 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; | |
107 URL srcUrl = null; | |
108 String protocol = null; | |
109 if (srcUrlStr != null && ! srcUrlStr.equals("empty")) { | |
110 srcUrl = new URL(srcUrlStr); | |
111 protocol = srcUrl.getProtocol(); | |
112 } | |
113 SchemaHandler schemaHandler = new SchemaHandler(); | |
114 boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); | |
115 if (operationName.equals("create") && docExists) { | |
116 throw new ApplicationException("Document:" + eXistIdentifier + " already exists. Please use another name or perform the operation \"Update\" of that document."); | |
117 } | |
118 if (operationName.equals("update") && ! docExists) { | |
119 throw new ApplicationException("Document:" + eXistIdentifier + " does not exist. Please use a name that exists and perform the operation \"Update\" again or perform the operation \"Create\" of that document"); | |
120 } | |
121 // load file to local file system | |
122 if (protocol.equals("file")) { | |
123 docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server"); | |
124 } else { | |
125 docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server"); | |
126 } | |
127 FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName); | |
128 // perform validations | |
129 docOperation.setStatus("validate document: " + eXistIdentifier); | |
130 schemaHandler.validate(destFileName, docOperation); | |
131 // perform operation on eXist | |
132 docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server"); | |
133 RegularizationManager regManager = RegularizationManager.getInstance(); | |
134 regManager.saveRegularizations(language, destFileName); | |
135 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); | |
136 mpdlXmlRpcDocHandler.saveDocumentFile(docOperation); | |
137 // save PDF and HTML versions of the document | |
138 boolean includePdf = docOperation.includePdf(); | |
139 if (includePdf) { | |
140 docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier); | |
141 MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); | |
142 MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord | |
143 mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document | |
144 } | |
145 // perform operation on eSciDoc | |
146 eSciDocIngestor.execute(docOperation); | |
147 } catch (MalformedURLException e) { | |
148 throw new ApplicationException(e); | |
149 } | |
150 } | |
151 | |
152 private void delete(MpdlDocOperation docOperation) throws ApplicationException { | |
153 String operationName = docOperation.getName(); | |
154 String eXistIdentifier = docOperation.getDestUrl(); | |
155 String fileName = docOperation.getFileName(); | |
156 if (fileName == null || fileName.trim().equals("")) | |
157 throw new ApplicationException("Your document file name is empty. Please specify a file name for your document."); | |
158 if (! fileName.endsWith(".xml")) | |
159 throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document."); | |
160 boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); | |
161 if (! docExists) { | |
162 throw new ApplicationException("Document:" + eXistIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again."); | |
163 } | |
164 // perform operation on eXist | |
165 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); | |
166 // delete file on local eXist file system: xml, pdf and html | |
167 String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml" | |
168 String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; | |
169 FileUtil.getInstance().deleteFile(destFileNameXml); | |
170 boolean includePdf = docOperation.includePdf(); | |
171 if (includePdf) { | |
172 String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf"; | |
173 String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html"; | |
174 FileUtil.getInstance().deleteFile(destFileNamePdf); | |
175 FileUtil.getInstance().deleteFile(destFileNameHtml); | |
176 } | |
177 // delete document in eXist | |
178 mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation); | |
179 // perform operation on eSciDoc | |
180 eSciDocIngestor.execute(docOperation); | |
181 } | |
182 | |
183 private void updateExist(MpdlDocOperation docOperation) throws ApplicationException { | |
184 try { | |
185 String operationName = docOperation.getName(); | |
186 String language = docOperation.getLanguage(); | |
187 String srcUrlStr = docOperation.getSrcUrl(); | |
188 String eXistIdentifier = docOperation.getDestUrl(); | |
189 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; | |
190 URL srcUrl = null; | |
191 String protocol = null; | |
192 if (srcUrlStr != null && ! srcUrlStr.equals("empty")) { | |
193 srcUrl = new URL(srcUrlStr); | |
194 protocol = srcUrl.getProtocol(); | |
195 } | |
196 SchemaHandler schemaHandler = new SchemaHandler(); | |
197 if (protocol.equals("file")) { | |
198 docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server"); | |
199 } else { | |
200 docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server"); | |
201 } | |
202 // load file to local file system | |
203 FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName); | |
204 // validation | |
205 docOperation.setStatus("validate document: " + eXistIdentifier); | |
206 schemaHandler.validate(destFileName, docOperation); | |
207 // save regularizations of the document | |
208 docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server"); | |
209 RegularizationManager regManager = RegularizationManager.getInstance(); | |
210 regManager.saveRegularizations(language, destFileName); | |
211 // perform operation on eXist | |
212 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); | |
213 mpdlXmlRpcDocHandler.saveDocumentFile(docOperation); | |
214 // save PDF and HTML versions of the document | |
215 boolean includePdf = docOperation.includePdf(); | |
216 if (includePdf) { | |
217 docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier); | |
218 MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); | |
219 MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord | |
220 mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document | |
221 } | |
222 } catch (MalformedURLException e) { | |
223 throw new ApplicationException(e); | |
224 } | |
225 } | |
226 | |
227 private void deleteExist(MpdlDocOperation docOperation) throws ApplicationException { | |
228 String operationName = docOperation.getName(); | |
229 String eXistIdentifier = docOperation.getDestUrl(); | |
230 String fileName = docOperation.getFileName(); | |
231 if (fileName == null || fileName.trim().equals("")) | |
232 throw new ApplicationException("Your document file name is empty. Please specify a file name for your document."); | |
233 if (! fileName.endsWith(".xml")) | |
234 throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document."); | |
235 boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation); | |
236 if (! docExists) | |
237 throw new ApplicationException("Document:" + eXistIdentifier + " does not exist."); | |
238 // perform operation | |
239 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server"); | |
240 // delete file on local file system: xml, pdf and html | |
241 String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml" | |
242 String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier; | |
243 FileUtil.getInstance().deleteFile(destFileNameXml); | |
244 boolean includePdf = docOperation.includePdf(); | |
245 if (includePdf) { | |
246 String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf"; | |
247 String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html"; | |
248 FileUtil.getInstance().deleteFile(destFileNamePdf); | |
249 FileUtil.getInstance().deleteFile(destFileNameHtml); | |
250 } | |
251 // delete document in eXist | |
252 mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation); | |
253 } | |
254 | |
255 private void deleteDocumentCollections() throws ApplicationException { | |
256 mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionMorph); | |
257 mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionStandard); | |
258 } | |
259 | |
260 private void createDocumentCollections() throws ApplicationException { | |
261 for (int i=0; i < docBases.length; i++) { | |
262 String docBase = docBases[i]; | |
263 for (int j=0; j < languages.length; j++) { | |
264 String language = languages[j]; | |
265 String documentCollectionMorph = documentRootCollectionMorph + "/" + docBase + "/" + language; | |
266 mpdlXmlRpcDocHandler.createCollection(documentCollectionMorph); | |
267 String documentCollectionStandard = documentRootCollectionStandard + "/" + docBase + "/" + language; | |
268 mpdlXmlRpcDocHandler.createCollection(documentCollectionStandard); | |
269 } | |
270 } | |
271 } | |
272 | |
273 private void saveDocumentFiles() throws ApplicationException { | |
274 int counter = 0; | |
275 for (int i=0; i < docBases.length; i++) { | |
276 String docBase = docBases[i]; | |
277 for (int j=0; j < languages.length; j++) { | |
278 String language = languages[j]; | |
279 String documentCollection = "/" + docBase + "/" + language; | |
280 String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language; | |
281 File localFileDir = new File(localFileDirStr); | |
282 FilenameFilter filter = new FilenameFilterExtension("xml"); | |
283 File[] files = localFileDir.listFiles(filter); | |
284 System.out.println("Adding all documents in path: \"" + localFileDirStr + "\" to eXist collection: \"" + documentCollection + "\" ..."); | |
285 for (int k=0; k < files.length; k++) { | |
286 File f = files[k]; | |
287 String localFileNameWithoutPath = f.getName(); | |
288 String fullLocalFileName = f.getPath(); | |
289 String srcUrl = "file://" + fullLocalFileName; | |
290 MpdlDocOperation docOperation = new MpdlDocOperation("updateExist", srcUrl, null, docBase, language, localFileNameWithoutPath); | |
291 long begin = new Date().getTime(); | |
292 doOperation(docOperation); | |
293 long end = new Date().getTime(); | |
294 System.out.println("Added document \"" + fullLocalFileName + "\" to eXist collection: \"" + documentCollection + "\" (" + (end - begin) + " ms)" ); | |
295 counter++; | |
296 } | |
297 } | |
298 } | |
299 System.out.println("Imported documents: " + counter); | |
300 } | |
301 | |
302 private void generatePdfHtmlDocumentFiles() throws ApplicationException { | |
303 int counter = 0; | |
304 MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance(); | |
305 for (int i=0; i < docBases.length; i++) { | |
306 String docBase = docBases[i]; | |
307 for (int j=0; j < languages.length; j++) { | |
308 String language = languages[j]; | |
309 String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language; | |
310 File localFileDir = new File(localFileDirStr); | |
311 FilenameFilter filter = new FilenameFilterExtension("xml"); | |
312 File[] files = localFileDir.listFiles(filter); | |
313 System.out.println("Generating Pdf/Html documents in path: \"" + localFileDirStr + "\" ..."); | |
314 for (int k=0; k < files.length; k++) { | |
315 File f = files[k]; | |
316 String localFileName = f.getName(); | |
317 String fullLocalFileName = f.getPath(); | |
318 String srcUrl = "file://" + fullLocalFileName; | |
319 String localFileNameWithoutExtension = localFileName.substring(0, localFileName.length() - 4); // without ".xml" | |
320 String fullLocalPdfFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents/" + docBase + "/" + language + "/" + localFileNameWithoutExtension + ".pdf"; | |
321 File localPdfFile = new File(fullLocalPdfFileName); | |
322 boolean pdfFileAlreadyExists = localPdfFile.exists(); | |
323 // generate Pdf/Html file only if pdf file does not already exist | |
324 if (! pdfFileAlreadyExists) { | |
325 MpdlDocOperation docOperation = new MpdlDocOperation("generatePdf", srcUrl, null, docBase, language, localFileName); | |
326 SchemaHandler schemaHandler = new SchemaHandler(); | |
327 schemaHandler.validate(fullLocalFileName, docOperation); | |
328 long begin = new Date().getTime(); | |
329 MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord | |
330 mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document | |
331 long end = new Date().getTime(); | |
332 System.out.println("Generate Pdf/Html document for: \"" + fullLocalFileName + "\" (" + (end - begin) + " ms)" ); | |
333 counter++; | |
334 try { | |
335 Thread.sleep(60000); // delay so that called servers (digilib, eXist) are not stressed too much | |
336 } catch (InterruptedException e) { | |
337 throw new ApplicationException(e); | |
338 } | |
339 } | |
340 } | |
341 } | |
342 } | |
343 System.out.println("Generated documents: " + counter); | |
344 } | |
345 | |
346 private void deletePresentationCollection() throws ApplicationException { | |
347 mpdlXmlRpcDocHandler.deleteCollection(presentationRootCollection); | |
348 } | |
349 | |
350 private void createPresentationCollection() throws ApplicationException { | |
351 mpdlXmlRpcDocHandler.createCollection(presentationRootCollection); | |
352 } | |
353 | |
354 private void deleteSchemaCollection() throws ApplicationException { | |
355 mpdlXmlRpcDocHandler.deleteCollection(schemaRootCollection); | |
356 } | |
357 | |
358 private void createSchemaCollection() throws ApplicationException { | |
359 mpdlXmlRpcDocHandler.createCollection(schemaRootCollection); | |
360 } | |
361 | |
362 private void beginOperation() { | |
363 beginOfOperation = new Date().getTime(); | |
364 } | |
365 | |
366 private void endOperation() { | |
367 endOfOperation = new Date().getTime(); | |
368 } | |
369 | |
370 } |