comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children fdbdcffe6b90
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 package de.mpg.mpiwg.berlin.mpdl.client;
2
3 import java.io.File;
4 import java.io.FilenameFilter;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.Date;
8
9 import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor;
10 import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
11 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
12 import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
13 import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager;
14 import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
15 import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
16 import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer;
17 import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;
18 import de.mpg.mpiwg.berlin.mpdl.xmlrpc.FilenameFilterExtension;
19 import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler;
20
21 /**
22 * Handler for eXist collections and documents (singleton).
23 * Your local directory structure should look like this:
24 * documents
25 * archimedes
26 * ar
27 * yourDoc1.xml
28 * ...
29 * ...
30 * zh
31 * yourDoc1.xml
32 * ...
33 * echo
34 * ar
35 * yourDoc1.xml
36 * ...
37 * ...
38 * zh
39 * yourDoc1.xml
40 * ...
41 *
42 */
43 public class DocumentHandler {
44 private MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler;
45 private ESciDocIngestor eSciDocIngestor;
46
47 private String[] docBases = {"archimedes", "echo"};
48 private String[] languages = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"};
49 private String documentRootCollectionMorph = "/db/mpdl/documents/morph";
50 private String documentRootCollectionStandard = "/db/mpdl/documents/standard";
51 private String presentationRootCollection = "/db/mpdl/presentation";
52 private String schemaRootCollection = "/db/mpdl/schema";
53 private String localDocumentDirectory = "/Users/jwillenborg/texts/mpdl/documents";
54
55 private long beginOfOperation;
56 private long endOfOperation;
57
58
59 public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler) throws ApplicationException {
60 this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler;
61 }
62
63 public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler, ESciDocIngestor eSciDocIngestor) throws ApplicationException {
64 this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler;
65 this.eSciDocIngestor = eSciDocIngestor;
66 }
67
68 public void doOperation(MpdlDocOperation docOperation) throws ApplicationException{
69 String operationName = docOperation.getName();
70 if (operationName.equals("create") || operationName.equals("update")) {
71 createOrUpdate(docOperation);
72 } else if (operationName.equals("delete")) {
73 delete(docOperation);
74 } else if (operationName.equals("updateExist")) {
75 updateExist(docOperation);
76 } else if (operationName.equals("deleteExist")) {
77 deleteExist(docOperation);
78 } else if (operationName.equals("importAllDocumentsLocallyExist")) {
79 importAllDocumentsLocallyExist();
80 } else if (operationName.equals("generatePdfHtmlDocumentFiles")) {
81 generatePdfHtmlDocumentFiles();
82 }
83 }
84
85 private void importAllDocumentsLocallyExist() throws ApplicationException {
86 System.out.println("Start of DocumentHandler. This operation could be time consuming because documents are indexed on eXist (normal indexing times are 10 seconds for a document) ...");
87 beginOperation();
88 // deletePresentationCollection();
89 // createPresentationCollection();
90 // deleteSchemaCollection();
91 // createSchemaCollection();
92
93 deleteDocumentCollections();
94 createDocumentCollections();
95 saveDocumentFiles();
96 endOperation();
97 System.out.println("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" );
98 }
99
100 private void createOrUpdate(MpdlDocOperation docOperation) throws ApplicationException {
101 try {
102 String operationName = docOperation.getName();
103 String language = docOperation.getLanguage();
104 String srcUrlStr = docOperation.getSrcUrl();
105 String eXistIdentifier = docOperation.getDestUrl();
106 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
107 URL srcUrl = null;
108 String protocol = null;
109 if (srcUrlStr != null && ! srcUrlStr.equals("empty")) {
110 srcUrl = new URL(srcUrlStr);
111 protocol = srcUrl.getProtocol();
112 }
113 SchemaHandler schemaHandler = new SchemaHandler();
114 boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
115 if (operationName.equals("create") && docExists) {
116 throw new ApplicationException("Document:" + eXistIdentifier + " already exists. Please use another name or perform the operation \"Update\" of that document.");
117 }
118 if (operationName.equals("update") && ! docExists) {
119 throw new ApplicationException("Document:" + eXistIdentifier + " does not exist. Please use a name that exists and perform the operation \"Update\" again or perform the operation \"Create\" of that document");
120 }
121 // load file to local file system
122 if (protocol.equals("file")) {
123 docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server");
124 } else {
125 docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server");
126 }
127 FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName);
128 // perform validations
129 docOperation.setStatus("validate document: " + eXistIdentifier);
130 schemaHandler.validate(destFileName, docOperation);
131 // perform operation on eXist
132 docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server");
133 RegularizationManager regManager = RegularizationManager.getInstance();
134 regManager.saveRegularizations(language, destFileName);
135 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
136 mpdlXmlRpcDocHandler.saveDocumentFile(docOperation);
137 // save PDF and HTML versions of the document
138 boolean includePdf = docOperation.includePdf();
139 if (includePdf) {
140 docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier);
141 MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
142 MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord
143 mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document
144 }
145 // perform operation on eSciDoc
146 eSciDocIngestor.execute(docOperation);
147 } catch (MalformedURLException e) {
148 throw new ApplicationException(e);
149 }
150 }
151
152 private void delete(MpdlDocOperation docOperation) throws ApplicationException {
153 String operationName = docOperation.getName();
154 String eXistIdentifier = docOperation.getDestUrl();
155 String fileName = docOperation.getFileName();
156 if (fileName == null || fileName.trim().equals(""))
157 throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
158 if (! fileName.endsWith(".xml"))
159 throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
160 boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
161 if (! docExists) {
162 throw new ApplicationException("Document:" + eXistIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again.");
163 }
164 // perform operation on eXist
165 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
166 // delete file on local eXist file system: xml, pdf and html
167 String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml"
168 String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
169 FileUtil.getInstance().deleteFile(destFileNameXml);
170 boolean includePdf = docOperation.includePdf();
171 if (includePdf) {
172 String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf";
173 String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html";
174 FileUtil.getInstance().deleteFile(destFileNamePdf);
175 FileUtil.getInstance().deleteFile(destFileNameHtml);
176 }
177 // delete document in eXist
178 mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation);
179 // perform operation on eSciDoc
180 eSciDocIngestor.execute(docOperation);
181 }
182
183 private void updateExist(MpdlDocOperation docOperation) throws ApplicationException {
184 try {
185 String operationName = docOperation.getName();
186 String language = docOperation.getLanguage();
187 String srcUrlStr = docOperation.getSrcUrl();
188 String eXistIdentifier = docOperation.getDestUrl();
189 String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
190 URL srcUrl = null;
191 String protocol = null;
192 if (srcUrlStr != null && ! srcUrlStr.equals("empty")) {
193 srcUrl = new URL(srcUrlStr);
194 protocol = srcUrl.getProtocol();
195 }
196 SchemaHandler schemaHandler = new SchemaHandler();
197 if (protocol.equals("file")) {
198 docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server");
199 } else {
200 docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server");
201 }
202 // load file to local file system
203 FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName);
204 // validation
205 docOperation.setStatus("validate document: " + eXistIdentifier);
206 schemaHandler.validate(destFileName, docOperation);
207 // save regularizations of the document
208 docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server");
209 RegularizationManager regManager = RegularizationManager.getInstance();
210 regManager.saveRegularizations(language, destFileName);
211 // perform operation on eXist
212 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
213 mpdlXmlRpcDocHandler.saveDocumentFile(docOperation);
214 // save PDF and HTML versions of the document
215 boolean includePdf = docOperation.includePdf();
216 if (includePdf) {
217 docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier);
218 MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
219 MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord
220 mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document
221 }
222 } catch (MalformedURLException e) {
223 throw new ApplicationException(e);
224 }
225 }
226
227 private void deleteExist(MpdlDocOperation docOperation) throws ApplicationException {
228 String operationName = docOperation.getName();
229 String eXistIdentifier = docOperation.getDestUrl();
230 String fileName = docOperation.getFileName();
231 if (fileName == null || fileName.trim().equals(""))
232 throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
233 if (! fileName.endsWith(".xml"))
234 throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
235 boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
236 if (! docExists)
237 throw new ApplicationException("Document:" + eXistIdentifier + " does not exist.");
238 // perform operation
239 docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
240 // delete file on local file system: xml, pdf and html
241 String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4); // without ".xml"
242 String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
243 FileUtil.getInstance().deleteFile(destFileNameXml);
244 boolean includePdf = docOperation.includePdf();
245 if (includePdf) {
246 String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf";
247 String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html";
248 FileUtil.getInstance().deleteFile(destFileNamePdf);
249 FileUtil.getInstance().deleteFile(destFileNameHtml);
250 }
251 // delete document in eXist
252 mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation);
253 }
254
255 private void deleteDocumentCollections() throws ApplicationException {
256 mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionMorph);
257 mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionStandard);
258 }
259
260 private void createDocumentCollections() throws ApplicationException {
261 for (int i=0; i < docBases.length; i++) {
262 String docBase = docBases[i];
263 for (int j=0; j < languages.length; j++) {
264 String language = languages[j];
265 String documentCollectionMorph = documentRootCollectionMorph + "/" + docBase + "/" + language;
266 mpdlXmlRpcDocHandler.createCollection(documentCollectionMorph);
267 String documentCollectionStandard = documentRootCollectionStandard + "/" + docBase + "/" + language;
268 mpdlXmlRpcDocHandler.createCollection(documentCollectionStandard);
269 }
270 }
271 }
272
273 private void saveDocumentFiles() throws ApplicationException {
274 int counter = 0;
275 for (int i=0; i < docBases.length; i++) {
276 String docBase = docBases[i];
277 for (int j=0; j < languages.length; j++) {
278 String language = languages[j];
279 String documentCollection = "/" + docBase + "/" + language;
280 String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language;
281 File localFileDir = new File(localFileDirStr);
282 FilenameFilter filter = new FilenameFilterExtension("xml");
283 File[] files = localFileDir.listFiles(filter);
284 System.out.println("Adding all documents in path: \"" + localFileDirStr + "\" to eXist collection: \"" + documentCollection + "\" ...");
285 for (int k=0; k < files.length; k++) {
286 File f = files[k];
287 String localFileNameWithoutPath = f.getName();
288 String fullLocalFileName = f.getPath();
289 String srcUrl = "file://" + fullLocalFileName;
290 MpdlDocOperation docOperation = new MpdlDocOperation("updateExist", srcUrl, null, docBase, language, localFileNameWithoutPath);
291 long begin = new Date().getTime();
292 doOperation(docOperation);
293 long end = new Date().getTime();
294 System.out.println("Added document \"" + fullLocalFileName + "\" to eXist collection: \"" + documentCollection + "\" (" + (end - begin) + " ms)" );
295 counter++;
296 }
297 }
298 }
299 System.out.println("Imported documents: " + counter);
300 }
301
302 private void generatePdfHtmlDocumentFiles() throws ApplicationException {
303 int counter = 0;
304 MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
305 for (int i=0; i < docBases.length; i++) {
306 String docBase = docBases[i];
307 for (int j=0; j < languages.length; j++) {
308 String language = languages[j];
309 String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language;
310 File localFileDir = new File(localFileDirStr);
311 FilenameFilter filter = new FilenameFilterExtension("xml");
312 File[] files = localFileDir.listFiles(filter);
313 System.out.println("Generating Pdf/Html documents in path: \"" + localFileDirStr + "\" ...");
314 for (int k=0; k < files.length; k++) {
315 File f = files[k];
316 String localFileName = f.getName();
317 String fullLocalFileName = f.getPath();
318 String srcUrl = "file://" + fullLocalFileName;
319 String localFileNameWithoutExtension = localFileName.substring(0, localFileName.length() - 4); // without ".xml"
320 String fullLocalPdfFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents/" + docBase + "/" + language + "/" + localFileNameWithoutExtension + ".pdf";
321 File localPdfFile = new File(fullLocalPdfFileName);
322 boolean pdfFileAlreadyExists = localPdfFile.exists();
323 // generate Pdf/Html file only if pdf file does not already exist
324 if (! pdfFileAlreadyExists) {
325 MpdlDocOperation docOperation = new MpdlDocOperation("generatePdf", srcUrl, null, docBase, language, localFileName);
326 SchemaHandler schemaHandler = new SchemaHandler();
327 schemaHandler.validate(fullLocalFileName, docOperation);
328 long begin = new Date().getTime();
329 MetadataRecord mdRecord = docOperation.getMdRecord(); // after validation, docOperation has a mdRecord
330 mpdlRenderer.createFile(true, true, "text", mdRecord); // generate Pdf/Html document
331 long end = new Date().getTime();
332 System.out.println("Generate Pdf/Html document for: \"" + fullLocalFileName + "\" (" + (end - begin) + " ms)" );
333 counter++;
334 try {
335 Thread.sleep(60000); // delay so that called servers (digilib, eXist) are not stressed too much
336 } catch (InterruptedException e) {
337 throw new ApplicationException(e);
338 }
339 }
340 }
341 }
342 }
343 System.out.println("Generated documents: " + counter);
344 }
345
346 private void deletePresentationCollection() throws ApplicationException {
347 mpdlXmlRpcDocHandler.deleteCollection(presentationRootCollection);
348 }
349
350 private void createPresentationCollection() throws ApplicationException {
351 mpdlXmlRpcDocHandler.createCollection(presentationRootCollection);
352 }
353
354 private void deleteSchemaCollection() throws ApplicationException {
355 mpdlXmlRpcDocHandler.deleteCollection(schemaRootCollection);
356 }
357
358 private void createSchemaCollection() throws ApplicationException {
359 mpdlXmlRpcDocHandler.createCollection(schemaRootCollection);
360 }
361
362 private void beginOperation() {
363 beginOfOperation = new Date().getTime();
364 }
365
366 private void endOperation() {
367 endOfOperation = new Date().getTime();
368 }
369
370 }