Mercurial > hg > mpdl-group
annotate software/mpdl-services/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | |
children |
rev | line source |
---|---|
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 package de.mpg.mpiwg.berlin.mpdl.cms.document; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 import java.io.BufferedOutputStream; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4 import java.io.File; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 import java.io.FileNotFoundException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6 import java.io.FileOutputStream; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 import java.io.IOException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8 import java.io.OutputStreamWriter; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 import java.io.StringReader; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10 import java.net.MalformedURLException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 import java.net.URI; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12 import java.net.URISyntaxException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 import java.net.URL; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 import java.util.Arrays; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15 import java.util.Date; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 import java.util.Hashtable; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17 import java.util.Iterator; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18 import java.util.List; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 import java.util.logging.Logger; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21 import net.sf.saxon.s9api.Axis; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22 import net.sf.saxon.s9api.QName; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 import net.sf.saxon.s9api.XdmNode; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24 import net.sf.saxon.s9api.XdmNodeKind; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25 import net.sf.saxon.s9api.XdmSequenceIterator; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 import org.apache.commons.httpclient.HttpClient; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28 import org.apache.commons.httpclient.HttpException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29 import org.apache.commons.httpclient.methods.GetMethod; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30 import org.apache.commons.io.FileUtils; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31 import org.apache.http.HttpResponse; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 import org.apache.http.client.methods.HttpGet; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33 import org.apache.http.impl.client.DefaultHttpClient; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 import org.apache.http.params.BasicHttpParams; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35 import org.apache.http.params.HttpConnectionParams; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36 import org.apache.http.params.HttpParams; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37 import org.xml.sax.InputSource; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38 import org.xml.sax.SAXException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39 import org.xml.sax.XMLReader; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41 import com.sun.org.apache.xerces.internal.parsers.SAXParser; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46 import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47 import de.mpg.mpiwg.berlin.mpdl.util.Util; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48 import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49 import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
50 import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
51 import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsDocOperation; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
52 import de.mpg.mpiwg.berlin.mpdl.cms.transform.GetFragmentsContentHandler; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
53 import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
54 import de.mpg.mpiwg.berlin.mpdl.cms.transform.XslResourceTransformer; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
55 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
56 /** |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
57 * Handler for documents (singleton). |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
58 */ |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
59 public class DocumentHandler { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
60 private static Logger LOGGER = Logger.getLogger(DocumentHandler.class.getName()); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
61 private static List<String> EXCLUDED_PROJECT_DOCS = |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
62 Arrays.asList("/echo/zh/Min_chan_luyi_1_7MCGW0WG.xml", // the Saxon transfomer has heavy problems with some characters in CJK Unified Ideographs Extension B, e.g.: line 309 (second reg on page 16) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
63 "/echo/zh/Min_chan_luyi_2_U7Y9NQ9V.xml", |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
64 "/echo/zh/Min_chan_luyi_3_2FP9M172.xml", |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
65 "/echo/zh/Min_chan_luyi_4_FXA6FSFH.xml", |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
66 "/echo/zh/Min_chan_luyi_5_VG6NY5XD.xml", |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
67 "/echo/zh/Xifa_shenji.xml", |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
68 "/echo/zh/Yulei_tushuo_1_HXX4MGZW.xml", |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
69 "/echo/zh/Yulei_tushuo_2_FN1CTY5C.xml"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
70 private long beginOfOperation; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
71 private long endOfOperation; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
72 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
73 public void doOperation(CmsDocOperation docOperation) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
74 String operationName = docOperation.getName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
75 if (operationName.equals("create")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
76 create(docOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
77 } else if (operationName.equals("delete")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
78 delete(docOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
79 } else if (operationName.equals("importDirectory")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
80 importDirectory(docOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
81 } else if (operationName.equals("createPdf")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
82 createPdf(docOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
83 } else if (operationName.equals("createAllPdfInDirectory")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
84 createAllPdfInDirectory(docOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
85 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
86 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
87 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
88 private void importDirectory(CmsDocOperation docOperation) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
89 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
90 LOGGER.info("Start of DocumentHandler. This operation could be time consuming because documents are indexed (normal indexing times are 1-10 minutes for a document)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
91 beginOperation(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
92 String localDocumentsUrlStr = docOperation.getSrcUrl(); // start directory: file:/a/local/directory |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
93 String collectionNames = docOperation.getCollectionNames(); // e.g. "echo" |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
94 File localDocumentsDir = new File(new URI(localDocumentsUrlStr)); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
95 boolean docDirExists = localDocumentsDir.exists(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
96 if (! docDirExists) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
97 throw new ApplicationException("Document directory:" + localDocumentsUrlStr + " does not exists. Please use a directory that exists and perform the operation again."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
98 String[] fileExtensions = {"xml"}; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
99 Iterator<File> iterFiles = FileUtils.iterateFiles(localDocumentsDir, fileExtensions, true); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
100 int i = 0; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
101 while(iterFiles.hasNext()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
102 i++; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
103 File xmlFile = iterFiles.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
104 String xmlFileStr = xmlFile.getPath(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
105 int relativePos = (int) localDocumentsDir.getPath().length(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
106 String docId = xmlFileStr.substring(relativePos); // relative path name starting from localDocumentsDir, e.g. /tei/de/Test_1789.xml |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
107 String xmlFileUrlStr = xmlFile.toURI().toURL().toString(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
108 CmsDocOperation createDocOperation = new CmsDocOperation("create", xmlFileUrlStr, null, docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
109 createDocOperation.setCollectionNames(collectionNames); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
110 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
111 doOperation(createDocOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
112 Date now = new Date(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
113 LOGGER.info("Document " + i + ": " + docId + " successfully imported (" + now.toString() + ")"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
114 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
115 LOGGER.info("Document " + i + ": " + docId + " has problems:"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
116 e.printStackTrace(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
117 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
118 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
119 endOperation(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
120 LOGGER.info("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" ); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
121 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
122 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
123 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
124 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
125 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
126 private void createAllPdfInDirectory(CmsDocOperation docOperation) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
127 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
128 LOGGER.info("Start of generating Pdf-Documents. This operation could be time consuming because Pdf generation needs some time."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
129 beginOperation(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
130 String localDocumentsUrlStr = docOperation.getSrcUrl(); // start directory: file:/a/local/directory |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
131 String collectionNames = docOperation.getCollectionNames(); // e.g. "echo" |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
132 File localDocumentsDir = new File(new URI(localDocumentsUrlStr)); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
133 boolean docDirExists = localDocumentsDir.exists(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
134 if (! docDirExists) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
135 throw new ApplicationException("Document directory:" + localDocumentsUrlStr + " does not exists. Please use a directory that exists and perform the operation again."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
136 String[] fileExtensions = {"xml"}; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
137 Iterator<File> iterFiles = FileUtils.iterateFiles(localDocumentsDir, fileExtensions, true); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
138 int i = 0; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
139 while(iterFiles.hasNext()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
140 i++; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
141 File xmlFile = iterFiles.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
142 String xmlFileStr = xmlFile.getPath(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
143 int relativePos = (int) localDocumentsDir.getPath().length(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
144 String docId = xmlFileStr.substring(relativePos); // relative path name starting from localDocumentsDir, e.g. /tei/de/Test_1789.xml |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
145 CmsDocOperation createPdfOperation = new CmsDocOperation("createPdf", null, null, docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
146 createPdfOperation.setCollectionNames(collectionNames); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
147 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
148 doOperation(createPdfOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
149 Date now = new Date(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
150 LOGGER.info("Pdf document " + i + ": " + docId + " successfully created (" + now.toString() + ")"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
151 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
152 LOGGER.info("Pdf document " + i + ": " + docId + " has problems:"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
153 e.printStackTrace(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
154 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
155 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
156 endOperation(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
157 LOGGER.info("The Pdf generation needed: " + (endOfOperation - beginOfOperation) + " ms" ); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
158 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
159 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
160 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
161 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
162 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
163 private boolean isProjectDoc(String docId) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
164 boolean isProjectDoc = true; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
165 if (EXCLUDED_PROJECT_DOCS.contains(docId)) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
166 return false; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
167 return isProjectDoc; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
168 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
169 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
170 private void create(CmsDocOperation docOperation) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
171 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
172 String operationName = docOperation.getName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
173 String srcUrlStr = docOperation.getSrcUrl(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
174 String docId = docOperation.getDocIdentifier(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
175 if (! isProjectDoc(docId)) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
176 LOGGER.info("Operation: " + operationName + " not performed on: " + docId + ". Cause: document is excluded as project doc"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
177 return; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
178 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
179 String mainLanguage = docOperation.getMainLanguage(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
180 if (mainLanguage == null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
181 mainLanguage = getMainLanguage(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
182 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
183 String[] elementNames = docOperation.getElementNames(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
184 if (elementNames == null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
185 String[] defaultElementNames = {"s", "head", "caption", "variables", "description"}; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
186 docOperation.setElementNames(defaultElementNames); // default |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
187 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
188 String docDirName = getDocDir(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
189 String docDestFileName = getDocFullFileName(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
190 URL srcUrl = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
191 String protocol = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
192 if (srcUrlStr != null && ! srcUrlStr.equals("empty")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
193 srcUrl = new URL(srcUrlStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
194 protocol = srcUrl.getProtocol(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
195 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
196 File docDestFile = new File(docDestFileName); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
197 // parse validation on file |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
198 XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
199 XdmNode docNode = xQueryEvaluator.parse(srcUrl); // if it is not parseable an exception with a detail message is thrown |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
200 String docType = getNodeType(docNode); // archimedes, echo, TEI, html ... |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
201 docType = docType.trim(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
202 if (docType == null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
203 docOperation.setErrorMessage("file type of: " + srcUrlStr + "is not supported"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
204 return; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
205 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
206 // perform operation on file system |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
207 if (protocol.equals("file")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
208 docOperation.setStatus("upload file: " + srcUrlStr + " to CMS"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
209 } else { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
210 docOperation.setStatus("download file from: " + srcUrlStr + " to CMS"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
211 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
212 FileUtils.copyURLToFile(srcUrl, docDestFile, 100000, 100000); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
213 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
214 // replace anchor in echo documents and also add the number attribute to figures |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
215 String docDestFileNameUpgrade = docDestFileName + ".upgrade"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
216 File docDestFileUpgrade = new File(docDestFileNameUpgrade); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
217 XslResourceTransformer replaceAnchorTransformer = new XslResourceTransformer("replaceAnchor.xsl"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
218 String docDestFileUrlStr = docDestFile.getPath(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
219 String result = replaceAnchorTransformer.transform(docDestFileUrlStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
220 FileUtils.writeStringToFile(docDestFileUpgrade, result, "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
221 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
222 MetadataRecord mdRecord = new MetadataRecord(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
223 mdRecord.setDocId(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
224 mdRecord.setCollectionNames(docOperation.getCollectionNames()); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
225 mdRecord.setType("text/xml"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
226 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
227 // generate toc file (toc, figure, handwritten) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
228 XslResourceTransformer tocTransformer = new XslResourceTransformer("toc.xsl"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
229 File tocFile = new File(docDirName + "/toc.xml"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
230 String tocResult = tocTransformer.transform(docDestFileNameUpgrade); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
231 FileUtils.writeStringToFile(tocFile, tocResult, "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
232 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
233 // Get metadata info of the xml document |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
234 docOperation.setStatus("extract metadata of: " + srcUrlStr + " to CMS"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
235 XQueryEvaluator xQueryEvaluator2 = new XQueryEvaluator(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
236 mdRecord = getMetadataRecord(docDestFileUpgrade, docType, mdRecord, xQueryEvaluator2); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
237 String mdRecordLanguage = mdRecord.getLanguage(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
238 if (mdRecordLanguage == null && mainLanguage != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
239 mdRecord.setLanguage(mainLanguage); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
240 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
241 // save all pages as single xml files (untokenized and tokenized) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
242 docOperation.setStatus("extract page fragments of: " + srcUrlStr + " to CMS"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
243 File docDir = new File(docDirName + "/pages"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
244 FileUtils.deleteQuietly(docDir); // first delete pages directory |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
245 Hashtable<Integer, StringBuilder> pageFragments = getFragments(docDestFileNameUpgrade, "pb"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
246 int pageCount = pageFragments.size(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
247 if (pageCount == 0) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
248 // no pb element is found: then the whole document is the first page |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
249 String docXmlStr = FileUtils.readFileToString(docDestFileUpgrade, "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
250 docXmlStr = docXmlStr.replaceAll("<\\?xml.*?\\?>", ""); // remove the xml declaration if it exists |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
251 pageFragments = new Hashtable<Integer, StringBuilder>(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
252 pageFragments.put(new Integer(1), new StringBuilder(docXmlStr)); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
253 pageCount = 1; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
254 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
255 PageTransformer pageTransformer = new PageTransformer(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
256 for (int page=1; page<=pageCount; page++) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
257 String fragment = pageFragments.get(new Integer(page)).toString(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
258 fragment = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + fragment; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
259 String docPageFileName = docDirName + "/pages/page-" + page + ".xml"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
260 File docPageFile = new File(docPageFileName); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
261 FileUtils.writeStringToFile(docPageFile, fragment, "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
262 String language = mdRecord.getLanguage(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
263 String tokenizedXmlStr = tokenizeWithLemmas(fragment, language); // xml fragment enriched with <w> elements |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
264 tokenizedXmlStr = "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + tokenizedXmlStr; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
265 tokenizedXmlStr = enrichWordsOrigRegNorm(tokenizedXmlStr); // xml string: enrich <w> elements with normalization info (orig, reg, norm) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
266 String docPageTokenizedFileName = docDirName + "/pages/page-" + page + "-morph.xml"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
267 File docPageTokenizedFile = new File(docPageTokenizedFileName); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
268 FileUtils.writeStringToFile(docPageTokenizedFile, tokenizedXmlStr, "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
269 String docPageHtmlFileName = docDirName + "/pages/page-" + page + ".html"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
270 File docPageHtmlFile = new File(docPageHtmlFileName); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
271 String htmlStr = pageTransformer.transform(tokenizedXmlStr, mdRecord, page, "html"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
272 FileUtils.writeStringToFile(docPageHtmlFile, htmlStr, "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
273 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
274 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
275 // perform operation on Lucene |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
276 docOperation.setStatus(operationName + " document: " + docId + " in CMS"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
277 docOperation.setMdRecord(mdRecord); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
278 IndexHandler indexHandler = IndexHandler.getInstance(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
279 indexHandler.indexDocument(docOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
280 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
281 } catch (IOException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
282 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
283 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
284 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
285 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
286 private void delete(CmsDocOperation docOperation) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
287 String operationName = docOperation.getName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
288 String docIdentifier = docOperation.getDocIdentifier(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
289 if (docIdentifier == null || docIdentifier.trim().equals("")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
290 throw new ApplicationException("Your document identifier is empty. Please specify a document identifier for your document."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
291 String docDirStr = getDocDir(docIdentifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
292 File docDir = new File(docDirStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
293 boolean docExists = docDir.exists(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
294 if (! docExists) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
295 throw new ApplicationException("Document:" + docIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
296 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
297 // perform operation on file system |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
298 docOperation.setStatus(operationName + " document: " + docIdentifier + " in CMS"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
299 FileUtils.deleteQuietly(docDir); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
300 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
301 // perform operation on Lucene |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
302 IndexHandler indexHandler = IndexHandler.getInstance(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
303 indexHandler.deleteDocument(docOperation); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
304 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
305 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
306 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
307 private void createPdf(CmsDocOperation docOperation) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
308 String docId = docOperation.getDocIdentifier(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
309 String operationName = docOperation.getName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
310 if (docId == null || docId.trim().equals("")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
311 throw new ApplicationException("Your document identifier is empty. Please specify a document identifier for your document."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
312 if (! isProjectDoc(docId)) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
313 LOGGER.info("Operation: " + operationName + " not performed on: " + docId + ". Cause: document is excluded as project doc"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
314 return; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
315 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
316 IndexHandler indexHandler = IndexHandler.getInstance(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
317 MetadataRecord mdRecord = indexHandler.getDocMetadata(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
318 docOperation.setStatus("create PDF and HTML versions of the document: " + docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
319 PdfHandler pdfHandler = PdfHandler.getInstance(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
320 pdfHandler.createFile(true, true, mdRecord); // generate Pdf + Html document |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
321 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
322 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
323 private MetadataRecord getMetadataRecord(File xmlFile, String schemaName, MetadataRecord mdRecord, XQueryEvaluator xQueryEvaluator) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
324 if (schemaName == null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
325 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
326 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
327 URL srcUrl = xmlFile.toURI().toURL(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
328 if (schemaName.equals("archimedes")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
329 mdRecord = getMetadataRecordArch(xQueryEvaluator, srcUrl, mdRecord); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
330 else if (schemaName.equals("echo")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
331 mdRecord = getMetadataRecordEcho(xQueryEvaluator, srcUrl, mdRecord); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
332 else if (schemaName.equals("TEI")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
333 mdRecord = getMetadataRecordTei(xQueryEvaluator, srcUrl, mdRecord); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
334 else if (schemaName.equals("html")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
335 mdRecord = getMetadataRecordHtml(xQueryEvaluator, srcUrl, mdRecord); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
336 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
337 mdRecord.setSchemaName("diverse"); // all other cases: set docType to schemaName |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
338 } catch (MalformedURLException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
339 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
340 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
341 mdRecord.setLastModified(new Date()); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
342 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
343 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
344 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
345 private MetadataRecord getMetadataRecordArch(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
346 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/archimedes//info"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
347 if (metadataXmlStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
348 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/locator"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
349 if (identifier != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
350 identifier = StringUtils.deresolveXmlEntities(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
351 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/author"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
352 if (creator != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
353 creator = StringUtils.deresolveXmlEntities(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
354 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/title"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
355 if (title != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
356 title = StringUtils.deresolveXmlEntities(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
357 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/lang[1]"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
358 if (language != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
359 language = StringUtils.deresolveXmlEntities(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
360 String place = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/place"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
361 if (place != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
362 place = StringUtils.deresolveXmlEntities(place); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
363 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/date"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
364 Date date = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
365 if (yearStr != null && ! yearStr.equals("")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
366 yearStr = StringUtils.deresolveXmlEntities(yearStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
367 yearStr = new Util().toYearStr(yearStr); // test if possible etc |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
368 if (yearStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
369 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
370 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
371 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
372 // nothing |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
373 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
374 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
375 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
376 String rights = "open access"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
377 String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
378 String accessRights = "free"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
379 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
380 mdRecord.setIdentifier(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
381 mdRecord.setLanguage(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
382 mdRecord.setCreator(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
383 mdRecord.setTitle(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
384 mdRecord.setPublisher(place); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
385 mdRecord.setRights(rights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
386 mdRecord.setDate(date); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
387 mdRecord.setLicense(license); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
388 mdRecord.setAccessRights(accessRights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
389 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
390 // get echo metadata |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
391 String echoDir = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/echodir"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
392 String docId = mdRecord.getDocId(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
393 String echoIdTmp = docId; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
394 if (docId != null && ! docId.isEmpty()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
395 int start = docId.lastIndexOf("/"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
396 if (start != -1) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
397 start = start + 1; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
398 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
399 start = 0; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
400 int end = docId.lastIndexOf("."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
401 if (end == -1) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
402 end = docId.length(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
403 echoIdTmp = docId.substring(start, end); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
404 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
405 String echoId = "/permanent/archimedes/" + echoIdTmp; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
406 if (echoDir != null && ! echoDir.isEmpty()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
407 echoId = echoDir; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
408 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
409 mdRecord = getEchoMetadata(xQueryEvaluator, echoId, mdRecord); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
410 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
411 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//pb)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
412 int pageCount = Integer.valueOf(pageCountStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
413 mdRecord.setPageCount(pageCount); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
414 mdRecord.setSchemaName("archimedes"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
415 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
416 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
417 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
418 private MetadataRecord getMetadataRecordEcho(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
419 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/*:echo/*:metadata"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
420 if (metadataXmlStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
421 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:identifier"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
422 if (identifier != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
423 identifier = StringUtils.deresolveXmlEntities(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
424 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
425 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:creator"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
426 if (creator != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
427 creator = StringUtils.deresolveXmlEntities(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
428 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:title"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
429 if (title != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
430 title = StringUtils.deresolveXmlEntities(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
431 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:language[1]"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
432 if (language != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
433 language = StringUtils.deresolveXmlEntities(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
434 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:date"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
435 Date date = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
436 if (yearStr != null && ! yearStr.equals("")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
437 yearStr = StringUtils.deresolveXmlEntities(yearStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
438 yearStr = new Util().toYearStr(yearStr); // test if possible etc |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
439 if (yearStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
440 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
441 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
442 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
443 // nothing |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
444 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
445 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
446 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
447 String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:rights"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
448 if (rights != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
449 rights = StringUtils.deresolveXmlEntities(rights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
450 String license = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:license"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
451 if (license != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
452 license = StringUtils.deresolveXmlEntities(license); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
453 String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:accessRights"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
454 if (accessRights != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
455 accessRights = StringUtils.deresolveXmlEntities(accessRights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
456 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
457 mdRecord.setIdentifier(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
458 mdRecord.setLanguage(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
459 mdRecord.setCreator(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
460 mdRecord.setTitle(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
461 mdRecord.setRights(rights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
462 mdRecord.setDate(date); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
463 mdRecord.setLicense(license); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
464 mdRecord.setAccessRights(accessRights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
465 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
466 // get echo metadata |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
467 String echoDir = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:echodir"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
468 String echoIdTmp = identifier; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
469 if (identifier != null && ! identifier.isEmpty()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
470 int start = identifier.indexOf("ECHO:"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
471 if (start != -1) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
472 start = start + 5; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
473 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
474 start = 0; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
475 int end = identifier.lastIndexOf("."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
476 if (end == -1) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
477 end = identifier.length(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
478 echoIdTmp = identifier.substring(start, end); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
479 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
480 String echoId = "/permanent/library/" + echoIdTmp; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
481 if (echoDir != null && ! echoDir.isEmpty()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
482 echoId = echoDir; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
483 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
484 mdRecord = getEchoMetadata(xQueryEvaluator, echoId, mdRecord); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
485 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
486 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//*:pb)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
487 int pageCount = Integer.valueOf(pageCountStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
488 mdRecord.setPageCount(pageCount); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
489 mdRecord.setSchemaName("echo"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
490 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
491 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
492 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
493 private MetadataRecord getMetadataRecordTei(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
494 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/*:TEI/*:teiHeader"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
495 if (metadataXmlStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
496 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:idno"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
497 if (identifier != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
498 identifier = StringUtils.deresolveXmlEntities(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
499 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:titleStmt/*:author"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
500 if (creator != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
501 creator = StringUtils.deresolveXmlEntities(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
502 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:titleStmt/*:title"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
503 if (title != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
504 title = StringUtils.deresolveXmlEntities(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
505 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:profileDesc/*:langUsage/*:language[1]/@ident)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
506 if (language != null && language.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
507 language = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
508 if (language != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
509 language = StringUtils.deresolveXmlEntities(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
510 String place = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:pubPlace"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
511 if (place != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
512 place = StringUtils.deresolveXmlEntities(place); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
513 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:date"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
514 Date date = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
515 if (yearStr != null && ! yearStr.equals("")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
516 yearStr = StringUtils.deresolveXmlEntities(yearStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
517 yearStr = new Util().toYearStr(yearStr); // test if possible etc |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
518 if (yearStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
519 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
520 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
521 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
522 // nothing |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
523 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
524 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
525 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
526 String subject = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:profileDesc/*:textClass/*:keywords/*:term)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
527 if (subject != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
528 subject = StringUtils.deresolveXmlEntities(subject); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
529 String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:availability"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
530 if (rights == null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
531 rights = "open access"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
532 rights = StringUtils.deresolveXmlEntities(rights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
533 String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
534 String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:fileDesc/*:publicationStmt/*:availability/@status)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
535 if (accessRights == null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
536 accessRights = "free"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
537 accessRights = StringUtils.deresolveXmlEntities(accessRights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
538 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
539 mdRecord.setIdentifier(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
540 mdRecord.setLanguage(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
541 mdRecord.setCreator(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
542 mdRecord.setTitle(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
543 mdRecord.setPublisher(place); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
544 mdRecord.setRights(rights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
545 mdRecord.setDate(date); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
546 mdRecord.setSubject(subject); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
547 mdRecord.setLicense(license); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
548 mdRecord.setAccessRights(accessRights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
549 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
550 // get echo metadata |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
551 mdRecord = getEchoMetadata(xQueryEvaluator, identifier, mdRecord); // identifier is echoDir |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
552 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
553 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//*:pb)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
554 int pageCount = Integer.valueOf(pageCountStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
555 mdRecord.setPageCount(pageCount); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
556 mdRecord.setSchemaName("TEI"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
557 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
558 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
559 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
560 private MetadataRecord getMetadataRecordHtml(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
561 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/html/head"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
562 if (metadataXmlStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
563 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.identifier']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
564 if (identifier != null && ! identifier.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
565 identifier = StringUtils.deresolveXmlEntities(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
566 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.creator']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
567 if (creator != null && ! creator.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
568 creator = StringUtils.deresolveXmlEntities(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
569 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.title']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
570 if (title != null && ! title.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
571 title = StringUtils.deresolveXmlEntities(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
572 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.language']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
573 if (language != null && language.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
574 language = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
575 if (language != null && ! language.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
576 language = StringUtils.deresolveXmlEntities(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
577 String publisher = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.publisher']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
578 if (publisher != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
579 publisher = StringUtils.deresolveXmlEntities(publisher); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
580 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.date']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
581 Date date = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
582 if (yearStr != null && ! yearStr.equals("")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
583 yearStr = StringUtils.deresolveXmlEntities(yearStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
584 yearStr = new Util().toYearStr(yearStr); // test if possible etc |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
585 if (yearStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
586 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
587 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
588 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
589 // nothing |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
590 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
591 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
592 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
593 String subject = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.subject']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
594 if (subject != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
595 subject = StringUtils.deresolveXmlEntities(subject); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
596 String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.rights']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
597 if (rights != null && ! rights.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
598 rights = StringUtils.deresolveXmlEntities(rights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
599 String license = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.license']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
600 if (license != null && ! license.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
601 license = StringUtils.deresolveXmlEntities(license); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
602 String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.accessRights']/@content)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
603 if (accessRights != null && ! accessRights.isEmpty()) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
604 accessRights = StringUtils.deresolveXmlEntities(accessRights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
605 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
606 mdRecord.setIdentifier(identifier); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
607 mdRecord.setLanguage(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
608 mdRecord.setCreator(creator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
609 mdRecord.setTitle(title); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
610 mdRecord.setPublisher(publisher); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
611 mdRecord.setRights(rights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
612 mdRecord.setDate(date); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
613 mdRecord.setSubject(subject); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
614 mdRecord.setLicense(license); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
615 mdRecord.setAccessRights(accessRights); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
616 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
617 // get echo metadata |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
618 mdRecord = getEchoMetadata(xQueryEvaluator, identifier, mdRecord); // identifier is echoDir |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
619 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
620 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//pb)"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
621 int pageCount = Integer.valueOf(pageCountStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
622 mdRecord.setPageCount(pageCount); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
623 mdRecord.setSchemaName("html"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
624 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
625 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
626 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
627 private MetadataRecord getEchoMetadata(XQueryEvaluator xQueryEvaluator, String echoDir, MetadataRecord mdRecord) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
628 if (echoDir == null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
629 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
630 String urLTexter = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=" + echoDir + "/index.meta"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
631 String echoIndexMetaStr = performGetRequest(urLTexter); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
632 String echoPageImageDir = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
633 String echoFiguresDir = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
634 String mpiwgDocId = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
635 if (echoIndexMetaStr != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
636 if (echoIndexMetaStr.equals("XXXXTimeoutXXXX")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
637 return null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
638 echoPageImageDir = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/texttool/image"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
639 if (echoPageImageDir != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
640 echoPageImageDir = echoDir + "/" + echoPageImageDir; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
641 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
642 echoPageImageDir = echoDir + "/" + "pageimg"; // default |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
643 echoFiguresDir = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/texttool/figures"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
644 if (echoFiguresDir != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
645 echoFiguresDir = echoDir + "/" + echoFiguresDir; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
646 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
647 echoFiguresDir = echoDir + "/" + "figures"; // default |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
648 mpiwgDocId = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/dri[@type = 'mpiwg']"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
649 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
650 mdRecord.setEchoId(echoDir); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
651 mdRecord.setEchoPageImageDir(echoPageImageDir); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
652 mdRecord.setEchoFiguresDir(echoFiguresDir); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
653 mdRecord.setMpiwgDocId(mpiwgDocId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
654 return mdRecord; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
655 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
656 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
657 private String getNodeType(XdmNode node) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
658 String nodeType = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
659 XdmSequenceIterator iter = node.axisIterator(Axis.CHILD); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
660 if (iter != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
661 while (iter.hasNext()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
662 XdmNode firstChild = (XdmNode) iter.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
663 if (firstChild != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
664 XdmNodeKind nodeKind = firstChild.getNodeKind(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
665 if (nodeKind.ordinal() == XdmNodeKind.ELEMENT.ordinal()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
666 QName nodeQName = firstChild.getNodeName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
667 nodeType = nodeQName.getLocalName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
668 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
669 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
670 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
671 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
672 return nodeType; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
673 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
674 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
675 public String getDocFullFileName(String docId) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
676 String docDir = getDocDir(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
677 String docFileName = getDocFileName(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
678 String docFullFileName = docDir + "/" + docFileName; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
679 return docFullFileName; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
680 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
681 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
682 public String getFullFileName(String docId, String type) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
683 String docDir = getDocDir(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
684 String docFileName = getDocFileName(docId); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
685 int lastDot = docFileName.lastIndexOf("."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
686 String docFileNameWithoutExtension = docFileName.substring(0, lastDot); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
687 String fullFileName = docDir + "/" + docFileNameWithoutExtension + ".xml"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
688 if (type != null && ! type.equals("toc")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
689 fullFileName = docDir + "/" + docFileNameWithoutExtension + "." + type; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
690 } else if (type != null && type.equals("toc")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
691 fullFileName = docDir + "/toc.xml"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
692 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
693 return fullFileName; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
694 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
695 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
696 public String getDocDir(String docId) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
697 String documentsDirectory = Constants.getInstance().getDocumentsDir(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
698 String subDir = docId; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
699 if (docId.contains(".")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
700 int index = docId.lastIndexOf("."); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
701 subDir = docId.substring(0, index); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
702 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
703 if (! subDir.startsWith("/")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
704 subDir = "/" + subDir; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
705 String docDir = documentsDirectory + subDir; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
706 return docDir; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
707 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
708 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
709 public String getDocFileName(String docId) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
710 String docFileName = docId; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
711 int index = docId.lastIndexOf("/"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
712 if (index != -1) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
713 docFileName = docId.substring(index + 1); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
714 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
715 return docFileName; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
716 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
717 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
718 private String getMainLanguage(String docId) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
719 String mainLang = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
720 int to = docId.lastIndexOf("/"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
721 if (to != -1) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
722 String preStr = docId.substring(0, to); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
723 int from = preStr.lastIndexOf("/"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
724 if (from != -1) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
725 mainLang = preStr.substring(from + 1, to); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
726 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
727 return mainLang; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
728 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
729 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
730 private Hashtable<Integer, StringBuilder> getFragments(String fileName, String milestoneElementName) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
731 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
732 GetFragmentsContentHandler getFragmentsContentHandler = new GetFragmentsContentHandler(milestoneElementName); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
733 XMLReader xmlParser = new SAXParser(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
734 xmlParser.setContentHandler(getFragmentsContentHandler); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
735 StringReader bla = new StringReader(FileUtils.readFileToString(new File(fileName), "utf-8")); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
736 InputSource inputSource = new InputSource(bla); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
737 xmlParser.parse(inputSource); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
738 Hashtable<Integer, StringBuilder> resultFragments = getFragmentsContentHandler.getResultPages(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
739 return resultFragments; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
740 } catch (SAXException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
741 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
742 } catch (IOException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
743 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
744 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
745 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
746 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
747 private String tokenizeWithLemmas(String xmlStr, String language) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
748 StringReader strReader = new StringReader(xmlStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
749 XmlTokenizer xmlTokenizer = new XmlTokenizer(strReader); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
750 xmlTokenizer.setLanguage(language); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
751 String[] outputOptionsWithLemmas = {"withLemmas"}; // so all tokens are fetched with lemmas (costs performance) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
752 String[] nwbElements = {"lb", "br", "cb", "hi"}; // non word breaking elements; TODO examine bugs with emph, figure e.g. page 30 in /echo/la/Cataneo_1600.xml |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
753 xmlTokenizer.setNWBElements(nwbElements); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
754 xmlTokenizer.setOutputOptions(outputOptionsWithLemmas); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
755 xmlTokenizer.tokenize(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
756 String retStr = xmlTokenizer.getXmlResult(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
757 return retStr; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
758 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
759 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
760 private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
761 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
762 WordContentHandler wordContentHandler = new WordContentHandler(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
763 XMLReader xmlParser = new SAXParser(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
764 xmlParser.setContentHandler(wordContentHandler); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
765 StringReader strReader = new StringReader(xmlStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
766 InputSource inputSource = new InputSource(strReader); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
767 xmlParser.parse(inputSource); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
768 String result = wordContentHandler.getResult(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
769 return result; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
770 } catch (SAXException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
771 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
772 } catch (IOException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
773 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
774 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
775 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
776 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
777 private String performGetRequest(String url) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
778 String resultStr = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
779 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
780 boolean urlIsOk = checkUri(url, 2000); // if url doesn't answer after 2 seconds |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
781 if (! urlIsOk) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
782 return "XXXXTimeoutXXXX"; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
783 HttpClient httpClient = new HttpClient(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
784 GetMethod method = new GetMethod(url); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
785 httpClient.executeMethod(method); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
786 byte[] resultBytes = method.getResponseBody(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
787 resultStr = new String(resultBytes, "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
788 method.releaseConnection(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
789 } catch (HttpException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
790 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
791 } catch (IOException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
792 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
793 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
794 return resultStr; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
795 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
796 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
797 private boolean checkUri(String uriStr, int timeoutMilliseconds) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
798 boolean isOk = true; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
799 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
800 URI uri = new URI(uriStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
801 HttpGet httpGet = new HttpGet(uri); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
802 HttpParams httpParameters = new BasicHttpParams(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
803 // Set the timeout in milliseconds until a connection is established. |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
804 // The default value is zero, that means the timeout is not used. |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
805 int timeoutConnection = 2000; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
806 HttpConnectionParams.setConnectionTimeout(httpParameters, timeoutConnection); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
807 // Set the default socket timeout (SO_TIMEOUT) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
808 // in milliseconds which is the timeout for waiting for data. |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
809 int timeoutSocket = 2000; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
810 HttpConnectionParams.setSoTimeout(httpParameters, timeoutSocket); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
811 DefaultHttpClient httpClient = new DefaultHttpClient(httpParameters); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
812 HttpResponse response = httpClient.execute(httpGet); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
813 } catch (IOException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
814 isOk = false; // if timeout exception is thrown |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
815 } catch (URISyntaxException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
816 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
817 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
818 return isOk; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
819 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
820 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
821 /** |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
822 * Write string into destFile. If directory for that destFile does not exist |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
823 * it creates this directory including parent directories. |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
824 * @param str string to write |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
825 * @param destFileName destination file name |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
826 * @throws ApplicationException |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
827 */ |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
828 private void saveFile(String str, String destFileName) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
829 OutputStreamWriter out = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
830 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
831 if (str == null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
832 return; // do nothing |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
833 File destFile = new File(destFileName); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
834 File destDir = new File(destFile.getParent()); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
835 if (! destDir.exists()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
836 destDir.mkdirs(); // create the directory including parent directories which do not exist |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
837 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
838 out = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(destFile)), "utf-8"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
839 out.write(str); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
840 out.flush(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
841 } catch (FileNotFoundException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
842 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
843 } catch (IOException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
844 throw new ApplicationException(e); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
845 } finally { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
846 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
847 if (out != null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
848 out.close(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
849 } catch (Exception e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
850 // nothing: always close the stream at the end of the method |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
851 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
852 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
853 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
854 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
855 private void beginOperation() { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
856 beginOfOperation = new Date().getTime(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
857 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
858 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
859 private void endOperation() { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
860 endOfOperation = new Date().getTime(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
861 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
862 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
863 } |