Mercurial > hg > mpdl-group
comparison software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/document/DocumentHandler.java @ 25:e9fe3186670c default tip
letzter Stand eingecheckt
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 21 May 2013 10:19:32 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
23:e845310098ba | 25:e9fe3186670c |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.cms.document; | |
2 | |
3 import java.io.BufferedOutputStream; | |
4 import java.io.File; | |
5 import java.io.FileNotFoundException; | |
6 import java.io.FileOutputStream; | |
7 import java.io.IOException; | |
8 import java.io.OutputStreamWriter; | |
9 import java.io.StringReader; | |
10 import java.net.MalformedURLException; | |
11 import java.net.URI; | |
12 import java.net.URISyntaxException; | |
13 import java.net.URL; | |
14 import java.util.Arrays; | |
15 import java.util.Date; | |
16 import java.util.Hashtable; | |
17 import java.util.Iterator; | |
18 import java.util.List; | |
19 import java.util.logging.Logger; | |
20 | |
21 import net.sf.saxon.s9api.Axis; | |
22 import net.sf.saxon.s9api.QName; | |
23 import net.sf.saxon.s9api.XdmNode; | |
24 import net.sf.saxon.s9api.XdmNodeKind; | |
25 import net.sf.saxon.s9api.XdmSequenceIterator; | |
26 | |
27 import org.apache.commons.httpclient.HttpClient; | |
28 import org.apache.commons.httpclient.HttpException; | |
29 import org.apache.commons.httpclient.methods.GetMethod; | |
30 import org.apache.commons.io.FileUtils; | |
31 import org.apache.http.HttpResponse; | |
32 import org.apache.http.client.methods.HttpGet; | |
33 import org.apache.http.impl.client.DefaultHttpClient; | |
34 import org.apache.http.params.BasicHttpParams; | |
35 import org.apache.http.params.HttpConnectionParams; | |
36 import org.apache.http.params.HttpParams; | |
37 import org.xml.sax.InputSource; | |
38 import org.xml.sax.SAXException; | |
39 import org.xml.sax.XMLReader; | |
40 | |
41 import com.sun.org.apache.xerces.internal.parsers.SAXParser; | |
42 | |
43 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
44 import de.mpg.mpiwg.berlin.mpdl.lt.general.Language; | |
45 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; | |
46 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; | |
47 import de.mpg.mpiwg.berlin.mpdl.util.StringUtils; | |
48 import de.mpg.mpiwg.berlin.mpdl.util.Util; | |
49 import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; | |
50 import de.mpg.mpiwg.berlin.mpdl.cms.general.Constants; | |
51 import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; | |
52 import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsDocOperation; | |
53 import de.mpg.mpiwg.berlin.mpdl.cms.transform.GetFragmentsContentHandler; | |
54 import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; | |
55 import de.mpg.mpiwg.berlin.mpdl.cms.transform.XslResourceTransformer; | |
56 | |
57 /** | |
58 * Handler for documents (singleton). | |
59 */ | |
60 public class DocumentHandler { | |
61 private static Logger LOGGER = Logger.getLogger(DocumentHandler.class.getName()); | |
62 private static List<String> EXCLUDED_PROJECT_DOCS = | |
63 Arrays.asList("/echo/zh/Min_chan_luyi_1_7MCGW0WG.xml", // the Saxon transfomer has heavy problems with some characters in CJK Unified Ideographs Extension B, e.g.: line 309 (second reg on page 16) | |
64 "/echo/zh/Min_chan_luyi_2_U7Y9NQ9V.xml", | |
65 "/echo/zh/Min_chan_luyi_3_2FP9M172.xml", | |
66 "/echo/zh/Min_chan_luyi_4_FXA6FSFH.xml", | |
67 "/echo/zh/Min_chan_luyi_5_VG6NY5XD.xml", | |
68 "/echo/zh/Xifa_shenji.xml", | |
69 "/echo/zh/Yulei_tushuo_1_HXX4MGZW.xml", | |
70 "/echo/zh/Yulei_tushuo_2_FN1CTY5C.xml"); | |
71 private long beginOfOperation; | |
72 private long endOfOperation; | |
73 | |
74 public void doOperation(CmsDocOperation docOperation) throws ApplicationException { | |
75 String operationName = docOperation.getName(); | |
76 if (operationName.equals("create")) { | |
77 create(docOperation); | |
78 } else if (operationName.equals("delete")) { | |
79 delete(docOperation); | |
80 } else if (operationName.equals("importDirectory")) { | |
81 importDirectory(docOperation); | |
82 } else if (operationName.equals("createPdf")) { | |
83 createPdf(docOperation); | |
84 } else if (operationName.equals("createAllPdfInDirectory")) { | |
85 createAllPdfInDirectory(docOperation); | |
86 } | |
87 } | |
88 | |
89 private void importDirectory(CmsDocOperation docOperation) throws ApplicationException { | |
90 try { | |
91 LOGGER.info("Start of DocumentHandler. This operation could be time consuming because documents are indexed (normal indexing times are 1-10 minutes for a document)"); | |
92 beginOperation(); | |
93 String localDocumentsUrlStr = docOperation.getSrcUrl(); // start directory: file:/a/local/directory | |
94 String collectionNames = docOperation.getCollectionNames(); // e.g. "echo" | |
95 File localDocumentsDir = new File(new URI(localDocumentsUrlStr)); | |
96 boolean docDirExists = localDocumentsDir.exists(); | |
97 if (! docDirExists) | |
98 throw new ApplicationException("Document directory:" + localDocumentsUrlStr + " does not exists. Please use a directory that exists and perform the operation again."); | |
99 String[] fileExtensions = {"xml"}; | |
100 Iterator<File> iterFiles = FileUtils.iterateFiles(localDocumentsDir, fileExtensions, true); | |
101 int i = 0; | |
102 while(iterFiles.hasNext()) { | |
103 i++; | |
104 File xmlFile = iterFiles.next(); | |
105 String xmlFileStr = xmlFile.getPath(); | |
106 int relativePos = (int) localDocumentsDir.getPath().length(); | |
107 String docId = xmlFileStr.substring(relativePos); // relative path name starting from localDocumentsDir, e.g. /tei/de/Test_1789.xml | |
108 String xmlFileUrlStr = xmlFile.toURI().toURL().toString(); | |
109 CmsDocOperation createDocOperation = new CmsDocOperation("create", xmlFileUrlStr, null, docId); | |
110 createDocOperation.setCollectionNames(collectionNames); | |
111 try { | |
112 doOperation(createDocOperation); | |
113 Date now = new Date(); | |
114 LOGGER.info("Document " + i + ": " + docId + " successfully imported (" + now.toString() + ")"); | |
115 } catch (Exception e) { | |
116 LOGGER.info("Document " + i + ": " + docId + " has problems:"); | |
117 e.printStackTrace(); | |
118 } | |
119 } | |
120 endOperation(); | |
121 LOGGER.info("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" ); | |
122 } catch (Exception e) { | |
123 throw new ApplicationException(e); | |
124 } | |
125 } | |
126 | |
127 private void createAllPdfInDirectory(CmsDocOperation docOperation) throws ApplicationException { | |
128 try { | |
129 LOGGER.info("Start of generating Pdf-Documents. This operation could be time consuming because Pdf generation needs some time."); | |
130 beginOperation(); | |
131 String localDocumentsUrlStr = docOperation.getSrcUrl(); // start directory: file:/a/local/directory | |
132 String collectionNames = docOperation.getCollectionNames(); // e.g. "echo" | |
133 File localDocumentsDir = new File(new URI(localDocumentsUrlStr)); | |
134 boolean docDirExists = localDocumentsDir.exists(); | |
135 if (! docDirExists) | |
136 throw new ApplicationException("Document directory:" + localDocumentsUrlStr + " does not exists. Please use a directory that exists and perform the operation again."); | |
137 String[] fileExtensions = {"xml"}; | |
138 Iterator<File> iterFiles = FileUtils.iterateFiles(localDocumentsDir, fileExtensions, true); | |
139 int i = 0; | |
140 while(iterFiles.hasNext()) { | |
141 i++; | |
142 File xmlFile = iterFiles.next(); | |
143 String xmlFileStr = xmlFile.getPath(); | |
144 int relativePos = (int) localDocumentsDir.getPath().length(); | |
145 String docId = xmlFileStr.substring(relativePos); // relative path name starting from localDocumentsDir, e.g. /tei/de/Test_1789.xml | |
146 CmsDocOperation createPdfOperation = new CmsDocOperation("createPdf", null, null, docId); | |
147 createPdfOperation.setCollectionNames(collectionNames); | |
148 try { | |
149 doOperation(createPdfOperation); | |
150 Date now = new Date(); | |
151 LOGGER.info("Pdf document " + i + ": " + docId + " successfully created (" + now.toString() + ")"); | |
152 } catch (Exception e) { | |
153 LOGGER.info("Pdf document " + i + ": " + docId + " has problems:"); | |
154 e.printStackTrace(); | |
155 } | |
156 } | |
157 endOperation(); | |
158 LOGGER.info("The Pdf generation needed: " + (endOfOperation - beginOfOperation) + " ms" ); | |
159 } catch (Exception e) { | |
160 throw new ApplicationException(e); | |
161 } | |
162 } | |
163 | |
164 private boolean isProjectDoc(String docId) { | |
165 boolean isProjectDoc = true; | |
166 if (EXCLUDED_PROJECT_DOCS.contains(docId)) | |
167 return false; | |
168 return isProjectDoc; | |
169 } | |
170 | |
171 private void create(CmsDocOperation docOperation) throws ApplicationException { | |
172 try { | |
173 String operationName = docOperation.getName(); | |
174 String srcUrlStr = docOperation.getSrcUrl(); | |
175 String docId = docOperation.getDocIdentifier(); | |
176 if (! isProjectDoc(docId)) { | |
177 LOGGER.info("Operation: " + operationName + " not performed on: " + docId + ". Cause: document is excluded as project doc"); | |
178 return; | |
179 } | |
180 String mainLanguage = docOperation.getMainLanguage(); | |
181 if (mainLanguage == null) { | |
182 mainLanguage = getMainLanguage(docId); | |
183 } | |
184 String[] elementNames = docOperation.getElementNames(); | |
185 if (elementNames == null) { | |
186 String[] defaultElementNames = {"s", "head", "caption", "variables", "description"}; | |
187 docOperation.setElementNames(defaultElementNames); // default | |
188 } | |
189 String docDirName = getDocDir(docId); | |
190 String docDestFileName = getDocFullFileName(docId); | |
191 URL srcUrl = null; | |
192 String protocol = null; | |
193 if (srcUrlStr != null && ! srcUrlStr.equals("empty")) { | |
194 srcUrl = new URL(srcUrlStr); | |
195 protocol = srcUrl.getProtocol(); | |
196 } | |
197 File docDestFile = new File(docDestFileName); | |
198 // parse validation on file | |
199 XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); | |
200 XdmNode docNode = xQueryEvaluator.parse(srcUrl); // if it is not parseable an exception with a detail message is thrown | |
201 String docType = getNodeType(docNode); // archimedes, echo, TEI, html ... | |
202 docType = docType.trim(); | |
203 if (docType == null) { | |
204 docOperation.setErrorMessage("file type of: " + srcUrlStr + "is not supported"); | |
205 return; | |
206 } | |
207 // perform operation on file system | |
208 if (protocol.equals("file")) { | |
209 docOperation.setStatus("upload file: " + srcUrlStr + " to CMS"); | |
210 } else { | |
211 docOperation.setStatus("download file from: " + srcUrlStr + " to CMS"); | |
212 } | |
213 FileUtils.copyURLToFile(srcUrl, docDestFile, 100000, 100000); | |
214 | |
215 // replace anchor in echo documents and also add the number attribute to figures | |
216 String docDestFileNameUpgrade = docDestFileName + ".upgrade"; | |
217 File docDestFileUpgrade = new File(docDestFileNameUpgrade); | |
218 XslResourceTransformer replaceAnchorTransformer = new XslResourceTransformer("replaceAnchor.xsl"); | |
219 String docDestFileUrlStr = docDestFile.getPath(); | |
220 String result = replaceAnchorTransformer.transform(docDestFileUrlStr); | |
221 FileUtils.writeStringToFile(docDestFileUpgrade, result, "utf-8"); | |
222 | |
223 MetadataRecord mdRecord = new MetadataRecord(); | |
224 mdRecord.setDocId(docId); | |
225 mdRecord.setCollectionNames(docOperation.getCollectionNames()); | |
226 mdRecord.setType("text/xml"); | |
227 | |
228 // generate toc file (toc, figure, handwritten) | |
229 XslResourceTransformer tocTransformer = new XslResourceTransformer("toc.xsl"); | |
230 File tocFile = new File(docDirName + "/toc.xml"); | |
231 String tocResult = tocTransformer.transform(docDestFileNameUpgrade); | |
232 FileUtils.writeStringToFile(tocFile, tocResult, "utf-8"); | |
233 | |
234 // Get metadata info of the xml document | |
235 docOperation.setStatus("extract metadata of: " + srcUrlStr + " to CMS"); | |
236 XQueryEvaluator xQueryEvaluator2 = new XQueryEvaluator(); | |
237 mdRecord = getMetadataRecord(docDestFileUpgrade, docType, mdRecord, xQueryEvaluator2); | |
238 String mdRecordLanguage = mdRecord.getLanguage(); | |
239 if (mdRecordLanguage == null && mainLanguage != null) | |
240 mdRecord.setLanguage(mainLanguage); | |
241 | |
242 // save all pages as single xml files (untokenized and tokenized) | |
243 docOperation.setStatus("extract page fragments of: " + srcUrlStr + " to CMS"); | |
244 File docDir = new File(docDirName + "/pages"); | |
245 FileUtils.deleteQuietly(docDir); // first delete pages directory | |
246 Hashtable<Integer, StringBuilder> pageFragments = getFragments(docDestFileNameUpgrade, "pb"); | |
247 int pageCount = pageFragments.size(); | |
248 if (pageCount == 0) { | |
249 // no pb element is found: then the whole document is the first page | |
250 String docXmlStr = FileUtils.readFileToString(docDestFileUpgrade, "utf-8"); | |
251 docXmlStr = docXmlStr.replaceAll("<\\?xml.*?\\?>", ""); // remove the xml declaration if it exists | |
252 pageFragments = new Hashtable<Integer, StringBuilder>(); | |
253 pageFragments.put(new Integer(1), new StringBuilder(docXmlStr)); | |
254 pageCount = 1; | |
255 } | |
256 PageTransformer pageTransformer = new PageTransformer(); | |
257 for (int page=1; page<=pageCount; page++) { | |
258 String fragment = pageFragments.get(new Integer(page)).toString(); | |
259 fragment = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" + fragment; | |
260 String docPageFileName = docDirName + "/pages/page-" + page + ".xml"; | |
261 File docPageFile = new File(docPageFileName); | |
262 FileUtils.writeStringToFile(docPageFile, fragment, "utf-8"); | |
263 String language = mdRecord.getLanguage(); | |
264 String tokenizedXmlStr = tokenizeWithLemmas(fragment, language); // xml fragment enriched with <w> elements | |
265 tokenizedXmlStr = "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + tokenizedXmlStr; | |
266 tokenizedXmlStr = enrichWordsOrigRegNorm(tokenizedXmlStr); // xml string: enrich <w> elements with normalization info (orig, reg, norm) | |
267 String docPageTokenizedFileName = docDirName + "/pages/page-" + page + "-morph.xml"; | |
268 File docPageTokenizedFile = new File(docPageTokenizedFileName); | |
269 FileUtils.writeStringToFile(docPageTokenizedFile, tokenizedXmlStr, "utf-8"); | |
270 String docPageHtmlFileName = docDirName + "/pages/page-" + page + ".html"; | |
271 File docPageHtmlFile = new File(docPageHtmlFileName); | |
272 String htmlStr = pageTransformer.transform(tokenizedXmlStr, mdRecord, page, "html"); | |
273 FileUtils.writeStringToFile(docPageHtmlFile, htmlStr, "utf-8"); | |
274 } | |
275 | |
276 // perform operation on Lucene | |
277 docOperation.setStatus(operationName + " document: " + docId + " in CMS"); | |
278 docOperation.setMdRecord(mdRecord); | |
279 IndexHandler indexHandler = IndexHandler.getInstance(); | |
280 indexHandler.indexDocument(docOperation); | |
281 | |
282 } catch (IOException e) { | |
283 throw new ApplicationException(e); | |
284 } | |
285 } | |
286 | |
287 private void delete(CmsDocOperation docOperation) throws ApplicationException { | |
288 String operationName = docOperation.getName(); | |
289 String docIdentifier = docOperation.getDocIdentifier(); | |
290 if (docIdentifier == null || docIdentifier.trim().equals("")) | |
291 throw new ApplicationException("Your document identifier is empty. Please specify a document identifier for your document."); | |
292 String docDirStr = getDocDir(docIdentifier); | |
293 File docDir = new File(docDirStr); | |
294 boolean docExists = docDir.exists(); | |
295 if (! docExists) { | |
296 throw new ApplicationException("Document:" + docIdentifier + " does not exists. Please use a name that exists and perform the operation \"Delete\" again."); | |
297 } | |
298 // perform operation on file system | |
299 docOperation.setStatus(operationName + " document: " + docIdentifier + " in CMS"); | |
300 FileUtils.deleteQuietly(docDir); | |
301 | |
302 // perform operation on Lucene | |
303 IndexHandler indexHandler = IndexHandler.getInstance(); | |
304 indexHandler.deleteDocument(docOperation); | |
305 | |
306 } | |
307 | |
308 private void createPdf(CmsDocOperation docOperation) throws ApplicationException { | |
309 String docId = docOperation.getDocIdentifier(); | |
310 String operationName = docOperation.getName(); | |
311 if (docId == null || docId.trim().equals("")) | |
312 throw new ApplicationException("Your document identifier is empty. Please specify a document identifier for your document."); | |
313 if (! isProjectDoc(docId)) { | |
314 LOGGER.info("Operation: " + operationName + " not performed on: " + docId + ". Cause: document is excluded as project doc"); | |
315 return; | |
316 } | |
317 IndexHandler indexHandler = IndexHandler.getInstance(); | |
318 MetadataRecord mdRecord = indexHandler.getDocMetadata(docId); | |
319 docOperation.setStatus("create PDF and HTML versions of the document: " + docId); | |
320 PdfHandler pdfHandler = PdfHandler.getInstance(); | |
321 pdfHandler.createFile(true, true, mdRecord); // generate Pdf + Html document | |
322 } | |
323 | |
324 private MetadataRecord getMetadataRecord(File xmlFile, String schemaName, MetadataRecord mdRecord, XQueryEvaluator xQueryEvaluator) throws ApplicationException { | |
325 if (schemaName == null) | |
326 return mdRecord; | |
327 try { | |
328 URL srcUrl = xmlFile.toURI().toURL(); | |
329 if (schemaName.equals("archimedes")) | |
330 mdRecord = getMetadataRecordArch(xQueryEvaluator, srcUrl, mdRecord); | |
331 else if (schemaName.equals("echo")) | |
332 mdRecord = getMetadataRecordEcho(xQueryEvaluator, srcUrl, mdRecord); | |
333 else if (schemaName.equals("TEI")) | |
334 mdRecord = getMetadataRecordTei(xQueryEvaluator, srcUrl, mdRecord); | |
335 else if (schemaName.equals("html")) | |
336 mdRecord = getMetadataRecordHtml(xQueryEvaluator, srcUrl, mdRecord); | |
337 else | |
338 mdRecord.setSchemaName("diverse"); // all other cases: set docType to schemaName | |
339 } catch (MalformedURLException e) { | |
340 throw new ApplicationException(e); | |
341 } | |
342 mdRecord.setLastModified(new Date()); | |
343 return mdRecord; | |
344 } | |
345 | |
346 private MetadataRecord getMetadataRecordArch(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { | |
347 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/archimedes//info"); | |
348 if (metadataXmlStr != null) { | |
349 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/locator"); | |
350 if (identifier != null) | |
351 identifier = StringUtils.deresolveXmlEntities(identifier); | |
352 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/author"); | |
353 if (creator != null) | |
354 creator = StringUtils.deresolveXmlEntities(creator); | |
355 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/title"); | |
356 if (title != null) | |
357 title = StringUtils.deresolveXmlEntities(title); | |
358 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/lang[1]"); | |
359 if (language != null) | |
360 language = StringUtils.deresolveXmlEntities(language); | |
361 String place = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/place"); | |
362 if (place != null) | |
363 place = StringUtils.deresolveXmlEntities(place); | |
364 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/date"); | |
365 Date date = null; | |
366 if (yearStr != null && ! yearStr.equals("")) { | |
367 yearStr = StringUtils.deresolveXmlEntities(yearStr); | |
368 yearStr = new Util().toYearStr(yearStr); // test if possible etc | |
369 if (yearStr != null) { | |
370 try { | |
371 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); | |
372 } catch (Exception e) { | |
373 // nothing | |
374 } | |
375 } | |
376 } | |
377 String rights = "open access"; | |
378 String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration"; | |
379 String accessRights = "free"; | |
380 | |
381 mdRecord.setIdentifier(identifier); | |
382 mdRecord.setLanguage(language); | |
383 mdRecord.setCreator(creator); | |
384 mdRecord.setTitle(title); | |
385 mdRecord.setPublisher(place); | |
386 mdRecord.setRights(rights); | |
387 mdRecord.setDate(date); | |
388 mdRecord.setLicense(license); | |
389 mdRecord.setAccessRights(accessRights); | |
390 | |
391 // get echo metadata | |
392 String echoDir = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/info/echodir"); | |
393 String docId = mdRecord.getDocId(); | |
394 String echoIdTmp = docId; | |
395 if (docId != null && ! docId.isEmpty()) { | |
396 int start = docId.lastIndexOf("/"); | |
397 if (start != -1) | |
398 start = start + 1; | |
399 else | |
400 start = 0; | |
401 int end = docId.lastIndexOf("."); | |
402 if (end == -1) | |
403 end = docId.length(); | |
404 echoIdTmp = docId.substring(start, end); | |
405 } | |
406 String echoId = "/permanent/archimedes/" + echoIdTmp; | |
407 if (echoIdTmp == null || echoIdTmp.isEmpty()) | |
408 echoId = null; | |
409 if (echoDir != null && ! echoDir.isEmpty()) { | |
410 echoId = echoDir; | |
411 } | |
412 mdRecord = getEchoMetadata(xQueryEvaluator, echoId, mdRecord); | |
413 } | |
414 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//pb)"); | |
415 int pageCount = Integer.valueOf(pageCountStr); | |
416 mdRecord.setPageCount(pageCount); | |
417 mdRecord.setSchemaName("archimedes"); | |
418 return mdRecord; | |
419 } | |
420 | |
421 private MetadataRecord getMetadataRecordEcho(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { | |
422 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/*:echo/*:metadata"); | |
423 if (metadataXmlStr != null) { | |
424 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:identifier"); | |
425 if (identifier != null) { | |
426 identifier = StringUtils.deresolveXmlEntities(identifier); | |
427 } | |
428 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:creator"); | |
429 if (creator != null) | |
430 creator = StringUtils.deresolveXmlEntities(creator); | |
431 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:title"); | |
432 if (title != null) | |
433 title = StringUtils.deresolveXmlEntities(title); | |
434 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:language[1]"); | |
435 if (language != null) | |
436 language = StringUtils.deresolveXmlEntities(language); | |
437 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:date"); | |
438 Date date = null; | |
439 if (yearStr != null && ! yearStr.equals("")) { | |
440 yearStr = StringUtils.deresolveXmlEntities(yearStr); | |
441 yearStr = new Util().toYearStr(yearStr); // test if possible etc | |
442 if (yearStr != null) { | |
443 try { | |
444 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); | |
445 } catch (Exception e) { | |
446 // nothing | |
447 } | |
448 } | |
449 } | |
450 String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:rights"); | |
451 if (rights != null) | |
452 rights = StringUtils.deresolveXmlEntities(rights); | |
453 String license = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:license"); | |
454 if (license != null) | |
455 license = StringUtils.deresolveXmlEntities(license); | |
456 String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:accessRights"); | |
457 if (accessRights != null) | |
458 accessRights = StringUtils.deresolveXmlEntities(accessRights); | |
459 | |
460 mdRecord.setIdentifier(identifier); | |
461 mdRecord.setLanguage(language); | |
462 mdRecord.setCreator(creator); | |
463 mdRecord.setTitle(title); | |
464 mdRecord.setRights(rights); | |
465 mdRecord.setDate(date); | |
466 mdRecord.setLicense(license); | |
467 mdRecord.setAccessRights(accessRights); | |
468 | |
469 // get echo metadata | |
470 String echoDir = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:metadata/*:echodir"); | |
471 String echoIdTmp = identifier; | |
472 if (identifier != null && ! identifier.isEmpty()) { | |
473 int start = identifier.indexOf("ECHO:"); | |
474 if (start != -1) | |
475 start = start + 5; | |
476 else | |
477 start = 0; | |
478 int end = identifier.lastIndexOf("."); | |
479 if (end == -1) | |
480 end = identifier.length(); | |
481 echoIdTmp = identifier.substring(start, end); | |
482 } | |
483 String echoId = "/permanent/library/" + echoIdTmp; | |
484 if (echoIdTmp == null || echoIdTmp.isEmpty()) | |
485 echoId = null; | |
486 if (echoDir != null && ! echoDir.isEmpty()) { | |
487 echoId = echoDir; | |
488 } | |
489 mdRecord = getEchoMetadata(xQueryEvaluator, echoId, mdRecord); | |
490 } | |
491 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//*:pb)"); | |
492 int pageCount = Integer.valueOf(pageCountStr); | |
493 mdRecord.setPageCount(pageCount); | |
494 mdRecord.setSchemaName("echo"); | |
495 return mdRecord; | |
496 } | |
497 | |
498 private MetadataRecord getMetadataRecordTei(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { | |
499 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/*:TEI/*:teiHeader"); | |
500 if (metadataXmlStr != null) { | |
501 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:idno"); | |
502 if (identifier != null) { | |
503 identifier = StringUtils.deresolveXmlEntities(identifier); | |
504 identifier = deleteSpecialChars(identifier); | |
505 } | |
506 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:titleStmt/*:author"); | |
507 if (creator != null) | |
508 creator = StringUtils.deresolveXmlEntities(creator); | |
509 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:titleStmt/*:title"); | |
510 if (title != null) | |
511 title = StringUtils.deresolveXmlEntities(title); | |
512 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:profileDesc/*:langUsage/*:language[1]/@ident)"); | |
513 if (language != null && language.isEmpty()) | |
514 language = null; | |
515 if (language != null) { | |
516 language = language.toLowerCase(); | |
517 if (language.length() == 5) { // e.g. "de-DE or en-US" | |
518 if (language.substring(2, 3).equals("-")) { | |
519 String lang = language.substring(0, 2); | |
520 language = Language.getInstance().getISO639Code(lang); | |
521 } | |
522 } | |
523 } | |
524 String place = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:pubPlace"); | |
525 if (place != null) | |
526 place = StringUtils.deresolveXmlEntities(place); | |
527 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:date"); | |
528 Date date = null; | |
529 if (yearStr != null && ! yearStr.equals("")) { | |
530 yearStr = StringUtils.deresolveXmlEntities(yearStr); | |
531 yearStr = new Util().toYearStr(yearStr); // test if possible etc | |
532 if (yearStr != null) { | |
533 try { | |
534 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); | |
535 } catch (Exception e) { | |
536 // nothing | |
537 } | |
538 } | |
539 } | |
540 String subject = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:profileDesc/*:textClass/*:keywords/*:term)"); | |
541 if (subject != null) | |
542 subject = StringUtils.deresolveXmlEntities(subject); | |
543 String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "/*:teiHeader/*:fileDesc/*:publicationStmt/*:availability"); | |
544 if (rights == null) | |
545 rights = "open access"; | |
546 rights = StringUtils.deresolveXmlEntities(rights); | |
547 String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration"; | |
548 String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/*:teiHeader/*:fileDesc/*:publicationStmt/*:availability/@status)"); | |
549 if (accessRights == null) | |
550 accessRights = "free"; | |
551 accessRights = StringUtils.deresolveXmlEntities(accessRights); | |
552 | |
553 mdRecord.setIdentifier(identifier); | |
554 mdRecord.setLanguage(language); | |
555 mdRecord.setCreator(creator); | |
556 mdRecord.setTitle(title); | |
557 mdRecord.setPublisher(place); | |
558 mdRecord.setRights(rights); | |
559 mdRecord.setDate(date); | |
560 mdRecord.setSubject(subject); | |
561 mdRecord.setLicense(license); | |
562 mdRecord.setAccessRights(accessRights); | |
563 | |
564 // get echo metadata | |
565 mdRecord = getEchoMetadata(xQueryEvaluator, identifier, mdRecord); // identifier is echoDir | |
566 } | |
567 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//*:pb)"); | |
568 int pageCount = Integer.valueOf(pageCountStr); | |
569 mdRecord.setPageCount(pageCount); | |
570 mdRecord.setSchemaName("TEI"); | |
571 return mdRecord; | |
572 } | |
573 | |
574 private MetadataRecord getMetadataRecordHtml(XQueryEvaluator xQueryEvaluator, URL srcUrl, MetadataRecord mdRecord) throws ApplicationException { | |
575 String metadataXmlStr = xQueryEvaluator.evaluateAsString(srcUrl, "/html/head"); | |
576 if (metadataXmlStr != null) { | |
577 String identifier = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.identifier']/@content)"); | |
578 if (identifier != null && ! identifier.isEmpty()) | |
579 identifier = StringUtils.deresolveXmlEntities(identifier); | |
580 String creator = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.creator']/@content)"); | |
581 if (creator != null && ! creator.isEmpty()) | |
582 creator = StringUtils.deresolveXmlEntities(creator); | |
583 String title = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.title']/@content)"); | |
584 if (title != null && ! title.isEmpty()) | |
585 title = StringUtils.deresolveXmlEntities(title); | |
586 String language = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.language']/@content)"); | |
587 if (language != null && language.isEmpty()) | |
588 language = null; | |
589 if (language != null && ! language.isEmpty()) | |
590 language = StringUtils.deresolveXmlEntities(language); | |
591 String publisher = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.publisher']/@content)"); | |
592 if (publisher != null) | |
593 publisher = StringUtils.deresolveXmlEntities(publisher); | |
594 String yearStr = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.date']/@content)"); | |
595 Date date = null; | |
596 if (yearStr != null && ! yearStr.equals("")) { | |
597 yearStr = StringUtils.deresolveXmlEntities(yearStr); | |
598 yearStr = new Util().toYearStr(yearStr); // test if possible etc | |
599 if (yearStr != null) { | |
600 try { | |
601 date = new Util().toDate(yearStr + "-01-01T00:00:00.000Z"); | |
602 } catch (Exception e) { | |
603 // nothing | |
604 } | |
605 } | |
606 } | |
607 String subject = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.subject']/@content)"); | |
608 if (subject != null) | |
609 subject = StringUtils.deresolveXmlEntities(subject); | |
610 String rights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.rights']/@content)"); | |
611 if (rights != null && ! rights.isEmpty()) | |
612 rights = StringUtils.deresolveXmlEntities(rights); | |
613 String license = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.license']/@content)"); | |
614 if (license != null && ! license.isEmpty()) | |
615 license = StringUtils.deresolveXmlEntities(license); | |
616 String accessRights = xQueryEvaluator.evaluateAsStringValueJoined(metadataXmlStr, "string(/meta[@name = 'DC.accessRights']/@content)"); | |
617 if (accessRights != null && ! accessRights.isEmpty()) | |
618 accessRights = StringUtils.deresolveXmlEntities(accessRights); | |
619 | |
620 mdRecord.setIdentifier(identifier); | |
621 mdRecord.setLanguage(language); | |
622 mdRecord.setCreator(creator); | |
623 mdRecord.setTitle(title); | |
624 mdRecord.setPublisher(publisher); | |
625 mdRecord.setRights(rights); | |
626 mdRecord.setDate(date); | |
627 mdRecord.setSubject(subject); | |
628 mdRecord.setLicense(license); | |
629 mdRecord.setAccessRights(accessRights); | |
630 | |
631 // get echo metadata | |
632 mdRecord = getEchoMetadata(xQueryEvaluator, identifier, mdRecord); // identifier is echoDir | |
633 } | |
634 String pageCountStr = xQueryEvaluator.evaluateAsString(srcUrl, "count(//pb)"); | |
635 int pageCount = Integer.valueOf(pageCountStr); | |
636 mdRecord.setPageCount(pageCount); | |
637 mdRecord.setSchemaName("html"); | |
638 return mdRecord; | |
639 } | |
640 | |
641 private MetadataRecord getEchoMetadata(XQueryEvaluator xQueryEvaluator, String echoDir, MetadataRecord mdRecord) throws ApplicationException { | |
642 if (echoDir == null || echoDir.isEmpty()) { | |
643 String docId = mdRecord.getDocId(); | |
644 echoDir = getEchoDir(xQueryEvaluator, docId); | |
645 if (echoDir == null) | |
646 return mdRecord; | |
647 } | |
648 String urLTexter = "http://digilib.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter?fn=" + echoDir + "/index.meta"; | |
649 String echoIndexMetaStr = performGetRequest(urLTexter); | |
650 String echoPageImageDir = null; | |
651 String echoFiguresDir = null; | |
652 String mpiwgDocId = null; | |
653 if (echoIndexMetaStr != null) { | |
654 if (echoIndexMetaStr.equals("XXXXTimeoutXXXX")) | |
655 return null; | |
656 else if (echoIndexMetaStr.equals("XXXXUrlErrorXXXX")) | |
657 return mdRecord; | |
658 echoPageImageDir = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/texttool/image"); | |
659 if (echoPageImageDir != null) | |
660 echoPageImageDir = echoDir + "/" + echoPageImageDir; | |
661 else | |
662 echoPageImageDir = echoDir + "/" + "pageimg"; // default | |
663 echoFiguresDir = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/texttool/figures"); | |
664 if (echoFiguresDir != null) | |
665 echoFiguresDir = echoDir + "/" + echoFiguresDir; | |
666 else | |
667 echoFiguresDir = echoDir + "/" + "figures"; // default | |
668 mpiwgDocId = xQueryEvaluator.evaluateAsStringValueJoined(echoIndexMetaStr, "/resource/meta/dri[@type = 'mpiwg']"); | |
669 } | |
670 mdRecord.setEchoId(echoDir); | |
671 mdRecord.setEchoPageImageDir(echoPageImageDir); | |
672 mdRecord.setEchoFiguresDir(echoFiguresDir); | |
673 mdRecord.setMpiwgDocId(mpiwgDocId); | |
674 return mdRecord; | |
675 } | |
676 | |
677 private String getEchoDir(XQueryEvaluator xQueryEvaluator, String docId) throws ApplicationException { | |
678 String echoDir = null; | |
679 String urLTextUrlPath = "http://md.mpiwg-berlin.mpg.de/purls/searchSolr?text-url-path=" + docId + "&format=short"; | |
680 String resultXmlStr = performGetRequest(urLTextUrlPath); | |
681 if (resultXmlStr != null) { | |
682 if (resultXmlStr.equals("XXXXTimeoutXXXX")) | |
683 return null; | |
684 else if (resultXmlStr.equals("XXXXUrlErrorXXXX")) | |
685 return null; | |
686 String archivePath = xQueryEvaluator.evaluateAsStringValueJoined(resultXmlStr, "//archive-path"); | |
687 if (archivePath != null) { | |
688 archivePath = archivePath.replaceAll("/mpiwg/online", ""); | |
689 if (archivePath.isEmpty()) | |
690 echoDir = null; | |
691 else | |
692 echoDir = archivePath; | |
693 } | |
694 } | |
695 return echoDir; | |
696 } | |
697 | |
698 private String getNodeType(XdmNode node) { | |
699 String nodeType = null; | |
700 XdmSequenceIterator iter = node.axisIterator(Axis.CHILD); | |
701 if (iter != null) { | |
702 while (iter.hasNext()) { | |
703 XdmNode firstChild = (XdmNode) iter.next(); | |
704 if (firstChild != null) { | |
705 XdmNodeKind nodeKind = firstChild.getNodeKind(); | |
706 if (nodeKind.ordinal() == XdmNodeKind.ELEMENT.ordinal()) { | |
707 QName nodeQName = firstChild.getNodeName(); | |
708 nodeType = nodeQName.getLocalName(); | |
709 } | |
710 } | |
711 } | |
712 } | |
713 return nodeType; | |
714 } | |
715 | |
716 public String getDocFullFileName(String docId) { | |
717 String docDir = getDocDir(docId); | |
718 String docFileName = getDocFileName(docId); | |
719 String docFullFileName = docDir + "/" + docFileName; | |
720 return docFullFileName; | |
721 } | |
722 | |
723 public String getFullFileName(String docId, String type) { | |
724 String docDir = getDocDir(docId); | |
725 String docFileName = getDocFileName(docId); | |
726 int lastDot = docFileName.lastIndexOf("."); | |
727 String docFileNameWithoutExtension = docFileName.substring(0, lastDot); | |
728 String fullFileName = docDir + "/" + docFileNameWithoutExtension + ".xml"; | |
729 if (type != null && ! type.equals("toc")) { | |
730 fullFileName = docDir + "/" + docFileNameWithoutExtension + "." + type; | |
731 } else if (type != null && type.equals("toc")) { | |
732 fullFileName = docDir + "/toc.xml"; | |
733 } | |
734 return fullFileName; | |
735 } | |
736 | |
737 public String getDocDir(String docId) { | |
738 String documentsDirectory = Constants.getInstance().getDocumentsDir(); | |
739 String subDir = docId; | |
740 if (docId.contains(".")) { | |
741 int index = docId.lastIndexOf("."); | |
742 subDir = docId.substring(0, index); | |
743 } | |
744 if (! subDir.startsWith("/")) | |
745 subDir = "/" + subDir; | |
746 String docDir = documentsDirectory + subDir; | |
747 return docDir; | |
748 } | |
749 | |
750 public String getDocFileName(String docId) { | |
751 String docFileName = docId; | |
752 int index = docId.lastIndexOf("/"); | |
753 if (index != -1) { | |
754 docFileName = docId.substring(index + 1); | |
755 } | |
756 return docFileName; | |
757 } | |
758 | |
759 private String getMainLanguage(String docId) { | |
760 String mainLang = null; | |
761 int to = docId.lastIndexOf("/"); | |
762 if (to != -1) { | |
763 String preStr = docId.substring(0, to); | |
764 int from = preStr.lastIndexOf("/"); | |
765 if (from != -1) | |
766 mainLang = preStr.substring(from + 1, to); | |
767 } | |
768 return mainLang; | |
769 } | |
770 | |
771 private String deleteSpecialChars(String inputStr) { | |
772 StringBuilder buf = new StringBuilder(); | |
773 for (int i = 0; i < inputStr.length(); i++) { | |
774 char c = inputStr.charAt(i); | |
775 String replace = new String(); | |
776 switch (c) { | |
777 case '@': replace = ""; break; | |
778 case ' ': replace = ""; break; | |
779 case ';': replace = ""; break; | |
780 default: replace += c; break; | |
781 } | |
782 buf.append(replace); | |
783 } | |
784 return buf.toString(); | |
785 } | |
786 | |
787 private Hashtable<Integer, StringBuilder> getFragments(String fileName, String milestoneElementName) throws ApplicationException { | |
788 try { | |
789 GetFragmentsContentHandler getFragmentsContentHandler = new GetFragmentsContentHandler(milestoneElementName); | |
790 XMLReader xmlParser = new SAXParser(); | |
791 xmlParser.setContentHandler(getFragmentsContentHandler); | |
792 StringReader bla = new StringReader(FileUtils.readFileToString(new File(fileName), "utf-8")); | |
793 InputSource inputSource = new InputSource(bla); | |
794 xmlParser.parse(inputSource); | |
795 Hashtable<Integer, StringBuilder> resultFragments = getFragmentsContentHandler.getResultPages(); | |
796 return resultFragments; | |
797 } catch (SAXException e) { | |
798 throw new ApplicationException(e); | |
799 } catch (IOException e) { | |
800 throw new ApplicationException(e); | |
801 } | |
802 } | |
803 | |
804 private String tokenizeWithLemmas(String xmlStr, String language) throws ApplicationException { | |
805 StringReader strReader = new StringReader(xmlStr); | |
806 XmlTokenizer xmlTokenizer = new XmlTokenizer(strReader); | |
807 xmlTokenizer.setLanguage(language); | |
808 String[] outputOptionsWithLemmas = {"withLemmas"}; // so all tokens are fetched with lemmas (costs performance) | |
809 // non word breaking elements; | |
810 // TODO examine bugs with emph, figure, hi : | |
811 // e.g. "... der <hi rend="i">Capi-<lb n="16"/>talist.</hi> Es ..." | |
812 // e.g. page 30 in /echo/la/Cataneo_1600.xml | |
813 String[] nwbElements = {"lb", "br", "cb"}; | |
814 xmlTokenizer.setNWBElements(nwbElements); | |
815 xmlTokenizer.setOutputOptions(outputOptionsWithLemmas); | |
816 xmlTokenizer.tokenize(); | |
817 String retStr = xmlTokenizer.getXmlResult(); | |
818 return retStr; | |
819 } | |
820 | |
821 private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { | |
822 try { | |
823 WordContentHandler wordContentHandler = new WordContentHandler(); | |
824 XMLReader xmlParser = new SAXParser(); | |
825 xmlParser.setContentHandler(wordContentHandler); | |
826 StringReader strReader = new StringReader(xmlStr); | |
827 InputSource inputSource = new InputSource(strReader); | |
828 xmlParser.parse(inputSource); | |
829 String result = wordContentHandler.getResult(); | |
830 return result; | |
831 } catch (SAXException e) { | |
832 throw new ApplicationException(e); | |
833 } catch (IOException e) { | |
834 throw new ApplicationException(e); | |
835 } | |
836 } | |
837 | |
838 private String performGetRequest(String url) throws ApplicationException { | |
839 String resultStr = null; | |
840 try { | |
841 boolean urlIsOk = checkUri(url, 2000); // if url doesn't answer after 2 seconds | |
842 if (! urlIsOk) | |
843 return "XXXXTimeoutXXXX"; | |
844 HttpClient httpClient = new HttpClient(); | |
845 GetMethod method = new GetMethod(url); | |
846 httpClient.executeMethod(method); | |
847 int statusCode = method.getStatusCode(); | |
848 if (statusCode >= 400) | |
849 return "XXXXUrlErrorXXXX"; | |
850 byte[] resultBytes = method.getResponseBody(); | |
851 resultStr = new String(resultBytes, "utf-8"); | |
852 method.releaseConnection(); | |
853 } catch (HttpException e) { | |
854 throw new ApplicationException(e); | |
855 } catch (IOException e) { | |
856 throw new ApplicationException(e); | |
857 } | |
858 return resultStr; | |
859 } | |
860 | |
861 private boolean checkUri(String uriStr, int timeoutMilliseconds) throws ApplicationException { | |
862 boolean isOk = true; | |
863 try { | |
864 URI uri = new URI(uriStr); | |
865 HttpGet httpGet = new HttpGet(uri); | |
866 HttpParams httpParameters = new BasicHttpParams(); | |
867 // Set the timeout in milliseconds until a connection is established. | |
868 // The default value is zero, that means the timeout is not used. | |
869 int timeoutConnection = 2000; | |
870 HttpConnectionParams.setConnectionTimeout(httpParameters, timeoutConnection); | |
871 // Set the default socket timeout (SO_TIMEOUT) | |
872 // in milliseconds which is the timeout for waiting for data. | |
873 int timeoutSocket = 2000; | |
874 HttpConnectionParams.setSoTimeout(httpParameters, timeoutSocket); | |
875 DefaultHttpClient httpClient = new DefaultHttpClient(httpParameters); | |
876 HttpResponse response = httpClient.execute(httpGet); | |
877 } catch (IOException e) { | |
878 isOk = false; // if timeout exception is thrown | |
879 } catch (URISyntaxException e) { | |
880 throw new ApplicationException(e); | |
881 } | |
882 return isOk; | |
883 } | |
884 | |
885 /** | |
886 * Write string into destFile. If directory for that destFile does not exist | |
887 * it creates this directory including parent directories. | |
888 * @param str string to write | |
889 * @param destFileName destination file name | |
890 * @throws ApplicationException | |
891 */ | |
892 private void saveFile(String str, String destFileName) throws ApplicationException { | |
893 OutputStreamWriter out = null; | |
894 try { | |
895 if (str == null) | |
896 return; // do nothing | |
897 File destFile = new File(destFileName); | |
898 File destDir = new File(destFile.getParent()); | |
899 if (! destDir.exists()) { | |
900 destDir.mkdirs(); // create the directory including parent directories which do not exist | |
901 } | |
902 out = new OutputStreamWriter(new BufferedOutputStream(new FileOutputStream(destFile)), "utf-8"); | |
903 out.write(str); | |
904 out.flush(); | |
905 } catch (FileNotFoundException e) { | |
906 throw new ApplicationException(e); | |
907 } catch (IOException e) { | |
908 throw new ApplicationException(e); | |
909 } finally { | |
910 try { | |
911 if (out != null) | |
912 out.close(); | |
913 } catch (Exception e) { | |
914 // nothing: always close the stream at the end of the method | |
915 } | |
916 } | |
917 } | |
918 | |
919 private void beginOperation() { | |
920 beginOfOperation = new Date().getTime(); | |
921 } | |
922 | |
923 private void endOperation() { | |
924 endOfOperation = new Date().getTime(); | |
925 } | |
926 | |
927 } |