Mercurial > hg > mpdl-group
comparison software/mpdl-services-new/mpiwg-mpdl-cms/src/de/mpg/mpiwg/berlin/mpdl/cms/test/TestLocal.java @ 25:e9fe3186670c default tip
letzter Stand eingecheckt
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 21 May 2013 10:19:32 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
23:e845310098ba | 25:e9fe3186670c |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.cms.test; | |
2 | |
3 import java.io.BufferedInputStream; | |
4 import java.io.File; | |
5 import java.io.FileReader; | |
6 import java.io.IOException; | |
7 import java.io.InputStream; | |
8 import java.io.PrintWriter; | |
9 import java.io.StringReader; | |
10 import java.io.StringWriter; | |
11 import java.net.URL; | |
12 import java.util.ArrayList; | |
13 import java.util.Date; | |
14 import java.util.Hashtable; | |
15 | |
16 import javax.xml.transform.stream.StreamSource; | |
17 | |
18 import net.sf.saxon.s9api.Processor; | |
19 import net.sf.saxon.s9api.QName; | |
20 import net.sf.saxon.s9api.Serializer; | |
21 import net.sf.saxon.s9api.XdmNode; | |
22 import net.sf.saxon.s9api.XsltCompiler; | |
23 import net.sf.saxon.s9api.XsltExecutable; | |
24 | |
25 import org.apache.commons.io.FileUtils; | |
26 import org.apache.commons.io.IOUtils; | |
27 import org.apache.lucene.document.Document; | |
28 import org.apache.lucene.document.Fieldable; | |
29 import org.apache.lucene.index.Term; | |
30 import org.xml.sax.InputSource; | |
31 import org.xml.sax.SAXException; | |
32 import org.xml.sax.XMLReader; | |
33 | |
34 import com.sun.org.apache.xerces.internal.parsers.SAXParser; | |
35 | |
36 import de.mpg.mpiwg.berlin.mpdl.cms.lucene.IndexHandler; | |
37 import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsChainScheduler; | |
38 import de.mpg.mpiwg.berlin.mpdl.cms.scheduler.CmsDocOperation; | |
39 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
40 import de.mpg.mpiwg.berlin.mpdl.lt.dict.db.LexHandler; | |
41 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache; | |
42 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.WordContentHandler; | |
43 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizer; | |
44 import de.mpg.mpiwg.berlin.mpdl.lt.text.tokenize.XmlTokenizerContentHandler; | |
45 import de.mpg.mpiwg.berlin.mpdl.util.Util; | |
46 import de.mpg.mpiwg.berlin.mpdl.xml.xquery.XQueryEvaluator; | |
47 import de.mpg.mpiwg.berlin.mpdl.cms.transform.GetFragmentsContentHandler; | |
48 import de.mpg.mpiwg.berlin.mpdl.cms.transform.HighlightContentHandler; | |
49 import de.mpg.mpiwg.berlin.mpdl.cms.transform.PageTransformer; | |
50 import de.mpg.mpiwg.berlin.mpdl.cms.transform.XslResourceTransformer; | |
51 import de.mpg.mpiwg.berlin.mpdl.cms.document.DocumentHandler; | |
52 import de.mpg.mpiwg.berlin.mpdl.cms.document.Hits; | |
53 import de.mpg.mpiwg.berlin.mpdl.cms.document.MetadataRecord; | |
54 import de.mpg.mpiwg.berlin.mpdl.cms.document.Token; | |
55 | |
56 public class TestLocal { | |
57 private IndexHandler indexer; | |
58 | |
59 public static void main(String[] args) throws ApplicationException { | |
60 try { | |
61 TestLocal test = new TestLocal(); | |
62 test.init(); | |
63 // test.importAllDocuments(); | |
64 // test.createAllPdfInDirectory(); | |
65 // test.testTransform(); | |
66 // test.testXml(); | |
67 // test.generateToc(); | |
68 test.testCalls(); | |
69 // test.generatePdf(); | |
70 // test.xquery(); | |
71 // test.createToc(); | |
72 // test.testScheduler(); | |
73 // test.getDocInfo(); | |
74 // test.testChars(); | |
75 test.end(); | |
76 } catch (Exception e) { | |
77 e.printStackTrace(); | |
78 } | |
79 } | |
80 | |
81 private void init() throws ApplicationException { | |
82 indexer = IndexHandler.getInstance(); | |
83 } | |
84 | |
85 private void end() throws ApplicationException { | |
86 indexer.end(); | |
87 } | |
88 | |
89 private void testXml() throws ApplicationException { | |
90 try { | |
91 DocumentHandler docHandler = new DocumentHandler(); | |
92 String docDirName = docHandler.getDocDir("/echo/la/Benedetti_1585_163127KK.xml"); | |
93 String pageXmlFileName = docDirName + "/pages" + "/page-" + "444" + ".xml"; | |
94 File pageXmlFile = new File(pageXmlFileName); | |
95 String pageXmlStr = null; | |
96 if (pageXmlFile.exists()) | |
97 pageXmlStr = FileUtils.readFileToString(pageXmlFile, "utf-8"); | |
98 System.out.println(pageXmlStr); | |
99 String tokStr = tokenizeWithLemmas(pageXmlStr, "lat"); | |
100 System.out.println(tokStr); | |
101 tokStr = "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + tokStr; | |
102 byte[] blablabla = tokStr.getBytes("utf-8"); | |
103 String blablu = new String(blablabla, "utf-8"); | |
104 String bla = enrichWordsOrigRegNorm(blablu); | |
105 System.out.println(bla); | |
106 | |
107 XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); | |
108 URL url = new URL("file:/var/yp/Test_1789.xml"); | |
109 XdmNode docNode = xQueryEvaluator.parse(url); // if it is not parseable an exception with a detail message is thrown | |
110 | |
111 File srcFile = new File("/Users/jwillenborg/mpdl/data/xml/documents/tei/de/dt-ptolemaeus-tei-merge2.xml"); | |
112 FileReader docFileReader = new FileReader(srcFile); | |
113 XmlTokenizer docXmlTokenizer = new XmlTokenizer(docFileReader); | |
114 docXmlTokenizer.setDocIdentifier("/tei/de/dt-ptolemaeus-tei-merge2.xml"); | |
115 docXmlTokenizer.tokenize(); | |
116 ArrayList<XmlTokenizerContentHandler.Element> elements = docXmlTokenizer.getElements("s"); | |
117 String blabla = ""; | |
118 } catch (Exception e) { | |
119 e.printStackTrace(); | |
120 } | |
121 } | |
122 | |
123 private void importAllDocuments() throws ApplicationException { | |
124 DocumentHandler docHandler = new DocumentHandler(); | |
125 CmsDocOperation docOperation = new CmsDocOperation("importDirectory", "file:/Users/jwillenborg/test/documents", null, null); | |
126 docOperation.setCollectionNames("echo"); | |
127 docHandler.doOperation(docOperation); | |
128 } | |
129 | |
130 private void createAllPdfInDirectory() throws ApplicationException { | |
131 DocumentHandler docHandler = new DocumentHandler(); | |
132 CmsDocOperation docOperation = new CmsDocOperation("createAllPdfInDirectory", "file:/Users/jwillenborg/test/documents", null, null); | |
133 docOperation.setCollectionNames("echo"); | |
134 docHandler.doOperation(docOperation); | |
135 } | |
136 | |
137 private void generatePdf() throws ApplicationException { | |
138 long begin = new Date().getTime(); | |
139 DocumentHandler docHandler = new DocumentHandler(); | |
140 // String docId = "/echo/la/Benedetti_1585_163127KK.xml"; | |
141 String docId = "/diverse/de/Einst_Ueber_de_1907_02.xml"; | |
142 // String docId = "/archimedes/it/caver_metod_020_it_1891.xml"; | |
143 CmsDocOperation docOperation = new CmsDocOperation("createPdf", null, null, docId); | |
144 docHandler.doOperation(docOperation); | |
145 long end = new Date().getTime(); | |
146 System.out.println("Needed time: " + (end - begin)); | |
147 } | |
148 | |
149 private void testChars() throws ApplicationException { | |
150 String docId = "/test/benedetti/page-444.xml"; | |
151 String docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docId; | |
152 DocumentHandler docHandler = new DocumentHandler(); | |
153 CmsDocOperation docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docId); | |
154 docHandler.doOperation(docOperation); | |
155 } | |
156 | |
157 private void testCalls() throws ApplicationException { | |
158 Date before = new Date(); | |
159 System.out.println("Indexing start: " + before.getTime()); | |
160 String docIdGoerz = "/tei/de/dt-ptolemaeus-tei-merge2.xml"; | |
161 String docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdGoerz; | |
162 DocumentHandler docHandler = new DocumentHandler(); | |
163 CmsDocOperation docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdGoerz); | |
164 // docHandler.doOperation(docOperation); | |
165 String docIdSchulz = "/tei/de/Schulz_2009.xml"; | |
166 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdSchulz; | |
167 docHandler = new DocumentHandler(); | |
168 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdSchulz); | |
169 // docHandler.doOperation(docOperation); | |
170 String docIdBenedetti = "/echo/la/Benedetti_1585_163127KK.xml"; | |
171 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdBenedetti; | |
172 docHandler = new DocumentHandler(); | |
173 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdBenedetti); | |
174 // docHandler.doOperation(docOperation); | |
175 String docIdSongYingxing = "/echo/zh/SongYingxing_1637.xml"; | |
176 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdSongYingxing; | |
177 docHandler = new DocumentHandler(); | |
178 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdSongYingxing); | |
179 // docHandler.doOperation(docOperation); | |
180 String docIdMonte = "/archimedes/la/monte_mecha_036_la_1577.xml"; | |
181 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdMonte; | |
182 docHandler = new DocumentHandler(); | |
183 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdMonte); | |
184 // docHandler.doOperation(docOperation); | |
185 String docIdEinstein = "/diverse/de/Einst_Antwo_de_1912.xml"; | |
186 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinstein; | |
187 docHandler = new DocumentHandler(); | |
188 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinstein); | |
189 // docHandler.doOperation(docOperation); | |
190 String docIdEinsteinUeber = "/diverse/de/Einst_Ueber_de_1907_02.xml"; | |
191 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinsteinUeber; | |
192 docHandler = new DocumentHandler(); | |
193 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinsteinUeber); | |
194 // docHandler.doOperation(docOperation); | |
195 String docIdTest = "/echo/zh/Yulei_tushuo_2_FN1CTY5C.xml"; | |
196 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdTest; | |
197 docHandler = new DocumentHandler(); | |
198 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdTest); | |
199 // docHandler.doOperation(docOperation); | |
200 String docIdMega = "/test/mega/MEGA_A2_B013-00_ETX.xml"; | |
201 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdMega; | |
202 docHandler = new DocumentHandler(); | |
203 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdMega); | |
204 // docHandler.doOperation(docOperation); | |
205 String docIdDiverse = "/diverse/en/078_A_1916.xml"; | |
206 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdDiverse; | |
207 docHandler = new DocumentHandler(); | |
208 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdDiverse); | |
209 // docHandler.doOperation(docOperation); | |
210 String docIdEinstGrossmann = "/diverse/de/EinsteinGrossmann.xml"; | |
211 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinstGrossmann; | |
212 docHandler = new DocumentHandler(); | |
213 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinstGrossmann); | |
214 // docHandler.doOperation(docOperation); | |
215 String docIdEinstGrund = "/diverse/en/078_A_1916.xml"; | |
216 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdEinstGrund; | |
217 docHandler = new DocumentHandler(); | |
218 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdEinstGrund); | |
219 // docHandler.doOperation(docOperation); | |
220 String docIdVolta = "/archimedes/it/volta_nuoMemLetTerz_922_it_1795.xml"; | |
221 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdVolta; | |
222 docHandler = new DocumentHandler(); | |
223 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdVolta); | |
224 // docHandler.doOperation(docOperation); | |
225 String docIdVitruv = "/echo/it/Vitruvius_1747_Y1G1TRCW.xml"; | |
226 docSrcUrlStr = "file:/Users/jwillenborg/texts/mpdl/documents" + docIdVitruv; | |
227 docHandler = new DocumentHandler(); | |
228 docOperation = new CmsDocOperation("create", docSrcUrlStr, null, docIdVitruv); | |
229 docHandler.doOperation(docOperation); | |
230 // indexer.deleteDocument(docIdGoerz); | |
231 // indexer.deleteDocument(docIdBenedetti); | |
232 MorphologyCache.getInstance().end(); | |
233 LexHandler.getInstance().end(); | |
234 } | |
235 | |
236 private Hashtable<Integer, StringBuilder> getFragments(String fileName) throws ApplicationException { | |
237 try { | |
238 GetFragmentsContentHandler getFragmentsContentHandler = new GetFragmentsContentHandler(); | |
239 XMLReader xmlParser = new SAXParser(); | |
240 xmlParser.setContentHandler(getFragmentsContentHandler); | |
241 InputSource inputSource = new InputSource(fileName); | |
242 xmlParser.parse(inputSource); | |
243 Hashtable<Integer, StringBuilder> resultFragments = getFragmentsContentHandler.getResultPages(); | |
244 return resultFragments; | |
245 } catch (SAXException e) { | |
246 throw new ApplicationException(e); | |
247 } catch (IOException e) { | |
248 throw new ApplicationException(e); | |
249 } | |
250 } | |
251 | |
252 private void testTransform() throws ApplicationException { | |
253 Date begin = new Date(); | |
254 XslResourceTransformer xslResourceTransformer = new XslResourceTransformer("pageXml.xsl"); | |
255 xslResourceTransformer = new XslResourceTransformer("pageTei.xsl"); | |
256 xslResourceTransformer = new XslResourceTransformer("pageArchimedes.xsl"); | |
257 xslResourceTransformer = new XslResourceTransformer("pageXhtml.xsl"); | |
258 xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); | |
259 Date end = new Date(); | |
260 System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); | |
261 begin = new Date(); | |
262 String docFilePath = "/Users/jwillenborg/mpdl/data/xml/documents/echo/la/Benedetti_1585/pages/page-13-morph.xml"; | |
263 xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); | |
264 String result = xslResourceTransformer.transform(docFilePath); | |
265 xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); | |
266 result = xslResourceTransformer.transform(docFilePath); | |
267 xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); | |
268 result = xslResourceTransformer.transform(docFilePath); | |
269 xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); | |
270 result = xslResourceTransformer.transform(docFilePath); | |
271 xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); | |
272 result = xslResourceTransformer.transform(docFilePath); | |
273 end = new Date(); | |
274 System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); | |
275 xslResourceTransformer = new XslResourceTransformer("pageEcho.xsl"); | |
276 begin = new Date(); | |
277 result = xslResourceTransformer.transform(docFilePath); | |
278 result = xslResourceTransformer.transform(docFilePath); | |
279 result = xslResourceTransformer.transform(docFilePath); | |
280 result = xslResourceTransformer.transform(docFilePath); | |
281 result = xslResourceTransformer.transform(docFilePath); | |
282 end = new Date(); | |
283 System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); | |
284 try { | |
285 Processor processor = new Processor(false); | |
286 XsltCompiler xsltCompiler = processor.newXsltCompiler(); | |
287 URL xslUrl = XslResourceTransformer.class.getResource("pageEcho.xsl"); | |
288 StreamSource xslStreamSource = new StreamSource(xslUrl.openStream()); | |
289 XsltExecutable xsltExecutable = xsltCompiler.compile(xslStreamSource); | |
290 net.sf.saxon.s9api.XsltTransformer xsltTransformer = xsltExecutable.load(); | |
291 Serializer serializer = new Serializer(); | |
292 serializer.setOutputWriter(new StringWriter()); | |
293 begin = new Date(); | |
294 for (int i=0; i<=5; i++) { | |
295 StreamSource xmlDoc = new StreamSource(docFilePath); | |
296 xsltTransformer.setSource(xmlDoc); // needs some time for bigger documents | |
297 xsltTransformer.setDestination(serializer); | |
298 xsltTransformer.transform(); // needs some time for bigger documents | |
299 result = serializer.getOutputDestination().toString(); | |
300 } | |
301 end = new Date(); | |
302 System.out.println("Needed time: " + (end.getTime() - begin.getTime()) + " ms"); | |
303 } catch (Exception e) { | |
304 | |
305 } | |
306 } | |
307 | |
308 private String tokenizeXmlFragment() throws ApplicationException { | |
309 String result = null; | |
310 try { | |
311 String xmlFragment = new String(FileUtils.readFileToByteArray(new File("/Users/jwillenborg/tmp/testFragment2.xml")), "utf-8"); | |
312 String srcUrlStr = "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=/echo/la/Benedetti_1585.xml&mode=pureXml&pn=13"; | |
313 URL srcUrl = new URL(srcUrlStr); | |
314 InputStream inputStream = srcUrl.openStream(); | |
315 BufferedInputStream in = new BufferedInputStream(inputStream); | |
316 xmlFragment = IOUtils.toString(in, "utf-8"); | |
317 in.close(); | |
318 | |
319 XmlTokenizer xmlTokenizer = new XmlTokenizer(new StringReader(xmlFragment)); | |
320 xmlTokenizer.setLanguage("lat"); | |
321 String[] stopElements = {"var"}; | |
322 // xmlTokenizer.setOutputFormat("string"); | |
323 String[] outputOptions = {"withLemmas"}; | |
324 xmlTokenizer.setOutputOptions(outputOptions); | |
325 xmlTokenizer.setStopElements(stopElements); | |
326 xmlTokenizer.tokenize(); | |
327 result = xmlTokenizer.getXmlResult(); | |
328 System.out.println(result); | |
329 } catch (Exception e) { | |
330 throw new ApplicationException(e); | |
331 } | |
332 return result; | |
333 } | |
334 | |
335 private String normalizeWords(String xmlStr) throws ApplicationException { | |
336 try { | |
337 WordContentHandler wordContentHandler = new WordContentHandler(); | |
338 XMLReader xmlParser = new SAXParser(); | |
339 xmlParser.setContentHandler(wordContentHandler); | |
340 StringReader strReader = new StringReader(xmlStr); | |
341 InputSource inputSource = new InputSource(strReader); | |
342 xmlParser.parse(inputSource); | |
343 String result = wordContentHandler.getResult(); | |
344 return result; | |
345 } catch (SAXException e) { | |
346 throw new ApplicationException(e); | |
347 } catch (IOException e) { | |
348 throw new ApplicationException(e); | |
349 } | |
350 } | |
351 | |
352 private String tokenizeWithLemmas(String xmlStr, String language) throws ApplicationException { | |
353 StringReader strReader = new StringReader(xmlStr); | |
354 XmlTokenizer xmlTokenizer = new XmlTokenizer(strReader); | |
355 xmlTokenizer.setLanguage(language); | |
356 String[] outputOptionsWithLemmas = {"withLemmas"}; // so all tokens are fetched with lemmas (costs performance) | |
357 xmlTokenizer.setOutputOptions(outputOptionsWithLemmas); | |
358 xmlTokenizer.tokenize(); | |
359 String retStr = xmlTokenizer.getXmlResult(); | |
360 return retStr; | |
361 } | |
362 | |
363 private String enrichWordsOrigRegNorm(String xmlStr) throws ApplicationException { | |
364 try { | |
365 WordContentHandler wordContentHandler = new WordContentHandler(); | |
366 XMLReader xmlParser = new SAXParser(); | |
367 xmlParser.setContentHandler(wordContentHandler); | |
368 StringReader strReader = new StringReader(xmlStr); | |
369 InputSource inputSource = new InputSource(strReader); | |
370 xmlParser.parse(inputSource); | |
371 String result = wordContentHandler.getResult(); | |
372 return result; | |
373 } catch (SAXException e) { | |
374 throw new ApplicationException(e); | |
375 } catch (IOException e) { | |
376 throw new ApplicationException(e); | |
377 } | |
378 } | |
379 | |
380 private String highlight(String xmlStr, String highlightElem, int highlightElemPos, String highlightQueryType, String highlightQuery, String language) throws ApplicationException { | |
381 String result = null; | |
382 try { | |
383 xmlStr = normalizeWords(xmlStr); | |
384 HighlightContentHandler highlightContentHandler = new HighlightContentHandler(highlightElem, highlightElemPos, highlightQueryType, highlightQuery, language); | |
385 highlightContentHandler.setFirstPageBreakReachedMode(true); | |
386 XMLReader xmlParser = new SAXParser(); | |
387 xmlParser.setContentHandler(highlightContentHandler); | |
388 StringReader stringReader = new StringReader(xmlStr); | |
389 InputSource inputSource = new InputSource(stringReader); | |
390 xmlParser.parse(inputSource); | |
391 result = highlightContentHandler.getResult().toString(); | |
392 } catch (SAXException e) { | |
393 throw new ApplicationException(e); | |
394 } catch (IOException e) { | |
395 throw new ApplicationException(e); | |
396 } | |
397 return result; | |
398 } | |
399 | |
400 private void testScheduler() throws ApplicationException { | |
401 CmsDocOperation docOperation = new CmsDocOperation("update", "http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/getDoc?doc=/echo/zh/SongYingxing_1637.xml", null, "/echo/zh/SongYingxing_1637.xml"); | |
402 String[] elemNames = {"s", "head"}; | |
403 docOperation.setElementNames(elemNames); | |
404 CmsChainScheduler scheduler = CmsChainScheduler.getInstance(); | |
405 docOperation = scheduler.doOperation(docOperation); | |
406 String bla = ""; | |
407 } | |
408 | |
409 private void xquery() throws ApplicationException { | |
410 try { | |
411 XQueryEvaluator xQueryEvaluator = new XQueryEvaluator(); | |
412 URL srcUrl = new URL("file:/Users/jwillenborg/tmp/blablabla/Benedetti_1585.xml"); | |
413 String getTocEntries = "let $tocEntries := //echo:div[@type = 'section' or @type = 'chapter']"; | |
414 String getFigures = | |
415 "let $allFigures := //*:figure " + | |
416 "let $figures := " + | |
417 " for $figure at $pos in $allFigures "+ | |
418 " let $caption := string-join($figure/*:caption/text(), ' ') " + | |
419 " let $description := string-join($figure/*:description/text(), ' ') " + | |
420 " let $variables := string-join($figure/*:variables/text(), ' ') " + | |
421 " let $retFigure := " + | |
422 " element {'figure'}" + | |
423 " { attribute {'number'} {$pos}, " + | |
424 " element {'caption'} {$caption}, " + | |
425 " element {'description'} {$description}, " + | |
426 " element {'variables'} {$variables} }" + | |
427 " return " + | |
428 " $retFigure " + | |
429 "return $figures"; | |
430 String result = xQueryEvaluator.evaluateAsString(srcUrl, getFigures); | |
431 String bla = result; | |
432 } catch (Exception e) { | |
433 throw new ApplicationException(e); | |
434 } | |
435 } | |
436 | |
437 private void createToc() throws ApplicationException { | |
438 String docDirName = "/Users/jwillenborg/mpdl/data/xml/documents/echo/zh/SongYingxing_1637"; | |
439 XslResourceTransformer tocTransformer = new XslResourceTransformer("toc.xsl"); | |
440 File tocFile = new File(docDirName + "/toc.xml"); | |
441 String docDestFileName = docDirName + "/SongYingxing_1637.xml"; | |
442 String tocResult = tocTransformer.transform(docDestFileName); | |
443 String bla = ""; | |
444 } | |
445 | |
446 private void getDocInfo() throws ApplicationException { | |
447 IndexHandler indexHandler = IndexHandler.getInstance(); | |
448 MetadataRecord mdRecord = indexHandler.getDocMetadata("/echo/la/Benedetti_1585_163127KK.xml"); | |
449 } | |
450 | |
451 } |