Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ExampleDonatusHandler.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 package de.mpg.mpiwg.berlin.mpdl.donatus.example; | |
2 | |
3 import java.io.BufferedInputStream; | |
4 import java.io.FileInputStream; | |
5 import java.io.FileNotFoundException; | |
6 import java.io.IOException; | |
7 import java.io.InputStream; | |
8 import java.util.Date; | |
9 | |
10 import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusXmlRpcClient; | |
11 | |
12 /** | |
13 * Example Handler for eXist XML documents (singleton). | |
14 * Local document files could be stored into eXist collections over XML-RPC. | |
15 * The eXistXmlRpcInterface could not be used in a multi threading environment. | |
16 * Collections could be configured language specific. You should ask your eXist | |
17 * administrator for the name of the document collection and for the language | |
18 * collection names which could be used. | |
19 * Then you have to set the instance variables: "serverName", "serverPort", | |
20 * "userName", "pw", "documentCollectionName", "localDirectoryName" and | |
21 * "languages" (see below). That's all. | |
22 * | |
23 * For example your local directory structure could look like this: | |
24 * archimedesForEXist | |
25 * documents | |
26 * ar | |
27 * yourDoc1.xml | |
28 * yourDoc2.xml | |
29 * ... | |
30 * de | |
31 * yourDoc1.xml | |
32 * yourDoc2.xml | |
33 * ... | |
34 * el | |
35 * yourDoc1.xml | |
36 * yourDoc2.xml | |
37 * ... | |
38 * ... | |
39 */ | |
40 public class ExampleDonatusHandler { | |
41 private static ExampleDonatusHandler instance; | |
42 private DonatusXmlRpcClient donatusXmlRpcClient = null; | |
43 | |
44 private String documentCollectionName = "/db/mpdl-example/archimedes/documents"; | |
45 private String localDirectoryName = "/Users/jwillenborg/texts/archimedesForEXist/documents"; | |
46 | |
47 private String exampleDocumentName = "achil_propo_087_la_1545.xml"; | |
48 | |
49 private long beginOfOperation; | |
50 private long endOfOperation; | |
51 | |
52 public static ExampleDonatusHandler getInstance() { | |
53 if (instance == null) { | |
54 instance = new ExampleDonatusHandler(); | |
55 instance.init(); | |
56 } | |
57 return instance; | |
58 } | |
59 | |
60 public static void main(String[] args) { | |
61 getInstance(); | |
62 instance.beginOperation(); | |
63 System.out.println("Start ... "); | |
64 String result = instance.analyzeExampleDocumentFile(); // example for analyzing one document | |
65 instance.endOperation(); | |
66 System.out.println(result); | |
67 System.out.println("End of operation. Elapsed time: " + (instance.endOfOperation - instance.beginOfOperation) + " ms" ); | |
68 } | |
69 | |
70 private void init() { | |
71 donatusXmlRpcClient = new DonatusXmlRpcClient(); // default server is "archimedes.fas.harvard.edu" | |
72 } | |
73 | |
74 private String analyzeExampleDocument() { | |
75 String locator = "xxx"; // TODO take uri | |
76 String language = "la"; | |
77 String s1 = "<s id=\"id.0.1.01.02\">An recentiores Mathematici Aristotelem in errore deprehenderint proportionum regulas docentem, quibus motus invicem comparantur, disputandum.</s>"; | |
78 String s2 = "<s id=\"id.0.1.01.03\">Praesens opus in quatuor secatur partes.</s>"; | |
79 String s3 = "<s id=\"id.0.1.01.04\">Primo fundamenta quaedam subiiciam.</s>"; | |
80 String s4 = "<s id=\"id.0.1.01.05\">Secundo regulas quasdam asseram.</s>"; | |
81 String s5 = "<s id=\"id.0.1.01.06\">Tertio conclusiones aliquas probabo.</s>"; | |
82 String s6 = "<s id=\"id.0.1.01.07\">Quarto ad obiecta respondebo.</s>"; | |
83 String s7 = "<s id=\"id.0.1.01.08\">Hic deus lumen infundat.</s>"; | |
84 String sentences = s1 + s2 + s3 + s4 + s5 + s6 + s7; | |
85 String doc = "<fragment>" + sentences + "</fragment>"; | |
86 String morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, doc); | |
87 return morphDocTypeXmlStr; | |
88 } | |
89 | |
90 private String analyzeExampleDocumentFile() { | |
91 String morphDocTypeXmlStr = null; | |
92 try { | |
93 String locator = "xxx"; // TODO take uri | |
94 String language = "la"; | |
95 String exampleLocalFile = instance.localDirectoryName + "/" + language + "/" + instance.exampleDocumentName; // TODO example document with sentences | |
96 StringBuffer docStringArray = new StringBuffer(""); | |
97 int chunkSize = 20000 * 1024; // copies data from a file in 20 MB chunks to server file so that not too much RAM is consumed on server | |
98 InputStream localFileInputStream = new BufferedInputStream(new FileInputStream(exampleLocalFile)); | |
99 byte[] chunk = new byte[chunkSize]; | |
100 while ((chunk = readBytes(localFileInputStream, chunkSize)) != null) { | |
101 docStringArray.append(new String(chunk)); | |
102 } | |
103 morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, docStringArray.toString()); | |
104 } catch (FileNotFoundException e) { | |
105 e.printStackTrace(); | |
106 } | |
107 return morphDocTypeXmlStr; | |
108 } | |
109 | |
110 private String analyzeExampleSentence() { | |
111 String locator = "xxx"; // TODO take uri | |
112 String language = "la"; | |
113 String s = "<s id=\"id.0.1.01.05\"><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>"; | |
114 // String s = "<s><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>"; | |
115 String morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, s); | |
116 return morphDocTypeXmlStr; | |
117 } | |
118 | |
119 /** | |
120 * Reads a chunk of data of an input stream. | |
121 * Does not close the stream until last bytes are read | |
122 * @in in the input stream to be read | |
123 * @chunkSize chunkSize length of the chunk which is read | |
124 * @return byte[] of bytes read | |
125 */ | |
126 private byte[] readBytes(InputStream in, int chunkSize) { | |
127 byte[] resultBytes = new byte[chunkSize]; | |
128 try { | |
129 int len = in.read(resultBytes, 0, chunkSize); | |
130 if (len == -1) { | |
131 try { in.close(); } catch (Exception e) { } // close the stream if end of file is reached | |
132 resultBytes = null; | |
133 } else if (len < chunkSize && len != chunkSize) { // if read chunk is last chunk of the file it delivers this chunk | |
134 byte[] tmp = new byte[len]; | |
135 System.arraycopy(resultBytes, 0, tmp, 0, len); | |
136 resultBytes = tmp; | |
137 } | |
138 } catch (FileNotFoundException e) { | |
139 e.printStackTrace(); | |
140 } catch (IOException e) { | |
141 e.printStackTrace(); | |
142 } | |
143 return resultBytes; | |
144 } | |
145 | |
146 private void beginOperation() { | |
147 beginOfOperation = new Date().getTime(); | |
148 } | |
149 | |
150 private void endOperation() { | |
151 endOfOperation = new Date().getTime(); | |
152 } | |
153 | |
154 } |