comparison software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ExampleDonatusHandler.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 package de.mpg.mpiwg.berlin.mpdl.donatus.example;
2
3 import java.io.BufferedInputStream;
4 import java.io.FileInputStream;
5 import java.io.FileNotFoundException;
6 import java.io.IOException;
7 import java.io.InputStream;
8 import java.util.Date;
9
10 import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusXmlRpcClient;
11
12 /**
13 * Example Handler for eXist XML documents (singleton).
14 * Local document files can be stored in eXist collections over XML-RPC.
15 * The eXistXmlRpcInterface cannot be used in a multi-threaded environment.
16 * Collections can be configured per language. You should ask your eXist
17 * administrator for the name of the document collection and for the language
18 * collection names which can be used.
19 * Then you have to set the instance variables: "serverName", "serverPort",
20 * "userName", "pw", "documentCollectionName", "localDirectoryName" and
21 * "languages" (see below). That's all.
22 *
23 * For example your local directory structure could look like this:
24 * archimedesForEXist
25 * documents
26 * ar
27 * yourDoc1.xml
28 * yourDoc2.xml
29 * ...
30 * de
31 * yourDoc1.xml
32 * yourDoc2.xml
33 * ...
34 * el
35 * yourDoc1.xml
36 * yourDoc2.xml
37 * ...
38 * ...
39 */
40 public class ExampleDonatusHandler {
41 private static ExampleDonatusHandler instance;
42 private DonatusXmlRpcClient donatusXmlRpcClient = null;
43
44 private String documentCollectionName = "/db/mpdl-example/archimedes/documents";
45 private String localDirectoryName = "/Users/jwillenborg/texts/archimedesForEXist/documents";
46
47 private String exampleDocumentName = "achil_propo_087_la_1545.xml";
48
49 private long beginOfOperation;
50 private long endOfOperation;
51
52 public static ExampleDonatusHandler getInstance() {
53 if (instance == null) {
54 instance = new ExampleDonatusHandler();
55 instance.init();
56 }
57 return instance;
58 }
59
60 public static void main(String[] args) {
61 getInstance();
62 instance.beginOperation();
63 System.out.println("Start ... ");
64 String result = instance.analyzeExampleDocumentFile(); // example for analyzing one document
65 instance.endOperation();
66 System.out.println(result);
67 System.out.println("End of operation. Elapsed time: " + (instance.endOfOperation - instance.beginOfOperation) + " ms" );
68 }
69
70 private void init() {
71 donatusXmlRpcClient = new DonatusXmlRpcClient(); // default server is "archimedes.fas.harvard.edu"
72 }
73
74 private String analyzeExampleDocument() {
75 String locator = "xxx"; // TODO take uri
76 String language = "la";
77 String s1 = "<s id=\"id.0.1.01.02\">An recentiores Mathematici Aristotelem in errore deprehenderint proportionum regulas docentem, quibus motus invicem comparantur, disputandum.</s>";
78 String s2 = "<s id=\"id.0.1.01.03\">Praesens opus in quatuor secatur partes.</s>";
79 String s3 = "<s id=\"id.0.1.01.04\">Primo fundamenta quaedam subiiciam.</s>";
80 String s4 = "<s id=\"id.0.1.01.05\">Secundo regulas quasdam asseram.</s>";
81 String s5 = "<s id=\"id.0.1.01.06\">Tertio conclusiones aliquas probabo.</s>";
82 String s6 = "<s id=\"id.0.1.01.07\">Quarto ad obiecta respondebo.</s>";
83 String s7 = "<s id=\"id.0.1.01.08\">Hic deus lumen infundat.</s>";
84 String sentences = s1 + s2 + s3 + s4 + s5 + s6 + s7;
85 String doc = "<fragment>" + sentences + "</fragment>";
86 String morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, doc);
87 return morphDocTypeXmlStr;
88 }
89
90 private String analyzeExampleDocumentFile() {
91 String morphDocTypeXmlStr = null;
92 try {
93 String locator = "xxx"; // TODO take uri
94 String language = "la";
95 String exampleLocalFile = instance.localDirectoryName + "/" + language + "/" + instance.exampleDocumentName; // TODO example document with sentences
96 StringBuffer docStringArray = new StringBuffer("");
97 int chunkSize = 20000 * 1024; // copies data from a file in 20 MB chunks to server file so that not too much RAM is consumed on server
98 InputStream localFileInputStream = new BufferedInputStream(new FileInputStream(exampleLocalFile));
99 byte[] chunk = new byte[chunkSize];
100 while ((chunk = readBytes(localFileInputStream, chunkSize)) != null) {
101 docStringArray.append(new String(chunk));
102 }
103 morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, docStringArray.toString());
104 } catch (FileNotFoundException e) {
105 e.printStackTrace();
106 }
107 return morphDocTypeXmlStr;
108 }
109
110 private String analyzeExampleSentence() {
111 String locator = "xxx"; // TODO take uri
112 String language = "la";
113 String s = "<s id=\"id.0.1.01.05\"><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>";
114 // String s = "<s><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>";
115 String morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, s);
116 return morphDocTypeXmlStr;
117 }
118
119 /**
120 * Reads a chunk of data of an input stream.
121 * Does not close the stream until last bytes are read
122 * @in in the input stream to be read
123 * @chunkSize chunkSize length of the chunk which is read
124 * @return byte[] of bytes read
125 */
126 private byte[] readBytes(InputStream in, int chunkSize) {
127 byte[] resultBytes = new byte[chunkSize];
128 try {
129 int len = in.read(resultBytes, 0, chunkSize);
130 if (len == -1) {
131 try { in.close(); } catch (Exception e) { } // close the stream if end of file is reached
132 resultBytes = null;
133 } else if (len < chunkSize && len != chunkSize) { // if read chunk is last chunk of the file it delivers this chunk
134 byte[] tmp = new byte[len];
135 System.arraycopy(resultBytes, 0, tmp, 0, len);
136 resultBytes = tmp;
137 }
138 } catch (FileNotFoundException e) {
139 e.printStackTrace();
140 } catch (IOException e) {
141 e.printStackTrace();
142 }
143 return resultBytes;
144 }
145
146 private void beginOperation() {
147 beginOfOperation = new Date().getTime();
148 }
149
150 private void endOperation() {
151 endOfOperation = new Date().getTime();
152 }
153
154 }