Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ExampleDonatusHandler.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.donatus.example;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;

import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusXmlRpcClient;

/**
 * Example handler (singleton) which sends XML text to the Donatus service through a
 * {@link DonatusXmlRpcClient} and prints the returned analysis.
 *
 * Three example inputs are provided: a hard coded fragment of sentences, a single hard coded
 * sentence and a local document file. Only the local document file example is executed by
 * {@link #main(String[])}.
 *
 * To run the file example you have to adapt the instance variables "localDirectoryName" and
 * "exampleDocumentName" (see below). The file is looked up as
 * localDirectoryName/language/exampleDocumentName.
 *
 * For example your local directory structure could look like this:
 *   archimedesForEXist
 *     documents
 *       ar
 *         yourDoc1.xml
 *         yourDoc2.xml
 *         ...
 *       de
 *         yourDoc1.xml
 *         yourDoc2.xml
 *         ...
 *       el
 *         yourDoc1.xml
 *         yourDoc2.xml
 *         ...
 *       ...
 *
 * The lazy creation of the singleton in {@link #getInstance()} is not synchronized, so the
 * class should not be used from several threads.
 */
public class ExampleDonatusHandler {
  /** Number of bytes requested from the local file per read call. */
  private static final int READ_CHUNK_SIZE = 64 * 1024;
  /**
   * Character encoding used to decode the local document file.
   * NOTE(review): assumes the example documents are UTF-8 encoded XML files - confirm.
   */
  private static final String DOCUMENT_ENCODING = "UTF-8";

  private static ExampleDonatusHandler instance;
  private DonatusXmlRpcClient donatusXmlRpcClient = null;
  private String documentCollectionName = "/db/mpdl-example/archimedes/documents";
  private String localDirectoryName = "/Users/jwillenborg/texts/archimedesForEXist/documents";
  private String exampleDocumentName = "achil_propo_087_la_1545.xml";
  private long beginOfOperation;
  private long endOfOperation;

  /**
   * Delivers the singleton, creating and initializing it on the first call.
   * @return the one and only handler instance
   */
  public static ExampleDonatusHandler getInstance() {
    if (instance == null) {
      instance = new ExampleDonatusHandler();
      instance.init();
    }
    return instance;
  }

  /**
   * Analyzes the example document file, prints the result and the elapsed time.
   * @param args not used
   */
  public static void main(String[] args) {
    ExampleDonatusHandler handler = getInstance();
    handler.beginOperation();
    System.out.println("Start ... ");
    String result = handler.analyzeExampleDocumentFile(); // example for analyzing one document
    handler.endOperation();
    System.out.println(result);
    System.out.println("End of operation. Elapsed time: " + (handler.endOfOperation - handler.beginOfOperation) + " ms" );
  }

  /**
   * Creates the XML-RPC client which is used by all analyze methods.
   */
  private void init() {
    donatusXmlRpcClient = new DonatusXmlRpcClient(); // default server is "archimedes.fas.harvard.edu"
  }

  /**
   * Sends a hard coded fragment of seven Latin sentences to the Donatus client.
   * @return the string delivered by the Donatus client
   */
  private String analyzeExampleDocument() {
    String locator = "xxx"; // TODO take uri
    String language = "la";
    String s1 = "<s id=\"id.0.1.01.02\">An recentiores Mathematici Aristotelem in errore deprehenderint proportionum regulas docentem, quibus motus invicem comparantur, disputandum.</s>";
    String s2 = "<s id=\"id.0.1.01.03\">Praesens opus in quatuor secatur partes.</s>";
    String s3 = "<s id=\"id.0.1.01.04\">Primo fundamenta quaedam subiiciam.</s>";
    String s4 = "<s id=\"id.0.1.01.05\">Secundo regulas quasdam asseram.</s>";
    String s5 = "<s id=\"id.0.1.01.06\">Tertio conclusiones aliquas probabo.</s>";
    String s6 = "<s id=\"id.0.1.01.07\">Quarto ad obiecta respondebo.</s>";
    String s7 = "<s id=\"id.0.1.01.08\">Hic deus lumen infundat.</s>";
    String sentences = s1 + s2 + s3 + s4 + s5 + s6 + s7;
    String doc = "<fragment>" + sentences + "</fragment>";
    return donatusXmlRpcClient.analyze(locator, language, doc);
  }

  /**
   * Reads the example document from the local directory and sends its content to the
   * Donatus client.
   * @return the string delivered by the Donatus client or null if the local file could not
   *         be found or read
   */
  private String analyzeExampleDocumentFile() {
    String locator = "xxx"; // TODO take uri
    String language = "la";
    String exampleLocalFile = localDirectoryName + "/" + language + "/" + exampleDocumentName; // TODO example document with sentences
    String document;
    try {
      document = readDocument(exampleLocalFile);
    } catch (FileNotFoundException e) {
      e.printStackTrace();
      return null;
    } catch (IOException e) {
      // a partially read document is not analyzed
      e.printStackTrace();
      return null;
    }
    return donatusXmlRpcClient.analyze(locator, language, document);
  }

  /**
   * Sends one hard coded, word tokenized Latin sentence to the Donatus client.
   * @return the string delivered by the Donatus client
   */
  private String analyzeExampleSentence() {
    String locator = "xxx"; // TODO take uri
    String language = "la";
    String s = "<s id=\"id.0.1.01.05\"><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>";
    // variant of the sentence without an id attribute:
    // String s = "<s><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>";
    return donatusXmlRpcClient.analyze(locator, language, s);
  }

  /**
   * Reads a complete local file into a string.
   * All bytes are collected first and decoded in one step, so that a multi byte character
   * which is spread over two chunks is not destroyed. The stream is always closed.
   * @param fileName name of the local file
   * @return content of the file decoded with DOCUMENT_ENCODING
   * @throws FileNotFoundException if the file could not be opened
   * @throws IOException if reading the file fails
   */
  private String readDocument(String fileName) throws IOException {
    InputStream in = new BufferedInputStream(new FileInputStream(fileName));
    try {
      ByteArrayOutputStream collectedBytes = new ByteArrayOutputStream();
      byte[] chunk;
      while ((chunk = readBytes(in, READ_CHUNK_SIZE)) != null) {
        collectedBytes.write(chunk, 0, chunk.length);
      }
      return collectedBytes.toString(DOCUMENT_ENCODING);
    } finally {
      try {
        in.close();
      } catch (IOException ignored) {
        // nothing useful can be done if closing a stream which was only read fails
      }
    }
  }

  /**
   * Reads a chunk of data of an input stream.
   * Does not close the stream, this is the job of the caller.
   * @param in the input stream to be read
   * @param chunkSize maximum length of the chunk which is read, has to be greater than 0
   * @return byte[] which contains exactly the bytes read or null if the end of the stream
   *         is reached
   * @throws IOException if reading fails; it is passed to the caller so that a read loop
   *         terminates instead of receiving an array without data again and again
   */
  private byte[] readBytes(InputStream in, int chunkSize) throws IOException {
    if (chunkSize <= 0) {
      // a read of length 0 delivers 0 and would never signal the end of the stream
      throw new IllegalArgumentException("chunkSize must be greater than 0: " + chunkSize);
    }
    byte[] buffer = new byte[chunkSize];
    int len = in.read(buffer, 0, chunkSize);
    if (len == -1) {
      return null;
    }
    if (len == chunkSize) {
      return buffer;
    }
    // fewer bytes than requested were delivered: cut the result to the bytes really read
    byte[] exactBytes = new byte[len];
    System.arraycopy(buffer, 0, exactBytes, 0, len);
    return exactBytes;
  }

  /**
   * Stores the current time as the begin of the measured operation.
   */
  private void beginOperation() {
    beginOfOperation = new Date().getTime();
  }

  /**
   * Stores the current time as the end of the measured operation.
   */
  private void endOperation() {
    endOfOperation = new Date().getTime();
  }
}