view software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ExampleDonatusHandler.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.donatus.example;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;

import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusXmlRpcClient;

/**
 * Example handler (lazily created singleton) that sends text to the Donatus
 * service through a {@link DonatusXmlRpcClient} and prints the XML string the
 * service returns.
 *
 * The singleton is created without any synchronization, so this class must
 * not be used from several threads at once.
 *
 * To run the file based example, adjust the fields "localDirectoryName" and
 * "exampleDocumentName" (see below). The document is looked up at
 * localDirectoryName/language/exampleDocumentName, so the local directory is
 * expected to contain one sub directory per language code.
 *
 * For example your local directory structure could look like this:
 * archimedesForEXist
 *   documents
 *     ar
 *       yourDoc1.xml
 *       yourDoc2.xml
 *       ...
 *     de
 *       yourDoc1.xml
 *       yourDoc2.xml
 *       ...
 *     el
 *       yourDoc1.xml
 *       yourDoc2.xml
 *       ...
 *     ...
 */
public class ExampleDonatusHandler {
  /** Number of bytes requested from the local file per read call. */
  private static final int CHUNK_SIZE = 64 * 1024;

  /**
   * Encoding used to decode the local document.
   * NOTE(review): assumes the example documents are UTF-8 encoded XML (the XML
   * default encoding) - confirm against the actual files.
   */
  private static final String DOCUMENT_ENCODING = "UTF-8";

  private static ExampleDonatusHandler instance;
  private DonatusXmlRpcClient donatusXmlRpcClient = null;

  // not read anywhere in this class; kept for users who adapt the example
  private String documentCollectionName = "/db/mpdl-example/archimedes/documents";
  private String localDirectoryName = "/Users/jwillenborg/texts/archimedesForEXist/documents";
  
  private String exampleDocumentName = "achil_propo_087_la_1545.xml";
  
  private long beginOfOperation;
  private long endOfOperation;
  
  /**
   * Returns the single handler instance, creating and initializing it on the
   * first call. Not thread safe.
   * @return the shared handler
   */
  public static ExampleDonatusHandler getInstance() {
    if (instance == null) {
      instance = new ExampleDonatusHandler();
      instance.init();
    }
    return instance;
  }

  /**
   * Analyzes the configured example document file and prints the result
   * together with the elapsed time in milliseconds.
   * @param args not used
   */
  public static void main(String[] args) {
    ExampleDonatusHandler handler = getInstance();
    handler.beginOperation();
    System.out.println("Start ... ");
    String result = handler.analyzeExampleDocumentFile();  // example for analyzing one document
    handler.endOperation();
    System.out.println(result);
    System.out.println("End of operation. Elapsed time: " + (handler.endOfOperation - handler.beginOfOperation) + " ms" );
  }

  /** Creates the XML-RPC client used by all analyze methods. */
  private void init() {
    donatusXmlRpcClient = new DonatusXmlRpcClient(); // default server is "archimedes.fas.harvard.edu"
  }
  
  /**
   * Example: analyzes a hard coded fragment consisting of seven Latin sentences.
   * @return the XML string returned by the Donatus client
   */
  private String analyzeExampleDocument() {
    String locator = "xxx";   // TODO take uri 
    String language = "la";
    String s1 = "<s id=\"id.0.1.01.02\">An recentiores Mathematici Aristotelem in errore deprehenderint proportionum regulas docentem, quibus motus invicem comparantur, disputandum.</s>";
    String s2 = "<s id=\"id.0.1.01.03\">Praesens opus in quatuor secatur partes.</s>";
    String s3 = "<s id=\"id.0.1.01.04\">Primo fundamenta quaedam subiiciam.</s>";
    String s4 = "<s id=\"id.0.1.01.05\">Secundo regulas quasdam asseram.</s>";
    String s5 = "<s id=\"id.0.1.01.06\">Tertio conclusiones aliquas probabo.</s>";
    String s6 = "<s id=\"id.0.1.01.07\">Quarto ad obiecta respondebo.</s>";
    String s7 = "<s id=\"id.0.1.01.08\">Hic deus lumen infundat.</s>";
    String sentences = s1 + s2 + s3 + s4 + s5 + s6 + s7;
    String doc = "<fragment>" + sentences + "</fragment>";
    return donatusXmlRpcClient.analyze(locator, language, doc);
  }

  /**
   * Example: reads the configured local document file and analyzes its content.
   * @return the XML string returned by the Donatus client, or null if the
   *         local file could not be found or read (the stack trace is printed)
   */
  private String analyzeExampleDocumentFile() {
    String locator = "xxx";   // TODO take uri 
    String language = "la";
    String exampleLocalFile = localDirectoryName + "/" + language + "/" + exampleDocumentName;  // TODO example document with sentences
    String docString;
    try {
      docString = readFileAsString(exampleLocalFile);
    } catch (IOException e) {  // also covers FileNotFoundException
      e.printStackTrace();
      return null;
    }
    return donatusXmlRpcClient.analyze(locator, language, docString);
  }

  /**
   * Example: analyzes one hard coded sentence which is already split into words.
   * @return the XML string returned by the Donatus client
   */
  private String analyzeExampleSentence() {
    String locator = "xxx";   // TODO take uri 
    String language = "la";
    String s = "<s id=\"id.0.1.01.05\"><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>";
    return donatusXmlRpcClient.analyze(locator, language, s);
  }

  /**
   * Reads a whole file and decodes it with DOCUMENT_ENCODING.
   * All bytes are collected first and decoded in one step, so that a multi
   * byte character which is split by a chunk boundary is not corrupted.
   * The stream is always closed, also when reading fails.
   * @param fileName name of the local file
   * @return the file content as string
   * @throws IOException if the file does not exist or could not be read
   */
  private String readFileAsString(String fileName) throws IOException {
    InputStream in = new BufferedInputStream(new FileInputStream(fileName));
    try {
      ByteArrayOutputStream collected = new ByteArrayOutputStream();
      byte[] chunk;
      while ((chunk = readBytes(in, CHUNK_SIZE)) != null) {
        collected.write(chunk, 0, chunk.length);
      }
      return collected.toString(DOCUMENT_ENCODING);
    } finally {
      try {
        in.close();
      } catch (IOException ignored) {
        // nothing useful can be done if closing fails; the data is already read
      }
    }
  }

  /**
   *  Reads a chunk of data of an input stream.
   *  Does not close the stream; closing is the responsibility of the caller.
   *  @param in the input stream to be read
   *  @param chunkSize maximum length of the chunk which is read, must be positive
   *  @return the bytes read (exactly as many as were delivered by the stream),
   *          or null if the end of the stream is reached
   *  @throws IOException if reading fails; it is propagated instead of being
   *          swallowed, because a swallowed error would deliver a zero filled
   *          buffer and the caller would never see the end of the stream
   */
  private byte[] readBytes(InputStream in, int chunkSize) throws IOException {
    if (chunkSize <= 0) {
      // a zero length read returns 0 forever and would make the caller loop endlessly
      throw new IllegalArgumentException("chunkSize must be positive: " + chunkSize);
    }
    byte[] buffer = new byte[chunkSize];
    int len = in.read(buffer, 0, chunkSize);
    if (len == -1) {
      return null;  // end of stream
    }
    if (len == chunkSize) {
      return buffer;
    }
    // partial chunk: deliver only the bytes which were really read
    byte[] partial = new byte[len];
    System.arraycopy(buffer, 0, partial, 0, len);
    return partial;
  }

  /** Stores the current time as begin of the measured operation. */
  private void beginOperation() {
    beginOfOperation = System.currentTimeMillis();
  }

  /** Stores the current time as end of the measured operation. */
  private void endOperation() {
    endOfOperation = System.currentTimeMillis();
  }

}