changeset 0:408254cf2f1d

Creation
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children a40e50e67a4e ad6a8085b8c6
files software/eXist/mpdl-modules/build.properties software/eXist/mpdl-modules/build.xml software/eXist/mpdl-modules/mpdl-system.properties software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/analysis/MpdlStandardAnalyzer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/doc/GetDocServlet.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusAnalyzer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemFilter.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/BrazilianStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerAR.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerDE.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerEL.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerEN.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerFR.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerIT.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerLA.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerNL.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerZH.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DutchStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/FrenchStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/GermanStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/RussianStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ExampleDonatusHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ParseDonatusResult.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/TestDonatus.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusAnalysis.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusBerkeleyDbEnv.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusCache.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusCacheOld.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusConstants.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusContextForm.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusLemma.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusMorphologyDocument.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusMorphologyDocumentContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusToken.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusVariant.java 
software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusWtagContainer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusWtagDocument.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusWtagSection.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusXmlRpcClient.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Component.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Container.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Context.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocIngestor.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocRESTServlet.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocRestSession.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Grant.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Item.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/MetadataRecord.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/TestESciDoc.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/TestESciDocEXist.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/general/MpdlConstants.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlFilter.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlMorphAnalyzer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlNormalizer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStandardAnalyzer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStemFilter.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizerAnalyzer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/BrazilianStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/DutchStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/FrenchStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/GermanStemmer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerAR.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerDE.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerEL.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerEN.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerFR.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerIT.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerLA.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerNL.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerZH.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/RussianStemmer.java 
software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocForeignLangContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocManager.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/DictionarizerContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/NormalizeCharsContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/DBRegularizationHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/DbEnvRegularization.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/Regularization.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/RegularizationManager.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2Unicode.lex software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2Unicode.lex.old software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2UnicodeLex.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Buckwalter2Unicode.lex software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Buckwalter2UnicodeLex.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Language.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Transcoder.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/app/Lexica.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/app/Lexicon.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/app/LexiconEntry.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/DBLexWriter.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/DbEnvLex.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/LexEntryContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/LexEntryErrorHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/LexHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Form.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Lemma.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphFileReaderContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphologyCache.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/SimpleMorphContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/PerseusContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphSupWriter.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriter.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriterContentHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorph.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorphSup.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lucene/LuceneUtil.java 
software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lucene/MorphQueryParser.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlChainScheduler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlChainSchedulerListener.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlDocJob.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlDocOperation.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/quartz.properties software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/FileUtil.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/FilenameFilterExtension.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/MpdlITextRenderer.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/MpdlITextUserAgent.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/StringUtilEscapeChars.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/Util.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xml/SchemaHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/FilenameFilterExtension.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/MpdlXmlRpcDocHandler.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/MpdlXmlRpcInterface.java software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/MpdlXmlRpcInterfaceImpl.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/document/DocumentModule.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/document/GetFragmentBetweenFunctionByFileSearch.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/CheckUri.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/ESciDocLogin.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/GetESciDocContainerIdByExistId.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/GetESciDocs.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/GetJobs.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/Html2Pdf.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/MPDLDocModule.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/MpdlDocOperationStarter.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/Dictionarize.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/EncodeBig5.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetBig5EncodedTerms.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetDonatusQueryVariants.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemma.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasByFormName.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasByLuceneQuery.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasStrByFormName.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLexEntriesByFormName.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLexEntriesByLuceneQuery.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLexEntryKeysByFormName.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetQueryMorphForms.java 
software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetQueryRegularizations.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetTranscodedText.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetWordsFunction.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/LuceneQueryParser.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/MPDLTextModule.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/NormalizeChars.java software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/ToCLevelGenerator.java
diffstat 162 files changed, 31084 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/build.properties	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,7 @@
+###########################################################
+# This file specifies which XQuery extension modules should be compiled and included in the build.
+# $Id: $
+##########################################################
+
+include.module.example = true
+include.module.text = true
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/build.xml	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,109 @@
+<project basedir="." default="jar" name="modules">
+  <property name="src" value="./src"/>
+  <property name="classes" value="./classes"/>
+  <property name="top.dir" value="../../.."/>
+
+  <property name="build.compiler" value="modern"/>
+
+  <property file="${top.dir}/build.properties"/>
+
+  <property file="build.properties"/>
+
+  <property name="deploy.dir" value="${top.dir}/../existUser"/>
+  <property name="xquery.archimedes.dir" value="webapp/archimedes"/>
+
+  <condition property="include.module.example.config">
+    <istrue value="${include.module.example}"/>
+  </condition>
+
+  <condition property="include.module.text.config">
+    <istrue value="${include.module.text}"/>
+  </condition>
+
+  <path id="classpath.core">
+    <fileset dir="${top.dir}/${lib.core}">
+      <include name="*.jar"/>
+    </fileset>
+    <fileset dir="${top.dir}/${lib.optional}">
+      <include name="*.jar"/>
+    </fileset>
+    <fileset dir="${top.dir}/${lib.endorsed}">
+      <include name="*.jar"/>
+    </fileset>
+    <fileset dir="${top.dir}/${lib.extensions}">
+      <include name="*.jar"/>
+    </fileset>
+    <fileset dir="${top.dir}/${lib.user}">
+      <include name="*.jar"/>
+    </fileset>
+    <fileset dir="${top.dir}/tools/ant/lib">
+      <include name="*.jar"/>
+    </fileset>
+    <pathelement path="${top.dir}/extensions/indexes/lucene/lib/lucene-core-2.4.1.jar"/>
+    <pathelement path="${top.dir}/exist.jar"/>
+    <pathelement path="${top.dir}/exist-optional.jar"/>
+    <pathelement path="${top.dir}/start.jar"/>
+    <pathelement path="${java.class.path}"/>
+  </path>
+
+  <target name="prepare" depends="">
+    <mkdir dir="${classes}"/>
+  </target>
+
+  <target name="compile" depends="prepare">
+    <echo message="---------------------------"/>
+    <echo message="Compiling MPDL extension modules"/>
+    <echo message="---------------------------"/>
+    <javac debug="${build.debug}" 
+      deprecation="${build.deprecation}" 
+      destdir="${classes}" 
+      encoding="UTF-8"
+      optimize="${build.optimize}" 
+      srcdir="${src}" 
+      source="1.6">
+      <classpath>
+       	<path refid="classpath.core"/>
+      </classpath>
+    </javac>
+    <copy todir="${classes}">
+      <fileset dir="${src}">
+        <include name="**/*.properties"/>
+        <include name="**/*.jpg"/>
+      </fileset>
+    </copy>
+  </target>
+
+  <target name="jar" depends="compile">
+    <echo message="Creating mpdl-extension-modules.jar ..."/>
+    <jar basedir="${classes}" jarfile="${top.dir}/${lib.extensions}/mpdl-extension-modules.jar">  
+      <manifest>
+        <attribute name="Project-Name" value="${project.name}"/>
+        <attribute name="Project-Version" value="${project.version}"/>
+        <attribute name="Project-Build" value="${DSTAMP}"/>
+        <attribute name="SVN-Revision" value="${svn.revision}"/>
+      </manifest>
+    </jar>
+  </target>
+
+  <!-- deploy MPDL project from existDev to existUser -->
+  <target name="deploy">
+    <tstamp>
+      <format property="today" pattern="yyyy-MM-dd:hh-mm-ss"/>
+    </tstamp>
+    <!-- backup old contents -->
+    <copy toDir="${deploy.dir}/backup/${today}/${xquery.archimedes.dir}" preservelastmodified="true">
+      <fileset dir="${deploy.dir}/${xquery.archimedes.dir}"/>
+    </copy>
+    <copy file="${deploy.dir}/${lib.extensions}/mpdl-extension-modules.jar" tofile="${deploy.dir}/backup/${today}/${lib.extensions}/mpdl-extension-modules.jar" filtering="true" overwrite="true" preservelastmodified = "true"/>
+    <!-- copy files from existDev to existUser -->
+    <copy file="${top.dir}/${lib.extensions}/mpdl-extension-modules.jar" tofile="${deploy.dir}/${lib.extensions}/mpdl-extension-modules.jar" filtering="true" overwrite="true" preservelastmodified = "true"/>
+    <copy toDir="${deploy.dir}/${xquery.archimedes.dir}" preservelastmodified="true">
+      <fileset dir="${top.dir}/${xquery.archimedes.dir}"/>
+    </copy>
+  </target>
+
+  <target name="clean">
+    <delete dir="${classes}"/>
+    <delete file="${top.dir}/${lib.extensions}/mpdl-extension-modules.jar" failonerror="false"/>
+  </target>
+</project>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/mpdl-system.properties	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,18 @@
+# eXist settings: XML-RPC interface, doc interface
+exist.fullHostname=mpdl-proto.mpiwg-berlin.mpg.de
+exist.hostname=localhost
+exist.port=30030
+exist.adminUserName=admin
+exist.adminUserPW=
+exist.echoRelaxNGPath=/exist/rest/db/mpdl/schema/echo/echo.rnc
+
+# eSciDoc settings
+escidoc.hostname=euler.mpiwg-berlin.mpg.de
+escidoc.port=8080
+escidoc.oumId=/oum/organizational-unit/escidoc:ex3
+escidoc.cmmId=/cmm/content-model/escidoc:persistent4
+escidoc.contextId=/ir/context/escidoc:38600
+escidoc.archimedesContainerId=/ir/container/escidoc:42507
+escidoc.echoContainerId=/ir/container/escidoc:38602
+escidoc.adminUserName=jwillenborg
+escidoc.adminUserPW=
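The settings above are plain key/value pairs; the code that reads them lives in other files of this changeset and is not shown in this hunk. As a minimal, hypothetical sketch (class name, file location and the XML-RPC endpoint path are assumptions), they could be loaded with java.util.Properties like this:

// Hypothetical reader, not part of the changeset.
import java.io.FileInputStream;
import java.util.Properties;

public class MpdlSystemPropertiesExample {
  public static void main(String[] args) throws Exception {
    Properties props = new Properties();
    FileInputStream in = new FileInputStream("mpdl-system.properties");
    props.load(in);
    in.close();
    String host = props.getProperty("exist.hostname", "localhost");
    String port = props.getProperty("exist.port", "30030");
    // the endpoint path below is an assumption, not taken from this changeset
    System.out.println("eXist XML-RPC endpoint: http://" + host + ":" + port + "/exist/xmlrpc");
  }
}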
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/analysis/MpdlStandardAnalyzer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,103 @@
+package de.mpg.mpiwg.berlin.mpdl.analysis;
+
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+/**
+ * StandardAnalyzer variant that is case sensitive: no LowerCaseFilter is applied in
+ * tokenStream and reusableTokenStream, so token case is preserved.
+ *
+ */
+public class MpdlStandardAnalyzer extends Analyzer {
+  private Set stopSet;
+  /** An array containing some common English words that are usually not
+  useful for searching. */
+  public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
+
+  /** Builds an analyzer with the default stop words ({@link #STOP_WORDS}). */
+  public MpdlStandardAnalyzer() {
+    this(STOP_WORDS);
+  }
+
+  /** Builds an analyzer with the given stop words. */
+  public MpdlStandardAnalyzer(Set stopWords) {
+    stopSet = stopWords;
+  }
+
+  /** Builds an analyzer with the given stop words. */
+  public MpdlStandardAnalyzer(String[] stopWords) {
+    stopSet = StopFilter.makeStopSet(stopWords);
+  }
+
+  /** Builds an analyzer with the stop words from the given file.
+   * @see WordlistLoader#getWordSet(File)
+   */
+  public MpdlStandardAnalyzer(File stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+  }
+
+  /** Builds an analyzer with the stop words from the given reader.
+   * @see WordlistLoader#getWordSet(Reader)
+   */
+  public MpdlStandardAnalyzer(Reader stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+  }
+
+  /** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter} and a
+  {@link StopFilter}, but deliberately without a {@link LowerCaseFilter}. */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    StandardTokenizer tokenStream = new StandardTokenizer(reader);
+    tokenStream.setMaxTokenLength(maxTokenLength);
+    TokenStream result = new StandardFilter(tokenStream);
+    result = new StopFilter(result, stopSet);
+    return result;
+  }
+
+  private static final class SavedStreams {
+    StandardTokenizer tokenStream;
+    TokenStream filteredTokenStream;
+  }
+
+  /** Default maximum allowed token length */
+  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+  /**
+   * Set maximum allowed token length.  If a token is seen
+   * that exceeds this length then it is discarded.  This
+   * setting only takes effect the next time tokenStream or
+   * reusableTokenStream is called.
+   */
+  public void setMaxTokenLength(int length) {
+    maxTokenLength = length;
+  }
+    
+  /**
+   * @see #setMaxTokenLength
+   */
+  public int getMaxTokenLength() {
+    return maxTokenLength;
+  }
+  
+  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
+    if (streams == null) {
+      streams = new SavedStreams();
+      setPreviousTokenStream(streams);
+      streams.tokenStream = new StandardTokenizer(reader);
+      streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
+      streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
+    } else {
+      streams.tokenStream.reset(reader);
+    }
+    streams.tokenStream.setMaxTokenLength(maxTokenLength);
+    return streams.filteredTokenStream;
+  }
+}
\ No newline at end of file
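A minimal usage sketch (not part of the changeset), written against the Lucene 2.4-era API used above; the sample text is invented. It shows that lowercase stop words are removed while token case is preserved, since no LowerCaseFilter is applied:

// Usage sketch for MpdlStandardAnalyzer (assumed example, not from the changeset).
import java.io.StringReader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import de.mpg.mpiwg.berlin.mpdl.analysis.MpdlStandardAnalyzer;

public class MpdlStandardAnalyzerExample {
  public static void main(String[] args) throws Exception {
    MpdlStandardAnalyzer analyzer = new MpdlStandardAnalyzer();
    TokenStream ts = analyzer.tokenStream("text", new StringReader("the Motion of Bodies"));
    Token t = ts.next();
    while (t != null) {
      // prints "Motion" and "Bodies": the lowercase stop words "the" and "of" are
      // removed, and the remaining tokens keep their original case
      System.out.println(String.valueOf(t.termBuffer(), 0, t.termLength()));
      t = ts.next();
    }
  }
}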
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/client/DocumentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,370 @@
+package de.mpg.mpiwg.berlin.mpdl.client;
+
+import java.io.File;
+import java.io.FilenameFilter;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.Date;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor;
+import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer;
+import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.FilenameFilterExtension;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler;
+
+/**
+ * Handler for eXist collections and documents (singleton). 
+ * Your local directory structure should look like this:
+ * documents
+ *   archimedes
+ *     ar
+ *       yourDoc1.xml
+ *       ...
+ *     ...
+ *     zh
+ *       yourDoc1.xml
+ *       ...
+ *   echo
+ *     ar
+ *       yourDoc1.xml
+ *       ...
+ *     ...
+ *     zh
+ *       yourDoc1.xml
+ *       ...
+ *       
+ */
+public class DocumentHandler {
+  private MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler;
+  private ESciDocIngestor eSciDocIngestor;
+
+  private String[] docBases = {"archimedes", "echo"};
+  private String[] languages = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"};
+  private String documentRootCollectionMorph = "/db/mpdl/documents/morph";
+  private String documentRootCollectionStandard = "/db/mpdl/documents/standard";
+  private String presentationRootCollection = "/db/mpdl/presentation";
+  private String schemaRootCollection = "/db/mpdl/schema";
+  private String localDocumentDirectory = "/Users/jwillenborg/texts/mpdl/documents";
+  
+  private long beginOfOperation;
+  private long endOfOperation;
+  
+  
+  public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler) throws ApplicationException {
+    this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler;
+  }
+
+  public DocumentHandler(MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler, ESciDocIngestor eSciDocIngestor) throws ApplicationException {
+    this.mpdlXmlRpcDocHandler = mpdlXmlRpcDocHandler;
+    this.eSciDocIngestor = eSciDocIngestor;
+  }
+
+  public void doOperation(MpdlDocOperation docOperation) throws ApplicationException{
+    String operationName = docOperation.getName();  
+    if (operationName.equals("create") || operationName.equals("update")) {
+      createOrUpdate(docOperation);
+    } else if (operationName.equals("delete")) {
+      delete(docOperation);
+    } else if (operationName.equals("updateExist")) {
+      updateExist(docOperation);
+    } else if (operationName.equals("deleteExist")) {
+      deleteExist(docOperation);
+    } else if (operationName.equals("importAllDocumentsLocallyExist")) {
+      importAllDocumentsLocallyExist();
+    } else if (operationName.equals("generatePdfHtmlDocumentFiles")) {
+      generatePdfHtmlDocumentFiles();
+    }
+  }
+  
+  private void importAllDocumentsLocallyExist() throws ApplicationException {
+    System.out.println("Start of DocumentHandler. This operation could be time consuming because documents are indexed on eXist (normal indexing times are 10 seconds for a document) ...");
+    beginOperation();
+    // deletePresentationCollection();
+    // createPresentationCollection();
+    // deleteSchemaCollection();
+    // createSchemaCollection();
+    
+    deleteDocumentCollections();
+    createDocumentCollections();
+    saveDocumentFiles();
+    endOperation();
+    System.out.println("The DocumentHandler needed: " + (endOfOperation - beginOfOperation) + " ms" );
+  }
+  
+  private void createOrUpdate(MpdlDocOperation docOperation) throws ApplicationException {
+    try {
+      String operationName = docOperation.getName();  
+      String language = docOperation.getLanguage();  
+      String srcUrlStr = docOperation.getSrcUrl(); 
+      String eXistIdentifier = docOperation.getDestUrl();
+      String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
+      URL srcUrl = null;
+      String protocol = null;
+      if (srcUrlStr != null && ! srcUrlStr.equals("empty")) {
+        srcUrl = new URL(srcUrlStr);
+        protocol = srcUrl.getProtocol();
+      }
+      SchemaHandler schemaHandler = new SchemaHandler();
+      boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
+      if (operationName.equals("create") && docExists) {
+        throw new ApplicationException("Document: " + eXistIdentifier + " already exists. Please use another name or perform the \"Update\" operation on that document.");
+      }
+      if (operationName.equals("update") && ! docExists) {
+        throw new ApplicationException("Document: " + eXistIdentifier + " does not exist. Please use an existing name and perform the \"Update\" operation again, or perform the \"Create\" operation for that document.");
+      }
+      // load file to local file system
+      if ("file".equals(protocol)) {  // null-safe: protocol stays null if no source URL was given
+        docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server");
+      } else {
+        docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server");
+      }
+      FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName);
+      //  perform validations
+      docOperation.setStatus("validate document: " + eXistIdentifier);
+      schemaHandler.validate(destFileName, docOperation);
+      // perform operation on eXist
+      docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server");
+      RegularizationManager regManager = RegularizationManager.getInstance();
+      regManager.saveRegularizations(language, destFileName);
+      docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
+      mpdlXmlRpcDocHandler.saveDocumentFile(docOperation);
+      // save PDF and HTML versions of the document
+      boolean includePdf = docOperation.includePdf();
+      if (includePdf) {
+        docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier);
+        MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
+        MetadataRecord mdRecord = docOperation.getMdRecord();  // after validation, docOperation has a mdRecord
+        mpdlRenderer.createFile(true, true, "text", mdRecord);  // generate Pdf/Html document
+      }
+      // perform operation on eSciDoc
+      eSciDocIngestor.execute(docOperation);
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void delete(MpdlDocOperation docOperation) throws ApplicationException {
+    String operationName = docOperation.getName();  
+    String eXistIdentifier = docOperation.getDestUrl();
+    String fileName = docOperation.getFileName();
+    if (fileName == null || fileName.trim().equals(""))
+      throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
+    if (! fileName.endsWith(".xml"))
+      throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
+    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
+    if (! docExists) {
+      throw new ApplicationException("Document: " + eXistIdentifier + " does not exist. Please use an existing name and perform the \"Delete\" operation again.");
+    }
+    // perform operation on eXist
+    docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
+    // delete file on local eXist file system: xml, pdf and html
+    String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4);  // without ".xml"
+    String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
+    FileUtil.getInstance().deleteFile(destFileNameXml);
+    boolean includePdf = docOperation.includePdf();
+    if (includePdf) {
+      String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf";
+      String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html";
+      FileUtil.getInstance().deleteFile(destFileNamePdf);
+      FileUtil.getInstance().deleteFile(destFileNameHtml);
+    }
+    // delete document in eXist
+    mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation);
+    // perform operation on eSciDoc
+    eSciDocIngestor.execute(docOperation);
+  }
+  
+  private void updateExist(MpdlDocOperation docOperation) throws ApplicationException {
+    try {
+      String operationName = docOperation.getName();  
+      String language = docOperation.getLanguage();  
+      String srcUrlStr = docOperation.getSrcUrl(); 
+      String eXistIdentifier = docOperation.getDestUrl();
+      String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
+      URL srcUrl = null;
+      String protocol = null;
+      if (srcUrlStr != null && ! srcUrlStr.equals("empty")) {
+        srcUrl = new URL(srcUrlStr);
+        protocol = srcUrl.getProtocol();
+      }
+      SchemaHandler schemaHandler = new SchemaHandler();
+      if ("file".equals(protocol)) {  // null-safe: protocol stays null if no source URL was given
+        docOperation.setStatus("upload file: " + srcUrlStr + " to eXist server");
+      } else {
+        docOperation.setStatus("download file from: " + srcUrlStr + " to eXist server");
+      }
+      // load file to local file system
+      FileUtil.getInstance().saveUrlToLocalFile(srcUrl, destFileName);
+      //  validation
+      docOperation.setStatus("validate document: " + eXistIdentifier);
+      schemaHandler.validate(destFileName, docOperation);
+      // save regularizations of the document
+      docOperation.setStatus(operationName + " regularizations of document: " + eXistIdentifier + " on eXist server");
+      RegularizationManager regManager = RegularizationManager.getInstance();
+      regManager.saveRegularizations(language, destFileName);
+      // perform operation on eXist
+      docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
+      mpdlXmlRpcDocHandler.saveDocumentFile(docOperation);
+      // save PDF and HTML versions of the document
+      boolean includePdf = docOperation.includePdf();
+      if (includePdf) {
+        docOperation.setStatus("create PDF and HTML versions of the document: " + eXistIdentifier);
+        MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
+        MetadataRecord mdRecord = docOperation.getMdRecord();  // after validation, docOperation has a mdRecord
+        mpdlRenderer.createFile(true, true, "text", mdRecord);   // generate Pdf/Html document
+      }
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void deleteExist(MpdlDocOperation docOperation) throws ApplicationException {
+    String operationName = docOperation.getName();  
+    String eXistIdentifier = docOperation.getDestUrl();
+    String fileName = docOperation.getFileName();
+    if (fileName == null || fileName.trim().equals(""))
+      throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
+    if (! fileName.endsWith(".xml"))
+      throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
+    boolean docExists = mpdlXmlRpcDocHandler.documentExists(docOperation);
+    if (! docExists)
+      throw new ApplicationException("Document: " + eXistIdentifier + " does not exist.");
+    // perform operation
+    docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eXist server");
+    // delete file on local file system: xml, pdf and html
+    String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4);  // without ".xml"
+    String destFileNameXml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
+    FileUtil.getInstance().deleteFile(destFileNameXml);
+    boolean includePdf = docOperation.includePdf();
+    if (includePdf) {
+      String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf";
+      String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html";
+      FileUtil.getInstance().deleteFile(destFileNamePdf);
+      FileUtil.getInstance().deleteFile(destFileNameHtml);
+    }
+    // delete document in eXist
+    mpdlXmlRpcDocHandler.deleteDocumentFile(docOperation);
+  }
+  
+  private void deleteDocumentCollections() throws ApplicationException {
+    mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionMorph);
+    mpdlXmlRpcDocHandler.deleteCollection(documentRootCollectionStandard);
+  }
+
+  private void createDocumentCollections() throws ApplicationException {
+    for (int i=0; i < docBases.length; i++) {
+      String docBase = docBases[i];
+      for (int j=0; j < languages.length; j++) {
+        String language = languages[j];
+        String documentCollectionMorph = documentRootCollectionMorph + "/" + docBase + "/" + language;
+        mpdlXmlRpcDocHandler.createCollection(documentCollectionMorph);
+        String documentCollectionStandard = documentRootCollectionStandard + "/" + docBase + "/" + language;
+        mpdlXmlRpcDocHandler.createCollection(documentCollectionStandard);
+      }
+    }
+  }
+
+  private void saveDocumentFiles() throws ApplicationException {
+    int counter = 0;
+    for (int i=0; i < docBases.length; i++) {
+      String docBase = docBases[i];
+      for (int j=0; j < languages.length; j++) {
+        String language = languages[j];
+        String documentCollection = "/" + docBase + "/" + language;
+        String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language;
+        File localFileDir = new File(localFileDirStr);
+        FilenameFilter filter = new FilenameFilterExtension("xml");
+        File[] files = localFileDir.listFiles(filter);
+        System.out.println("Adding all documents in path: \"" + localFileDirStr + "\" to eXist collection: \"" + documentCollection + "\" ...");
+        for (int k=0; k < files.length; k++) {
+          File f = files[k];
+          String localFileNameWithoutPath = f.getName();
+          String fullLocalFileName = f.getPath();
+          String srcUrl = "file://" + fullLocalFileName;
+          MpdlDocOperation docOperation = new MpdlDocOperation("updateExist", srcUrl, null, docBase, language, localFileNameWithoutPath); 
+          long begin = new Date().getTime();
+          doOperation(docOperation);
+          long end = new Date().getTime();
+          System.out.println("Added document \"" + fullLocalFileName + "\" to eXist collection: \"" + documentCollection + "\" (" + (end - begin) + " ms)" );
+          counter++;
+        }
+      }
+    }
+    System.out.println("Imported documents: " + counter);
+  }
+
+  private void generatePdfHtmlDocumentFiles() throws ApplicationException {
+    int counter = 0;
+    MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
+    for (int i=0; i < docBases.length; i++) {
+      String docBase = docBases[i];
+      for (int j=0; j < languages.length; j++) {
+        String language = languages[j];
+        String localFileDirStr = localDocumentDirectory + "/" + docBase + "/" + language;
+        File localFileDir = new File(localFileDirStr);
+        FilenameFilter filter = new FilenameFilterExtension("xml");
+        File[] files = localFileDir.listFiles(filter);
+        System.out.println("Generating Pdf/Html documents in path: \"" + localFileDirStr + "\" ...");
+        for (int k=0; k < files.length; k++) {
+          File f = files[k];
+          String localFileName = f.getName();
+          String fullLocalFileName = f.getPath();
+          String srcUrl = "file://" + fullLocalFileName;
+          String localFileNameWithoutExtension = localFileName.substring(0, localFileName.length() - 4);  // without ".xml"
+          String fullLocalPdfFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents/" + docBase + "/" + language + "/" + localFileNameWithoutExtension + ".pdf";
+          File localPdfFile = new File(fullLocalPdfFileName);
+          boolean pdfFileAlreadyExists = localPdfFile.exists();
+          // generate Pdf/Html file only if pdf file does not already exist
+          if (! pdfFileAlreadyExists) {
+            MpdlDocOperation docOperation = new MpdlDocOperation("generatePdf", srcUrl, null, docBase, language, localFileName); 
+            SchemaHandler schemaHandler = new SchemaHandler();
+            schemaHandler.validate(fullLocalFileName, docOperation);
+            long begin = new Date().getTime();
+            MetadataRecord mdRecord = docOperation.getMdRecord();  // after validation, docOperation has a mdRecord
+            mpdlRenderer.createFile(true, true, "text", mdRecord);  // generate Pdf/Html document
+            long end = new Date().getTime();
+            System.out.println("Generate Pdf/Html document for: \"" + fullLocalFileName + "\" (" + (end - begin) + " ms)" );
+            counter++;
+            try {
+              Thread.sleep(60000);  // delay so that called servers (digilib, eXist) are not stressed too much
+            } catch (InterruptedException e) {
+              throw new ApplicationException(e);
+            }
+          }
+        }
+      }
+    }
+    System.out.println("Generated documents: " + counter);
+  }
+
+  private void deletePresentationCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.deleteCollection(presentationRootCollection);
+  }
+
+  private void createPresentationCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.createCollection(presentationRootCollection);
+  }
+
+  private void deleteSchemaCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.deleteCollection(schemaRootCollection);
+  }
+
+  private void createSchemaCollection() throws ApplicationException {
+    mpdlXmlRpcDocHandler.createCollection(schemaRootCollection);
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date().getTime();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date().getTime();
+  }
+
+}
\ No newline at end of file
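A hypothetical driver (not part of the changeset) showing how one local document would be pushed into eXist through doOperation(). The MpdlDocOperation arguments follow the call in saveDocumentFiles() above; the file path and name are illustrative, and obtaining a configured MpdlXmlRpcDocHandler is assumed (its setup lives in other files of this changeset). The "updateExist" operation only touches eXist, so no ESciDocIngestor is required.

// Hypothetical driver, not part of the changeset.
import de.mpg.mpiwg.berlin.mpdl.client.DocumentHandler;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler;

public class ImportOneDocumentExample {
  // docHandler must already be configured; how that is done is not shown in this hunk
  public static void importOne(MpdlXmlRpcDocHandler docHandler) throws ApplicationException {
    DocumentHandler documentHandler = new DocumentHandler(docHandler);
    // argument order as in saveDocumentFiles(): operation, srcUrl, destUrl, docBase, language, fileName
    MpdlDocOperation op = new MpdlDocOperation("updateExist",
        "file:///Users/jwillenborg/texts/mpdl/documents/echo/la/example.xml", null, "echo", "la", "example.xml");
    documentHandler.doOperation(op);
  }
}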
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/doc/GetDocServlet.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,54 @@
+package de.mpg.mpiwg.berlin.mpdl.doc;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.net.URLConnection;
+
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+
+public class GetDocServlet extends HttpServlet {
+  private static final long serialVersionUID = -4889427839010526185L;
+
+  protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
+    String docEXistIdentifier = request.getParameter("doc");
+    String docFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + docEXistIdentifier;
+    File docFile = new File(docFileName);
+    if (docFile.exists())
+      write(response, docFile);
+    else
+      write(response, "Document: " + docEXistIdentifier + " does not exist");
+  }
+  
+  private void write(HttpServletResponse response, File file) throws IOException {
+    String fileName = file.getName();
+    OutputStream out = response.getOutputStream();
+    BufferedInputStream is = new BufferedInputStream(new FileInputStream(file));
+    String contentType = URLConnection.guessContentTypeFromName(fileName);  // other methods: URLConnection.guessContentTypeFromStream(is); or MIMEUtils.getMIMEType(file);
+    if (contentType != null)
+      response.setContentType(contentType);
+    response.setHeader("Content-Disposition", "filename=" + fileName);
+    byte[] buf = new byte[20000*1024]; // 20MB buffer
+    int bytesRead;
+    while ((bytesRead = is.read(buf)) != -1) {
+      out.write(buf, 0, bytesRead);
+    }
+    is.close();
+    out.flush();
+    out.close();
+  }
+
+  private void write(HttpServletResponse response, String str) throws IOException {
+    PrintWriter out = response.getWriter();
+    out.write(str);
+  }
+  
+}
\ No newline at end of file
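A hypothetical client call (not part of the changeset): the servlet only evaluates the "doc" request parameter, but its URL mapping, host and port are not part of this hunk, so the values below are assumptions.

// Hypothetical client, not part of the changeset; URL mapping, host and port are assumed.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class GetDocClientExample {
  public static void main(String[] args) throws Exception {
    URL url = new URL("http://localhost:30030/mpdl/getDoc?doc=/echo/la/example.xml");
    BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), "UTF-8"));
    String line;
    while ((line = in.readLine()) != null) {
      System.out.println(line);
    }
    in.close();
  }
}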
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusAnalyzer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,181 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis;
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.br.BrazilianAnalyzer;
+import org.apache.lucene.analysis.cz.CzechAnalyzer;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.nl.DutchAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusConstants;
+
+/**
+ * Analyzer for specific languages. Supports an external list of stopwords (words that
+ * will not be indexed at all) and an external list of exclusions (words that will
+ * not be stemmed, but indexed).
+ * A default set of stopwords is used unless an alternative list is specified; the
+ * exclusion list is empty by default.
+ */
+public class DonatusAnalyzer extends Analyzer {
+  protected String language = DonatusConstants.DEFAULT_LANGUAGE;
+  
+  /**
+   * Contains the stopwords used with the StopFilter.
+   */
+  protected Set stopSet = new HashSet();
+
+  /**
+   * Contains words that should be indexed but not stemmed.
+   */
+  protected Set exclusionSet = new HashSet();
+
+  /**
+   * Builds an analyzer with the stop words for the default language
+   * (see {@link #getStopWords(String)}).
+   */
+  public DonatusAnalyzer() {
+    String[] stopWords = getStopWords(language);  // stopwords for the language
+    stopSet = StopFilter.makeStopSet(stopWords);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzer(String[] stopwords) {
+    stopSet = StopFilter.makeStopSet(stopwords);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzer(Hashtable stopwords) {
+    stopSet = new HashSet(stopwords.keySet());
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzer(File stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+  }
+
+  public String getLanguage() {
+    return language;  
+  }
+  
+  protected void setLanguage(String lang) {
+    this.language = lang;
+  }
+  
+  /**
+   * Gets the stopwords for the given language; for some languages they are taken from
+   * the stopword lists that ship with the corresponding Lucene analyzers. TODO: other languages
+   * @param language the language code (e.g. "en", "de", "fr")
+   * @return the stopwords (an empty array if none are available for the language)
+   * 
+   * 
+Taken from: http://www.perseus.tufts.edu/hopper/stopwords
+# English: a, a's, able, about, above, according, accordingly, across, actually, after, afterwards, again, against, ain't, all, allow, allows, almost, alone, along, already, also, although, always, am, among, amongst, an, and, another, any, anybody, anyhow, anyone, anything, anyway, anyways, anywhere, apart, appear, appreciate, appropriate, are, aren't, around, as, aside, ask, asking, associated, at, available, away, awfully, b, be, became, because, become, becomes, becoming, been, before, beforehand, behind, being, believe, below, beside, besides, best, better, between, beyond, both, brief, but, by, c, c'mon, c's, came, can, can't, cannot, cant, cause, causes, certain, certainly, changes, clearly, co, com, come, comes, concerning, consequently, consider, considering, contain, containing, contains, corresponding, could, couldn't, course, currently, d, definitely, described, despite, did, didn't, different, do, does, doesn't, doing, don't, done, down, downwards, during, e, each, edu, eg, eight, either, else, elsewhere, enough, entirely, especially, et, etc, even, ever, every, everybody, everyone, everything, everywhere, ex, exactly, example, except, f, far, few, fifth, first, five, followed, following, follows, for, former, formerly, forth, four, from, further, furthermore, g, get, gets, getting, given, gives, go, goes, going, gone, got, gotten, greetings, h, had, hadn't, happens, hardly, has, hasn't, have, haven't, having, he, he's, hello, help, hence, her, here, here's, hereafter, hereby, herein, hereupon, hers, herself, hi, him, himself, his, hither, hopefully, how, howbeit, however, i, i'd, i'll, i'm, i've, ie, if, ignored, immediate, in, inasmuch, inc, indeed, indicate, indicated, indicates, inner, insofar, instead, into, inward, is, isn't, it, it'd, it'll, it's, its, itself, j, just, k, keep, keeps, kept, know, known, knows, l, last, lately, later, latter, latterly, least, less, lest, let, let's, like, liked, likely, little, look, looking, looks, ltd, m, mainly, many, may, maybe, me, mean, meanwhile, merely, might, more, moreover, most, mostly, much, must, my, myself, n, name, namely, nd, near, nearly, necessary, need, needs, neither, never, nevertheless, new, next, nine, no, nobody, non, none, noone, nor, normally, not, nothing, novel, now, nowhere, o, obviously, of, off, often, oh, ok, okay, old, on, once, one, ones, only, onto, or, other, others, otherwise, ought, our, ours, ourselves, out, outside, over, overall, own, p, particular, particularly, per, perhaps, placed, please, plus, possible, presumably, probably, provides, q, que, quite, qv, r, rather, rd, re, really, reasonably, regarding, regardless, regards, relatively, respectively, right, s, said, same, saw, say, saying, says, second, secondly, see, seeing, seem, seemed, seeming, seems, seen, self, selves, sensible, sent, serious, seriously, seven, several, shall, she, should, shouldn't, since, six, so, some, somebody, somehow, someone, something, sometime, sometimes, somewhat, somewhere, soon, sorry, specified, specify, specifying, still, sub, such, sup, sure, t, t's, take, taken, tell, tends, th, than, thank, thanks, thanx, that, that's, thats, the, their, theirs, them, themselves, then, thence, there, there's, thereafter, thereby, therefore, therein, theres, thereupon, these, they, they'd, they'll, they're, they've, think, third, this, thorough, thoroughly, those, though, three, through, throughout, thru, thus, to, together, too, took, toward, towards, tried, tries, truly, try, trying, twice, two, u, un, under, 
unfortunately, unless, unlikely, until, unto, up, upon, us, use, used, useful, uses, using, usually, uucp, v, value, various, very, via, viz, vs, w, want, wants, was, wasn't, way, we, we'd, we'll, we're, we've, welcome, well, went, were, weren't, what, what's, whatever, when, whence, whenever, where, where's, whereafter, whereas, whereby, wherein, whereupon, wherever, whether, which, while, whilst, whither, who, who's, whoever, whole, whom, whose, why, will, willing, wish, with, within, without, won't, wonder, would, wouldn't, x, y, yes, yet, you, you'd, you'll, you're, you've, your, yours, yourself, yourselves, z, zero
+
+# Greek: a)/llos, a)/n, a)/ra, a)ll', a)lla/, a)po/, au)to/s, d', dai/, dai/s, de/, dh/, dia/, e(autou=, e)/ti, e)a/n, e)gw/, e)k, e)mo/s, e)n, e)pi/, ei), ei)/mi, ei)mi/, ei)s, ga/r, ga^, ge, h(, h)/, kai/, kata/, me/n, meta/, mh/, o(, o(/de, o(/s, o(/stis, o(/ti, oi(, ou(/tws, ou(=tos, ou), ou)/te, ou)=n, ou)de/, ou)dei/s, ou)k, para/, peri/, pro/s, so/s, su/, su/n, ta/, te, th/n, th=s, th=|, ti, ti/, ti/s, tis, to/, to/n, toi/, toiou=tos, tou/s, tou=, tw=n, tw=|, u(mo/s, u(pe/r, u(po/, w(/ste, w(s, w)=
+
+# Latin: ab, ac, ad, adhic, aliqui, aliquis, an, ante, apud, at, atque, aut, autem, cum, cur, de, deinde, dum, ego, enim, ergo, es, est, et, etiam, etsi, ex, fio, haud, hic, iam, idem, igitur, ille, in, infra, inter, interim, ipse, is, ita, magis, modo, mox, nam, ne, nec, necque, neque, nisi, non, nos, o, ob, per, possum, post, pro, quae, quam, quare, qui, quia, quicumque, quidem, quilibet, quis, quisnam, quisquam, quisque, quisquis, quo, quoniam, sed, si, sic, sive, sub, sui, sum, super, suus, tam, tamen, trans, tu, tum, ubi, uel, uero, unus, ut
+
+# Italian: a, ad, agli, al, alcun, alcuno, all', alla, alle, allo, altra, altre, altri, altro, assai, avere, bene, c', ch', che, chi, ci, cio, co', col, come, con, cosi, cosi\, d', da, dal, dall', dalla, dalle, de, de', degli, dei, del, dell', della, delle, dello, di, duo, e, ed, egli, essere, et, gia, gia\, gli, gran, grande, i, il, in, io, l', la, le, li, lo, ma, maggior, maggiore, mai, mio, molto, ne, ne', nel, nell', nella, nelle, non, o, ogn', ogni, oue, ove, per, perche, piu, piu\, poco, poi, puo, qual, qualche, qualcun, qualcuno, quale, quanta, quante, quanti, quanto, quasi, quella, quelle, quelli, quello, questa, queste, questi, questo, qui, s', se, sempre, senza, si, sotto, su, sua, sue, sui, suo, tal, tanta, tante, tanti, tanto, tra, tre, tutta, tutte, tutti, tutto, un, una, uno, vn, vna, vno
+
+# German: aber, alle, als, also, am, an, andern, auch, auf, aus, bei, bey, bis, da, daher, das, dass, de, dem, den, der, des, die, diese, dieser, dieses, doch, durch, eben, ein, eine, einem, einen, einer, eines, er, es, fur, gegen, haben, hat, ihre, im, in, ist, kan, man, mehr, mit, nach, nicht, noch, nur, oder, ohne, sehr, sei, selbst, sey, sich, sie, sind, so, uber, um, und, unter, vgl, vom, von, weil, welche, wenn, werden, wie, wird, zu, zur
+
+# French: a, amp, au, auec, aussi, autre, autres, aux, bien, car, ce, ces, cette, ceux, chose, choses, comme, d', dans, de, des, deux, dire, dont, du, elle, elles, en, encore, est, estre, et, faire, fait, faut, force, grande, ie, il, ils, l', la, le, les, leur, leurs, lors, luy, mais, mesme, n', ne, nous, on, ont, or, ou, par, parce, pas, peut, plus, plusieurs, point, pour, pourquoy, puis, qu', quand, que, qui, quoy, sa, sans, se, ses, si, soit, son, sont, sur, tous, tout, toutes, vn, vne, y
+   */
+  public String[] getStopWords(String language) {
+    String[] stopwords = new String[0];
+    if (language != null) {
+      if (language.equals("en"))
+        stopwords = StandardAnalyzer.STOP_WORDS;
+      else if(language.equals("br"))
+        stopwords = BrazilianAnalyzer.BRAZILIAN_STOP_WORDS;
+      else if(language.equals("cz"))
+        stopwords = CzechAnalyzer.CZECH_STOP_WORDS;
+      else if(language.equals("de"))
+        stopwords = GermanAnalyzer.GERMAN_STOP_WORDS;
+      else if(language.equals("fr"))
+        stopwords = FrenchAnalyzer.FRENCH_STOP_WORDS;
+      else if(language.equals("nl"))
+        stopwords = DutchAnalyzer.DUTCH_STOP_WORDS;
+    }
+    return stopwords;
+  }
+  
+  /**
+   * Builds an exclusionlist from an array of Strings.
+   */
+  public void setStemExclusionTable(String[] exclusionlist) {
+    exclusionSet = StopFilter.makeStopSet(exclusionlist);
+  }
+
+  /**
+   * Builds an exclusionlist from a Hashtable.
+   */
+  public void setStemExclusionTable(Hashtable exclusionlist) {
+    exclusionSet = new HashSet(exclusionlist.keySet());
+  }
+
+  /**
+   * Builds an exclusionlist from the words contained in the given file.
+   */
+  public void setStemExclusionTable(File exclusionlist) throws IOException {
+    exclusionSet = WordlistLoader.getWordSet(exclusionlist);
+  }
+
+  /**
+   * Creates a TokenStream which tokenizes all the text in the provided Reader.
+   *
+   * @return A TokenStream built from a StandardTokenizer filtered with
+   *         StandardFilter, LowerCaseFilter, StopFilter, DonatusStemFilter
+   */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    TokenStream result = new StandardTokenizer(reader);
+    result = new StandardFilter(result);
+    result = new LowerCaseFilter(result);
+    result = new StopFilter(result, stopSet);
+    result = new DonatusStemFilter(this, result, exclusionSet);
+    return result;
+  }
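+
+  // Usage sketch for the chain above (Lucene 2.x token API; the field name is arbitrary):
+  //   TokenStream ts = analyzer.tokenStream("content", new StringReader(text));
+  //   for (Token t = ts.next(); t != null; t = ts.next()) {
+  //     // t is tokenized, lowercased, stop-word filtered and stemmed by DonatusStemFilter
+  //   }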
+  
+  public ArrayList<String> getToken(String inputString) {
+    ArrayList<String> token = new ArrayList<String>();
+    try {
+      Reader reader = new StringReader(inputString);
+      TokenStream result = new StandardTokenizer(reader);
+      result = new StandardFilter(result);
+      result = new LowerCaseFilter(result);
+      result = new StopFilter(result, stopSet);
+      Token t = result.next();
+      while (t != null) {
+        String currentToken = new String(t.termBuffer(), 0, t.termLength());
+        token.add(currentToken);
+        t = result.next();
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return token;
+  }
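+
+  // Example (sketch): assuming the analyzer was built with the default English stop set,
+  //   getToken("The Sun and the Moon")
+  // yields [sun, moon] -- lowercased and stop-word filtered; note that this helper does not
+  // run the DonatusStemFilter, so the returned tokens are not stemmed.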
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemFilter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,53 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+import java.io.IOException;
+import java.util.Set;
+
+public final class DonatusStemFilter extends TokenFilter {
+  private DonatusAnalyzer analyzer;
+  private Token token = null;
+  private DonatusStemmer stemmer = null;
+  private Set exclusionSet = null;
+
+  public DonatusStemFilter(TokenStream in) {
+    super(in);
+    stemmer = new DonatusStemmer();
+  }
+
+  public DonatusStemFilter(DonatusAnalyzer analyzer, TokenStream in, Set exclusionSet) {
+    this(in);
+    this.analyzer = analyzer;
+    this.exclusionSet = exclusionSet;
+    this.stemmer.setLanguage(analyzer.getLanguage());
+  }
+
+  public final Token next() throws IOException {
+    if (( token = input.next()) == null) {
+      return null;
+    } else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) {
+      return token;
+    } else {
+      String s = stemmer.stem(token.termText());
+      // If not stemmed, don't waste time creating a new token
+      if ( !s.equals( token.termText() ) ) {
+        return new Token( s, token.startOffset(),
+          token.endOffset(), token.type() );
+      }
+      return token;
+    }
+  }
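+
+  // Behaviour sketch for next(): tokens whose text is in exclusionSet pass through unchanged;
+  // all others are stemmed, and a new Token (same offsets and type) is only created when the
+  // stem differs from the original text, e.g. (hypothetical stems)
+  //   "libros" -> new Token("libro", start, end, type)
+  //   "et"     -> returned as-is if "et" is in the exclusion set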
+
+  public void setStemmer(DonatusStemmer stemmer) {
+    if ( stemmer != null ) {
+      this.stemmer = stemmer;
+    }
+  }
+
+  public void setExclusionSet(Set exclusionSet) {
+    this.exclusionSet = exclusionSet;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/DonatusStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,146 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis;
+
+import org.apache.log4j.Logger;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusCache;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusConstants;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusLemma;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class DonatusStemmer {
+  private String language = DonatusConstants.DEFAULT_LANGUAGE;
+
+  protected void setLanguage(String language) {
+    this.language = language;
+  }
+  
+  /**
+   * Used for indexing documents and for querying.
+   * @param term the surface form to be stemmed (lowercased internally)
+   * @return the stem (base form) of the term
+   */
+  protected String stem(String term) {
+    String stem = null;
+    term = term.toLowerCase();
+    // try to find the stem by the DonatusCache
+    DonatusLemma donatusLemma = null; 
+    try {
+      DonatusCache donatusCache = DonatusCache.getInstance();
+      donatusLemma = donatusCache.getLemmaByVariantForm(language, term);
+    } catch (ApplicationException e) {
+      // ignore and fall back to Snowball stemming below
+    }
+    if (donatusLemma != null)
+      stem = donatusLemma.getForm();
+    // if not found by Donatus try to use Snowball (or later other language specific stemmers)
+    if (stem == null) {
+      stem = stemBySnowball(term, language);
+      // if the stem differs from the term and is not too short (> 2 characters), add this Snowball variant to the lemma cache
+      if ((! stem.equals(term)) && stem.length() > 2) {
+        try {
+          DonatusCache donatusCache = DonatusCache.getInstance();
+          if (donatusCache.getMode() == DonatusCache.DOCUMENT_MODE) {
+            donatusCache.addVariant(language, stem, DonatusConstants.TYPE_SNOWBALL, term);
+          }
+        } catch (ApplicationException e) {
+          Logger.getLogger(DonatusStemmer.class).warn("DonatusCache: an exception was caught while indexing a document: " + e.getMessage(), e);
+        }
+      }
+    }
+    /* TODO if Snowball is too bad (for some languages) use Lucene analyzers
+    if (stem == null) {
+      stem = stemByLanguageStemmers(term, this.language);
+    }
+    */
+    return stem;
+  }
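+
+  // Flow sketch (hypothetical words): stem("aquarum") returns the lemma form from the
+  // DonatusCache when a matching variant is registered there; stem("running") with language
+  // "en" misses the cache, Snowball yields "run", and the pair is stored as a TYPE_SNOWBALL
+  // variant if the cache is in DOCUMENT_MODE.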
+
+  private String stemBySnowball(String term, String language) {
+    String stem = null;
+    if (language.equals("de")) {
+      net.sf.snowball.ext.GermanStemmer stemmer = new net.sf.snowball.ext.GermanStemmer();
+      stemmer.setCurrent(term); 
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("en")) {
+      net.sf.snowball.ext.EnglishStemmer stemmer = new net.sf.snowball.ext.EnglishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("nl")) {
+      net.sf.snowball.ext.DutchStemmer stemmer = new net.sf.snowball.ext.DutchStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("fi")) {
+      net.sf.snowball.ext.FinnishStemmer stemmer = new net.sf.snowball.ext.FinnishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("fr")) {
+      net.sf.snowball.ext.FrenchStemmer stemmer = new net.sf.snowball.ext.FrenchStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("it")) {
+      net.sf.snowball.ext.ItalianStemmer stemmer = new net.sf.snowball.ext.ItalianStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("no")) {
+      net.sf.snowball.ext.NorwegianStemmer stemmer = new net.sf.snowball.ext.NorwegianStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("pt")) {
+      net.sf.snowball.ext.PortugueseStemmer stemmer = new net.sf.snowball.ext.PortugueseStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("ru")) {
+      net.sf.snowball.ext.RussianStemmer stemmer = new net.sf.snowball.ext.RussianStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("es")) {
+      net.sf.snowball.ext.SpanishStemmer stemmer = new net.sf.snowball.ext.SpanishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("sv")) {
+      net.sf.snowball.ext.SwedishStemmer stemmer = new net.sf.snowball.ext.SwedishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else {
+      stem = term; // if no language matches, return the term itself as the stem form
+    }
+    return stem;
+  }
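+
+  // Example (sketch): stemBySnowball("cats", "en") -> "cat" via the Snowball English stemmer;
+  // for a code without a branch above (e.g. "la") the term is returned unchanged.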
+
+  /*
+  private String stemByLanguageStemmers(String term, String language) {
+    // TODO provide other languages
+    String stem = null;
+    if (language.equals("br")) {
+      BrazilianStemmer stemmer = new BrazilianStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("de")) {
+      GermanStemmer stemmer = new GermanStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("fr")) {
+      FrenchStemmer stemmer = new FrenchStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("nl")) {
+      DutchStemmer stemmer = new DutchStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("ru")) {
+      RussianStemmer stemmer = new RussianStemmer();
+      stem = stemmer.stem(term);
+    } else {
+      stem = term; // if no language matches, return the term itself as the stem form
+    }
+    return stem;
+  }
+  */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/BrazilianStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,1021 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for Brazilian words.
+ */
+public class BrazilianStemmer {
+
+	/**
+	 * Changed term
+	 */
+	private   String TERM ;
+	private   String CT ;
+	private   String R1 ;
+	private   String R2 ;
+	private   String RV ;
+
+
+	public BrazilianStemmer() {
+	}
+
+	/**
+	 * Stems the given term to a unique <tt>discriminator</tt>.
+	 *
+	 * @param term  The term that should be stemmed.
+	 * @return      Discriminator for <tt>term</tt>
+	 */
+	public String stem( String term ) {
+    boolean altered = false ; // altered the term
+
+    // creates CT
+    createCT(term) ;
+
+		if ( !isIndexable( CT ) ) {
+			return null;
+		}
+		if ( !isStemmable( CT ) ) {
+			return CT ;
+		}
+
+    R1 = getR1(CT) ;
+    R2 = getR1(R1) ;
+    RV = getRV(CT) ;
+    TERM = term + ";" +CT ;
+
+    altered = step1() ;
+    if (!altered) {
+      altered = step2() ;
+    }
+
+    if (altered) {
+      step3();
+    } else {
+      step4();
+    }
+
+    step5() ;
+
+    return CT ;
+	}
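+
+	// Usage sketch: new BrazilianStemmer().stem(term) runs steps 1-5 over the changed term CT;
+	// it returns null for terms it will not index (fewer than 3 or 30+ characters) and returns
+	// the cleaned term unchanged when it contains non-letter characters.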
+
+	/**
+	 * Checks whether a term can be processed correctly.
+	 *
+	 * @return  true if, and only if, the given term consists only of letters.
+	 */
+	private boolean isStemmable( String term ) {
+		for ( int c = 0; c < term.length(); c++ ) {
+			// Discard terms that contain non-letter characters.
+			if ( !Character.isLetter(term.charAt(c))) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	/**
+	 * Checks whether a term can be indexed.
+	 *
+	 * @return  true if it can be indexed
+	 */
+	private boolean isIndexable( String term ) {
+		return (term.length() < 30) && (term.length() > 2) ;
+	}
+
+	/**
+	 * Checks whether the given character is one of 'a','e','i','o','u'.
+   *
+   * @return true if it is a vowel
+	 */
+	private boolean isVowel( char value ) {
+    return (value == 'a') ||
+           (value == 'e') ||
+           (value == 'i') ||
+           (value == 'o') ||
+           (value == 'u') ;
+  }
+
+	/**
+	 * Gets R1
+   *
+   * R1 - is the region after the first non-vowel following a vowel,
+   *      or is the null region at the end of the word if there is
+   *      no such non-vowel.
+   *
+   * @return null or a string representing R1
+	 */
+	private String getR1( String value ) {
+    int     i;
+    int     j;
+
+    // be-safe !!!
+    if (value == null) {
+      return null ;
+    }
+
+    // find 1st vowel
+    i = value.length()-1 ;
+    for (j=0 ; j < i ; j++) {
+      if (isVowel(value.charAt(j))) {
+        break ;
+      }
+    }
+
+    if (!(j < i)) {
+      return null ;
+    }
+
+    // find 1st non-vowel
+    for ( ; j < i ; j++) {
+      if (!(isVowel(value.charAt(j)))) {
+        break ;
+      }
+    }
+
+    if (!(j < i)) {
+      return null ;
+    }
+
+    return value.substring(j+1) ;
+  }
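+
+  // Worked example, traced through the code: getR1("beleza") = "eza" (the region after the
+  // first non-vowel 'l' that follows a vowel); stem() then uses getR1("eza") = "a" as R2.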
+
+	/**
+	 * Gets RV
+   *
+   * RV - IF the second letter is a consonant, RV is the region after
+   *      the next following vowel,
+   *
+   *      OR if the first two letters are vowels, RV is the region
+   *      after the next consonant,
+   *
+   *      AND otherwise (consonant-vowel case) RV is the region after
+   *      the third letter.
+   *
+   *      BUT RV is the end of the word if these positions cannot be
+   *      found.
+   *
+   * @return null or a string representing RV
+	 */
+	private String getRV( String value ) {
+    int     i;
+    int     j;
+
+    // be-safe !!!
+    if (value == null) {
+      return null ;
+    }
+
+    i = value.length()-1 ;
+
+    // RV - IF the second letter is a consonant, RV is the region after
+    //      the next following vowel,
+    if ((i > 0) && !isVowel(value.charAt(1))) {
+      // find 1st vowel
+      for (j=2 ; j < i ; j++) {
+        if (isVowel(value.charAt(j))) {
+          break ;
+        }
+      }
+
+      if (j < i) {
+        return value.substring(j+1) ;
+      }
+    }
+
+
+    // RV - OR if the first two letters are vowels, RV is the region
+    //      after the next consonant,
+    if ((i > 1) &&
+        isVowel(value.charAt(0)) &&
+        isVowel(value.charAt(1))) {
+      // find 1st consonant
+      for (j=2 ; j < i ; j++) {
+        if (!isVowel(value.charAt(j))) {
+          break ;
+        }
+      }
+
+      if (j < i) {
+        return value.substring(j+1) ;
+      }
+    }
+
+    // RV - AND otherwise (consonant-vowel case) RV is the region after
+    //      the third letter.
+    if (i > 2) {
+      return value.substring(3) ;
+    }
+
+    return null ;
+  }
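+
+  // Worked examples, traced through the code: getRV("macho") = "ho" (consonant-vowel start,
+  // so the region after the third letter) and getRV("oliva") = "va" (second letter is a
+  // consonant, so the region after the next vowel 'i').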
+
+	/**
+   * 1) Turn to lowercase
+   * 2) Remove accents
+   * 3) ã -> a ; õ -> o
+   * 4) ç -> c
+   *
+   * @return null or a string transformed
+	 */
+	private String changeTerm( String value ) {
+    int     j;
+    String  r = "" ;
+
+    // be-safe !!!
+    if (value == null) {
+      return null ;
+    }
+
+    value = value.toLowerCase() ;
+    for (j=0 ; j < value.length() ; j++) {
+      if ((value.charAt(j) == 'á') ||
+          (value.charAt(j) == 'â') ||
+          (value.charAt(j) == 'ã')) {
+        r= r + "a" ; continue ;
+      }
+      if ((value.charAt(j) == 'é') ||
+          (value.charAt(j) == 'ê')) {
+        r= r + "e" ; continue ;
+      }
+      if (value.charAt(j) == 'í') {
+        r= r + "i" ; continue ;
+      }
+      if ((value.charAt(j) == 'ó') ||
+          (value.charAt(j) == 'ô') ||
+          (value.charAt(j) == 'õ')) {
+        r= r + "o" ; continue ;
+      }
+      if ((value.charAt(j) == 'ú') ||
+          (value.charAt(j) == 'ü')) {
+        r= r + "u" ; continue ;
+      }
+      if (value.charAt(j) == 'ç') {
+        r= r + "c" ; continue ;
+      }
+      if (value.charAt(j) == 'ñ') {
+        r= r + "n" ; continue ;
+      }
+
+      r= r+ value.charAt(j) ;
+    }
+
+    return r ;
+  }
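+
+  // Example, traced through the code: changeTerm("Coração") -> "coracao"
+  // (lowercased, 'ç' -> 'c', 'ã' -> 'a').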
+
+	/**
+   * Check if a string ends with a suffix
+   *
+   * @return true if the string ends with the specified suffix
+	 */
+	private boolean suffix( String value, String suffix ) {
+
+    // be-safe !!!
+    if ((value == null) || (suffix == null)) {
+      return false ;
+    }
+
+    if (suffix.length() > value.length()) {
+      return false ;
+    }
+
+    return value.substring(value.length()-suffix.length()).equals(suffix);
+  }
+
+	/**
+   * Replace a string suffix by another
+   *
+   * @return the replaced String
+	 */
+	private String replaceSuffix( String value, String toReplace, String changeTo ) {
+    String vvalue ;
+
+    // be-safe !!!
+    if ((value == null) ||
+        (toReplace == null) ||
+        (changeTo == null) ) {
+      return value ;
+    }
+
+    vvalue = removeSuffix(value,toReplace) ;
+
+    if (value.equals(vvalue)) {
+      return value ;
+    } else {
+      return vvalue + changeTo ;
+    }
+  }
+
+	/**
+   * Remove a string suffix
+   *
+   * @return the String without the suffix
+	 */
+	private String removeSuffix( String value, String toRemove ) {
+    // be-safe !!!
+    if ((value == null) ||
+        (toRemove == null) ||
+        !suffix(value,toRemove) ) {
+      return value ;
+    }
+
+    return value.substring(0,value.length()-toRemove.length()) ;
+  }
+
+	/**
+   * See if a suffix is preceded by a String
+   *
+   * @return true if the suffix is preceded
+	 */
+	private boolean suffixPreceded( String value, String suffix, String preceded ) {
+    // be-safe !!!
+    if ((value == null) ||
+        (suffix == null) ||
+        (preceded == null) ||
+        !suffix(value,suffix) ) {
+      return false ;
+    }
+
+    return suffix(removeSuffix(value,suffix),preceded) ;
+  }
+
+	/**
+	 * Creates CT (changed term): lowercases the term, removes accents (via changeTerm) and strips leading/trailing punctuation.
+	 */
+	private void createCT( String term ) {
+    CT = changeTerm(term) ;
+
+    if (CT.length() < 2) return ;
+
+    // if the first character is ... , remove it
+    if ((CT.charAt(0) == '"')  ||
+        (CT.charAt(0) == '\'') ||
+        (CT.charAt(0) == '-')  ||
+        (CT.charAt(0) == ',')  ||
+        (CT.charAt(0) == ';')  ||
+        (CT.charAt(0) == '.')  ||
+        (CT.charAt(0) == '?')  ||
+        (CT.charAt(0) == '!')
+        ) {
+        CT = CT.substring(1);
+    }
+
+    if (CT.length() < 2) return ;
+
+    // if the last character is ... , remove it
+    if ((CT.charAt(CT.length()-1) == '-') ||
+        (CT.charAt(CT.length()-1) == ',') ||
+        (CT.charAt(CT.length()-1) == ';') ||
+        (CT.charAt(CT.length()-1) == '.') ||
+        (CT.charAt(CT.length()-1) == '?') ||
+        (CT.charAt(CT.length()-1) == '!') ||
+        (CT.charAt(CT.length()-1) == '\'') ||
+        (CT.charAt(CT.length()-1) == '"')
+        ) {
+        CT = CT.substring(0,CT.length()-1);
+    }
+  }
+
+
+	/**
+	 * Standard suffix removal.
+   * Search for the longest among the following suffixes, and perform
+   * the following actions:
+   *
+   * @return false if no ending was removed
+	 */
+	private boolean step1() {
+    if (CT == null) return false ;
+
+    // suffix length = 7
+    if (suffix(CT,"uciones") && suffix(R2,"uciones")) {
+        CT = replaceSuffix(CT,"uciones","u") ; return true;
+    }
+
+    // suffix length = 6
+    if (CT.length() >= 6) {
+      if (suffix(CT,"imentos") && suffix(R2,"imentos")) {
+          CT = removeSuffix(CT,"imentos") ; return true;
+      }
+      if (suffix(CT,"amentos") && suffix(R2,"amentos")) {
+          CT = removeSuffix(CT,"amentos") ; return true;
+      }
+      if (suffix(CT,"adores") && suffix(R2,"adores")) {
+          CT = removeSuffix(CT,"adores") ; return true;
+      }
+      if (suffix(CT,"adoras") && suffix(R2,"adoras")) {
+          CT = removeSuffix(CT,"adoras") ; return true;
+      }
+      if (suffix(CT,"logias") && suffix(R2,"logias")) {
+          CT = replaceSuffix(CT,"logias","log") ; return true;
+      }
+      if (suffix(CT,"encias") && suffix(R2,"encias")) {
+          CT = replaceSuffix(CT,"encias","ente") ; return true;
+      }
+      if (suffix(CT,"amente") && suffix(R1,"amente")) {
+          CT = removeSuffix(CT,"amente") ; return true;
+      }
+      if (suffix(CT,"idades") && suffix(R2,"idades")) {
+          CT = removeSuffix(CT,"idades") ; return true;
+      }
+    }
+
+    // suffix length = 5
+    if (CT.length() >= 5) {
+      if (suffix(CT,"acoes") && suffix(R2,"acoes")) {
+          CT = removeSuffix(CT,"acoes") ; return true;
+      }
+      if (suffix(CT,"imento") && suffix(R2,"imento")) {
+          CT = removeSuffix(CT,"imento") ; return true;
+      }
+      if (suffix(CT,"amento") && suffix(R2,"amento")) {
+          CT = removeSuffix(CT,"amento") ; return true;
+      }
+      if (suffix(CT,"adora") && suffix(R2,"adora")) {
+          CT = removeSuffix(CT,"adora") ; return true;
+      }
+      if (suffix(CT,"ismos") && suffix(R2,"ismos")) {
+          CT = removeSuffix(CT,"ismos") ; return true;
+      }
+      if (suffix(CT,"istas") && suffix(R2,"istas")) {
+          CT = removeSuffix(CT,"istas") ; return true;
+      }
+      if (suffix(CT,"logia") && suffix(R2,"logia")) {
+          CT = replaceSuffix(CT,"logia","log") ; return true;
+      }
+      if (suffix(CT,"ucion") && suffix(R2,"ucion")) {
+          CT = replaceSuffix(CT,"ucion","u") ; return true;
+      }
+      if (suffix(CT,"encia") && suffix(R2,"encia")) {
+          CT = replaceSuffix(CT,"encia","ente") ; return true;
+      }
+      if (suffix(CT,"mente") && suffix(R2,"mente")) {
+          CT = removeSuffix(CT,"mente") ; return true;
+      }
+      if (suffix(CT,"idade") && suffix(R2,"idade")) {
+          CT = removeSuffix(CT,"idade") ; return true;
+      }
+    }
+
+    // suffix length = 4
+    if (CT.length() >= 4) {
+      if (suffix(CT,"acao") && suffix(R2,"acao")) {
+          CT = removeSuffix(CT,"acao") ; return true;
+      }
+      if (suffix(CT,"ezas") && suffix(R2,"ezas")) {
+          CT = removeSuffix(CT,"ezas") ; return true;
+      }
+      if (suffix(CT,"icos") && suffix(R2,"icos")) {
+          CT = removeSuffix(CT,"icos") ; return true ;
+      }
+      if (suffix(CT,"icas") && suffix(R2,"icas")) {
+          CT = removeSuffix(CT,"icas") ; return true ;
+      }
+      if (suffix(CT,"ismo") && suffix(R2,"ismo")) {
+          CT = removeSuffix(CT,"ismo") ; return true ;
+      }
+      if (suffix(CT,"avel") && suffix(R2,"avel")) {
+          CT = removeSuffix(CT,"avel") ; return true ;
+      }
+      if (suffix(CT,"ivel") && suffix(R2,"ivel")) {
+          CT = removeSuffix(CT,"ivel") ; return true ;
+      }
+      if (suffix(CT,"ista") && suffix(R2,"ista")) {
+          CT = removeSuffix(CT,"ista") ; return true ;
+      }
+      if (suffix(CT,"osos") && suffix(R2,"osos")) {
+          CT = removeSuffix(CT,"osos") ; return true ;
+      }
+      if (suffix(CT,"osas") && suffix(R2,"osas")) {
+          CT = removeSuffix(CT,"osas") ; return true ;
+      }
+      if (suffix(CT,"ador") && suffix(R2,"ador")) {
+          CT = removeSuffix(CT,"ador") ; return true ;
+      }
+      if (suffix(CT,"ivas") && suffix(R2,"ivas")) {
+          CT = removeSuffix(CT,"ivas") ; return true ;
+      }
+      if (suffix(CT,"ivos") && suffix(R2,"ivos")) {
+          CT = removeSuffix(CT,"ivos") ; return true ;
+      }
+      if (suffix(CT,"iras") &&
+          suffix(RV,"iras") &&
+          suffixPreceded(CT,"iras","e")) {
+          CT = replaceSuffix(CT,"iras","ir") ; return true ;
+      }
+    }
+
+    // suffix length = 3
+    if (CT.length() >= 3) {
+      if (suffix(CT,"eza") && suffix(R2,"eza")) {
+          CT = removeSuffix(CT,"eza") ; return true ;
+      }
+      if (suffix(CT,"ico") && suffix(R2,"ico")) {
+          CT = removeSuffix(CT,"ico") ; return true ;
+      }
+      if (suffix(CT,"ica") && suffix(R2,"ica")) {
+          CT = removeSuffix(CT,"ica") ; return true ;
+      }
+      if (suffix(CT,"oso") && suffix(R2,"oso")) {
+          CT = removeSuffix(CT,"oso") ; return true ;
+      }
+      if (suffix(CT,"osa") && suffix(R2,"osa")) {
+          CT = removeSuffix(CT,"osa") ; return true ;
+      }
+      if (suffix(CT,"iva") && suffix(R2,"iva")) {
+          CT = removeSuffix(CT,"iva") ; return true ;
+      }
+      if (suffix(CT,"ivo") && suffix(R2,"ivo")) {
+          CT = removeSuffix(CT,"ivo") ; return true ;
+      }
+      if (suffix(CT,"ira") &&
+          suffix(RV,"ira") &&
+          suffixPreceded(CT,"ira","e")) {
+          CT = replaceSuffix(CT,"ira","ir") ; return true ;
+      }
+    }
+
+    // no ending was removed by step1
+    return false ;
+  }
+
+
+	/**
+	 * Verb suffixes.
+   *
+   * Search for the longest among the following suffixes in RV,
+   * and if found, delete.
+   *
+   * @return false if no ending was removed
+	*/
+	private boolean step2() {
+    if (RV == null) return false ;
+
+    // suffix length = 7
+    if (RV.length() >= 7) {
+      if (suffix(RV,"issemos")) {
+        CT = removeSuffix(CT,"issemos") ; return true;
+      }
+      if (suffix(RV,"essemos")) {
+        CT = removeSuffix(CT,"essemos") ; return true;
+      }
+      if (suffix(RV,"assemos")) {
+        CT = removeSuffix(CT,"assemos") ; return true;
+      }
+      if (suffix(RV,"ariamos")) {
+        CT = removeSuffix(CT,"ariamos") ; return true;
+      }
+      if (suffix(RV,"eriamos")) {
+        CT = removeSuffix(CT,"eriamos") ; return true;
+      }
+      if (suffix(RV,"iriamos")) {
+        CT = removeSuffix(CT,"iriamos") ; return true;
+      }
+    }
+
+    // suffix length = 6
+    if (RV.length() >= 6) {
+      if (suffix(RV,"iremos")) {
+        CT = removeSuffix(CT,"iremos") ; return true;
+      }
+      if (suffix(RV,"eremos")) {
+        CT = removeSuffix(CT,"eremos") ; return true;
+      }
+      if (suffix(RV,"aremos")) {
+        CT = removeSuffix(CT,"aremos") ; return true;
+      }
+      if (suffix(RV,"avamos")) {
+        CT = removeSuffix(CT,"avamos") ; return true;
+      }
+      if (suffix(RV,"iramos")) {
+        CT = removeSuffix(CT,"iramos") ; return true;
+      }
+      if (suffix(RV,"eramos")) {
+        CT = removeSuffix(CT,"eramos") ; return true;
+      }
+      if (suffix(RV,"aramos")) {
+        CT = removeSuffix(CT,"aramos") ; return true;
+      }
+      if (suffix(RV,"asseis")) {
+        CT = removeSuffix(CT,"asseis") ; return true;
+      }
+      if (suffix(RV,"esseis")) {
+        CT = removeSuffix(CT,"esseis") ; return true;
+      }
+      if (suffix(RV,"isseis")) {
+        CT = removeSuffix(CT,"isseis") ; return true;
+      }
+      if (suffix(RV,"arieis")) {
+        CT = removeSuffix(CT,"arieis") ; return true;
+      }
+      if (suffix(RV,"erieis")) {
+        CT = removeSuffix(CT,"erieis") ; return true;
+      }
+      if (suffix(RV,"irieis")) {
+        CT = removeSuffix(CT,"irieis") ; return true;
+      }
+    }
+
+
+    // suffix length = 5
+    if (RV.length() >= 5) {
+      if (suffix(RV,"irmos")) {
+        CT = removeSuffix(CT,"irmos") ; return true;
+      }
+      if (suffix(RV,"iamos")) {
+        CT = removeSuffix(CT,"iamos") ; return true;
+      }
+      if (suffix(RV,"armos")) {
+        CT = removeSuffix(CT,"armos") ; return true;
+      }
+      if (suffix(RV,"ermos")) {
+        CT = removeSuffix(CT,"ermos") ; return true;
+      }
+      if (suffix(RV,"areis")) {
+        CT = removeSuffix(CT,"areis") ; return true;
+      }
+      if (suffix(RV,"ereis")) {
+        CT = removeSuffix(CT,"ereis") ; return true;
+      }
+      if (suffix(RV,"ireis")) {
+        CT = removeSuffix(CT,"ireis") ; return true;
+      }
+      if (suffix(RV,"asses")) {
+        CT = removeSuffix(CT,"asses") ; return true;
+      }
+      if (suffix(RV,"esses")) {
+        CT = removeSuffix(CT,"esses") ; return true;
+      }
+      if (suffix(RV,"isses")) {
+        CT = removeSuffix(CT,"isses") ; return true;
+      }
+      if (suffix(RV,"astes")) {
+        CT = removeSuffix(CT,"astes") ; return true;
+      }
+      if (suffix(RV,"assem")) {
+        CT = removeSuffix(CT,"assem") ; return true;
+      }
+      if (suffix(RV,"essem")) {
+        CT = removeSuffix(CT,"essem") ; return true;
+      }
+      if (suffix(RV,"issem")) {
+        CT = removeSuffix(CT,"issem") ; return true;
+      }
+      if (suffix(RV,"ardes")) {
+        CT = removeSuffix(CT,"ardes") ; return true;
+      }
+      if (suffix(RV,"erdes")) {
+        CT = removeSuffix(CT,"erdes") ; return true;
+      }
+      if (suffix(RV,"irdes")) {
+        CT = removeSuffix(CT,"irdes") ; return true;
+      }
+      if (suffix(RV,"ariam")) {
+        CT = removeSuffix(CT,"ariam") ; return true;
+      }
+      if (suffix(RV,"eriam")) {
+        CT = removeSuffix(CT,"eriam") ; return true;
+      }
+      if (suffix(RV,"iriam")) {
+        CT = removeSuffix(CT,"iriam") ; return true;
+      }
+      if (suffix(RV,"arias")) {
+        CT = removeSuffix(CT,"arias") ; return true;
+      }
+      if (suffix(RV,"erias")) {
+        CT = removeSuffix(CT,"erias") ; return true;
+      }
+      if (suffix(RV,"irias")) {
+        CT = removeSuffix(CT,"irias") ; return true;
+      }
+      if (suffix(RV,"estes")) {
+        CT = removeSuffix(CT,"estes") ; return true;
+      }
+      if (suffix(RV,"istes")) {
+        CT = removeSuffix(CT,"istes") ; return true;
+      }
+      if (suffix(RV,"areis")) {
+        CT = removeSuffix(CT,"areis") ; return true;
+      }
+      if (suffix(RV,"aveis")) {
+        CT = removeSuffix(CT,"aveis") ; return true;
+      }
+    }
+
+    // suffix length = 4
+    if (RV.length() >= 4) {
+      if (suffix(RV,"aria")) {
+        CT = removeSuffix(CT,"aria") ; return true;
+      }
+      if (suffix(RV,"eria")) {
+        CT = removeSuffix(CT,"eria") ; return true;
+      }
+      if (suffix(RV,"iria")) {
+        CT = removeSuffix(CT,"iria") ; return true;
+      }
+      if (suffix(RV,"asse")) {
+        CT = removeSuffix(CT,"asse") ; return true;
+      }
+      if (suffix(RV,"esse")) {
+        CT = removeSuffix(CT,"esse") ; return true;
+      }
+      if (suffix(RV,"isse")) {
+        CT = removeSuffix(CT,"isse") ; return true;
+      }
+      if (suffix(RV,"aste")) {
+        CT = removeSuffix(CT,"aste") ; return true;
+      }
+      if (suffix(RV,"este")) {
+        CT = removeSuffix(CT,"este") ; return true;
+      }
+      if (suffix(RV,"iste")) {
+        CT = removeSuffix(CT,"iste") ; return true;
+      }
+      if (suffix(RV,"arei")) {
+        CT = removeSuffix(CT,"arei") ; return true;
+      }
+      if (suffix(RV,"erei")) {
+        CT = removeSuffix(CT,"erei") ; return true;
+      }
+      if (suffix(RV,"irei")) {
+        CT = removeSuffix(CT,"irei") ; return true;
+      }
+      if (suffix(RV,"aram")) {
+        CT = removeSuffix(CT,"aram") ; return true;
+      }
+      if (suffix(RV,"eram")) {
+        CT = removeSuffix(CT,"eram") ; return true;
+      }
+      if (suffix(RV,"iram")) {
+        CT = removeSuffix(CT,"iram") ; return true;
+      }
+      if (suffix(RV,"avam")) {
+        CT = removeSuffix(CT,"avam") ; return true;
+      }
+      if (suffix(RV,"arem")) {
+        CT = removeSuffix(CT,"arem") ; return true;
+      }
+      if (suffix(RV,"erem")) {
+        CT = removeSuffix(CT,"erem") ; return true;
+      }
+      if (suffix(RV,"irem")) {
+        CT = removeSuffix(CT,"irem") ; return true;
+      }
+      if (suffix(RV,"ando")) {
+        CT = removeSuffix(CT,"ando") ; return true;
+      }
+      if (suffix(RV,"endo")) {
+        CT = removeSuffix(CT,"endo") ; return true;
+      }
+      if (suffix(RV,"indo")) {
+        CT = removeSuffix(CT,"indo") ; return true;
+      }
+      if (suffix(RV,"arao")) {
+        CT = removeSuffix(CT,"arao") ; return true;
+      }
+      if (suffix(RV,"erao")) {
+        CT = removeSuffix(CT,"erao") ; return true;
+      }
+      if (suffix(RV,"irao")) {
+        CT = removeSuffix(CT,"irao") ; return true;
+      }
+      if (suffix(RV,"adas")) {
+        CT = removeSuffix(CT,"adas") ; return true;
+      }
+      if (suffix(RV,"idas")) {
+        CT = removeSuffix(CT,"idas") ; return true;
+      }
+      if (suffix(RV,"aras")) {
+        CT = removeSuffix(CT,"aras") ; return true;
+      }
+      if (suffix(RV,"eras")) {
+        CT = removeSuffix(CT,"eras") ; return true;
+      }
+      if (suffix(RV,"iras")) {
+        CT = removeSuffix(CT,"iras") ; return true;
+      }
+      if (suffix(RV,"avas")) {
+        CT = removeSuffix(CT,"avas") ; return true;
+      }
+      if (suffix(RV,"ares")) {
+        CT = removeSuffix(CT,"ares") ; return true;
+      }
+      if (suffix(RV,"eres")) {
+        CT = removeSuffix(CT,"eres") ; return true;
+      }
+      if (suffix(RV,"ires")) {
+        CT = removeSuffix(CT,"ires") ; return true;
+      }
+      if (suffix(RV,"ados")) {
+        CT = removeSuffix(CT,"ados") ; return true;
+      }
+      if (suffix(RV,"idos")) {
+        CT = removeSuffix(CT,"idos") ; return true;
+      }
+      if (suffix(RV,"amos")) {
+        CT = removeSuffix(CT,"amos") ; return true;
+      }
+      if (suffix(RV,"emos")) {
+        CT = removeSuffix(CT,"emos") ; return true;
+      }
+      if (suffix(RV,"imos")) {
+        CT = removeSuffix(CT,"imos") ; return true;
+      }
+      if (suffix(RV,"ieis")) {
+        CT = removeSuffix(CT,"ieis") ; return true;
+      }
+    }
+
+    // suffix length = 3
+    if (RV.length() >= 3) {
+      if (suffix(RV,"ada")) {
+        CT = removeSuffix(CT,"ada") ; return true;
+      }
+      if (suffix(RV,"ida")) {
+        CT = removeSuffix(CT,"ida") ; return true;
+      }
+      if (suffix(RV,"ara")) {
+        CT = removeSuffix(CT,"ara") ; return true;
+      }
+      if (suffix(RV,"era")) {
+        CT = removeSuffix(CT,"era") ; return true;
+      }
+      if (suffix(RV,"ava")) {
+        CT = removeSuffix(CT,"ava") ; return true;
+      }
+      if (suffix(RV,"iam")) {
+        CT = removeSuffix(CT,"iam") ; return true;
+      }
+      if (suffix(RV,"ado")) {
+        CT = removeSuffix(CT,"ado") ; return true;
+      }
+      if (suffix(RV,"ido")) {
+        CT = removeSuffix(CT,"ido") ; return true;
+      }
+      if (suffix(RV,"ias")) {
+        CT = removeSuffix(CT,"ias") ; return true;
+      }
+      if (suffix(RV,"ais")) {
+        CT = removeSuffix(CT,"ais") ; return true;
+      }
+      if (suffix(RV,"eis")) {
+        CT = removeSuffix(CT,"eis") ; return true;
+      }
+      if (suffix(RV,"ira")) {
+        CT = removeSuffix(CT,"ira") ; return true;
+      }
+      if (suffix(RV,"ear")) {
+        CT = removeSuffix(CT,"ear") ; return true;
+      }
+    }
+
+    // suffix length = 2
+    if (RV.length() >= 2) {
+      if (suffix(RV,"ia")) {
+        CT = removeSuffix(CT,"ia") ; return true;
+      }
+      if (suffix(RV,"ei")) {
+        CT = removeSuffix(CT,"ei") ; return true;
+      }
+      if (suffix(RV,"am")) {
+        CT = removeSuffix(CT,"am") ; return true;
+      }
+      if (suffix(RV,"em")) {
+        CT = removeSuffix(CT,"em") ; return true;
+      }
+      if (suffix(RV,"ar")) {
+        CT = removeSuffix(CT,"ar") ; return true;
+      }
+      if (suffix(RV,"er")) {
+        CT = removeSuffix(CT,"er") ; return true;
+      }
+      if (suffix(RV,"ir")) {
+        CT = removeSuffix(CT,"ir") ; return true;
+      }
+      if (suffix(RV,"as")) {
+        CT = removeSuffix(CT,"as") ; return true;
+      }
+      if (suffix(RV,"es")) {
+        CT = removeSuffix(CT,"es") ; return true;
+      }
+      if (suffix(RV,"is")) {
+        CT = removeSuffix(CT,"is") ; return true;
+      }
+      if (suffix(RV,"eu")) {
+        CT = removeSuffix(CT,"eu") ; return true;
+      }
+      if (suffix(RV,"iu")) {
+        CT = removeSuffix(CT,"iu") ; return true;
+      }
+      if (suffix(RV,"ou")) {
+        CT = removeSuffix(CT,"ou") ; return true;
+      }
+    }
+
+    // no ending was removed by step2
+    return false ;
+  }
+
+	/**
+	 * Delete suffix 'i' if in RV and preceded by 'c'
+   *
+	*/
+	private void step3() {
+    if (RV == null) return ;
+
+    if (suffix(RV,"i") && suffixPreceded(RV,"i","c")) {
+      CT = removeSuffix(CT,"i") ;
+    }
+
+  }
+
+	/**
+	 * Residual suffix
+   *
+   * If the word ends with one of the suffixes (os a i o á í ó)
+   * in RV, delete it
+   *
+	*/
+	private void step4() {
+    if (RV == null) return  ;
+
+    if (suffix(RV,"os")) {
+      CT = removeSuffix(CT,"os") ; return ;
+    }
+    if (suffix(RV,"a")) {
+      CT = removeSuffix(CT,"a") ; return ;
+    }
+    if (suffix(RV,"i")) {
+      CT = removeSuffix(CT,"i") ; return ;
+    }
+    if (suffix(RV,"o")) {
+      CT = removeSuffix(CT,"o") ; return ;
+    }
+
+  }
+
+	/**
+	 * If the word ends with one of (e é ê) in RV, delete it,
+   * and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
+   * delete the 'u' (or 'i')
+   *
+   * Or if the word ends with 'ç', remove the cedilla.
+   *
+	*/
+	private void step5() {
+    if (RV == null) return  ;
+
+    if (suffix(RV,"e")) {
+      if (suffixPreceded(RV,"e","gu")) {
+        CT = removeSuffix(CT,"e") ;
+        CT = removeSuffix(CT,"u") ;
+        return ;
+      }
+
+      if (suffixPreceded(RV,"e","ci")) {
+        CT = removeSuffix(CT,"e") ;
+        CT = removeSuffix(CT,"i") ;
+        return ;
+      }
+
+      CT = removeSuffix(CT,"e") ; return ;
+    }
+  }
+
+	/**
+	 * For logging and debugging purposes
+	 *
+	 * @return  TERM, CT, RV, R1 and R2
+	 */
+	public String log() {
+    return " (TERM = " + TERM + ")" +
+           " (CT = " + CT +")" +
+           " (RV = " + RV +")" +
+           " (R1 = " + R1 +")" +
+           " (R2 = " + R2 +")" ;
+	}
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerAR.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerAR extends DonatusAnalyzer {
+  private static String LANGUAGE = "ar";
+
+  public DonatusAnalyzerAR() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerAR(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerAR(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerAR(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerDE.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerDE extends DonatusAnalyzer {
+  private static String LANGUAGE = "de";
+
+  public DonatusAnalyzerDE() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerDE(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerDE(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerDE(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerEL.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerEL extends DonatusAnalyzer {
+  private static String LANGUAGE = "el";
+
+  public DonatusAnalyzerEL() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerEL(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerEL(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerEL(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerEN.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerEN extends DonatusAnalyzer {
+  private static String LANGUAGE = "en";
+
+  public DonatusAnalyzerEN() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerEN(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerEN(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerEN(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerFR.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerFR extends DonatusAnalyzer {
+  private static String LANGUAGE = "fr";
+
+  public DonatusAnalyzerFR() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerFR(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerFR(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerFR(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerIT.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerIT extends DonatusAnalyzer {
+  private static String LANGUAGE = "it";
+
+  public DonatusAnalyzerIT() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerIT(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerIT(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerIT(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerLA.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerLA extends DonatusAnalyzer {
+  private static String LANGUAGE = "la";
+
+  public DonatusAnalyzerLA() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerLA(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerLA(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerLA(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerNL.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerNL extends DonatusAnalyzer {
+  private static String LANGUAGE = "nl";
+
+  public DonatusAnalyzerNL() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerNL(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerNL(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerNL(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DonatusAnalyzerZH.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusAnalyzerZH extends DonatusAnalyzer {
+  private static String LANGUAGE = "zh";
+
+  public DonatusAnalyzerZH() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerZH(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerZH(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public DonatusAnalyzerZH(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/DutchStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,407 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+/**
+ *
+ * A stemmer for Dutch words. The algorithm is an implementation of
+ * the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
+ * algorithm in Martin Porter's snowball project.
+ * 
+ * @author Edwin de Jonge (ejne at cbs.nl)
+ */
+
+public class DutchStemmer {
+  /**
+   * Buffer for the terms while stemming them.
+   */
+  private StringBuffer sb = new StringBuffer();
+  private boolean _removedE;
+  private Map _stemDict;
+
+  private int _R1;
+  private int _R2;
+
+  //TODO convert to internal
+  /*
+   * Stems the given term to a unique <tt>discriminator</tt>.
+   *
+   * @param term The term that should be stemmed.
+   * @return Discriminator for <tt>term</tt>
+   */
+  public String stem(String term) {
+    term = term.toLowerCase();
+    if (!isStemmable(term))
+      return term;
+    if (_stemDict != null && _stemDict.containsKey(term))
+      if (_stemDict.get(term) instanceof String)
+        return (String) _stemDict.get(term);
+      else
+        return null;
+
+    // Reset the StringBuffer.
+    sb.delete(0, sb.length());
+    sb.insert(0, term);
+    // Stemming starts here...
+    substitute(sb);
+    storeYandI(sb);
+    _R1 = getRIndex(sb, 0);
+    _R1 = Math.max(3, _R1);
+    step1(sb);
+    step2(sb);
+    _R2 = getRIndex(sb, _R1);
+    step3a(sb);
+    step3b(sb);
+    step4(sb);
+    reStoreYandI(sb);
+    return sb.toString();
+  }
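+
+  // Usage sketch: new DutchStemmer().stem(word) lowercases the term first, returns terms
+  // containing non-letter characters unchanged, and lets entries of the optional stem
+  // dictionary (setStemDictionary) short-circuit the Snowball steps entirely.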
+
+  private boolean enEnding(StringBuffer sb) {
+    String[] enend = new String[]{"ene", "en"};
+    for (int i = 0; i < enend.length; i++) {
+      String end = enend[i];
+      String s = sb.toString();
+      int index = s.length() - end.length();
+      if (s.endsWith(end) &&
+          index >= _R1 &&
+          isValidEnEnding(sb, index - 1)
+      ) {
+        sb.delete(index, index + end.length());
+        unDouble(sb, index);
+        return true;
+      }
+    }
+    return false;
+  }
+
+
+  private void step1(StringBuffer sb) {
+    if (_R1 >= sb.length())
+      return;
+
+    String s = sb.toString();
+    int lengthR1 = sb.length() - _R1;
+    int index;
+
+    if (s.endsWith("heden")) {
+      sb.replace(_R1, lengthR1 + _R1, sb.substring(_R1, lengthR1 + _R1).replaceAll("heden", "heid"));
+      return;
+    }
+
+    if (enEnding(sb))
+      return;
+
+    if (s.endsWith("se") &&
+        (index = s.length() - 2) >= _R1 &&
+        isValidSEnding(sb, index - 1)
+    ) {
+      sb.delete(index, index + 2);
+      return;
+    }
+    if (s.endsWith("s") &&
+        (index = s.length() - 1) >= _R1 &&
+        isValidSEnding(sb, index - 1)) {
+      sb.delete(index, index + 1);
+    }
+  }
+
+  /**
+   * Delete suffix e if in R1 and
+   * preceded by a non-vowel, and then undouble the ending
+   *
+   * @param sb String being stemmed
+   */
+  private void step2(StringBuffer sb) {
+    _removedE = false;
+    if (_R1 >= sb.length())
+      return;
+    String s = sb.toString();
+    int index = s.length() - 1;
+    if (index >= _R1 &&
+        s.endsWith("e") &&
+        !isVowel(sb.charAt(index - 1))) {
+      sb.delete(index, index + 1);
+      unDouble(sb);
+      _removedE = true;
+    }
+  }
+
+  /**
+   * Delete "heid"
+   *
+   * @param sb String being stemmed
+   */
+  private void step3a(StringBuffer sb) {
+    if (_R2 >= sb.length())
+      return;
+    String s = sb.toString();
+    int index = s.length() - 4;
+    if (s.endsWith("heid") && index >= _R2 && sb.charAt(index - 1) != 'c') {
+      sb.delete(index, index + 4); //remove heid
+      enEnding(sb);
+    }
+  }
+
+  /**
+   * <p>A d-suffix, or derivational suffix, enables a new word,
+   * often with a different grammatical category, or with a different
+   * sense, to be built from another word. Whether a d-suffix can be
+   * attached is discovered not from the rules of grammar, but by
+   * referring to a dictionary. So in English, ness can be added to
+   * certain adjectives to form corresponding nouns (littleness,
+   * kindness, foolishness ...) but not to all adjectives
+   * (not for example, to big, cruel, wise ...) d-suffixes can be
+   * used to change meaning, often in rather exotic ways.</p>
+   * Remove "ing", "end", "ig", "lijk", "baar" and "bar"
+   *
+   * @param sb String being stemmed
+   */
+  private void step3b(StringBuffer sb) {
+    if (_R2 >= sb.length())
+      return;
+    String s = sb.toString();
+    int index = 0;
+
+    if ((s.endsWith("end") || s.endsWith("ing")) &&
+        (index = s.length() - 3) >= _R2) {
+      sb.delete(index, index + 3);
+      if (sb.charAt(index - 2) == 'i' &&
+          sb.charAt(index - 1) == 'g') {
+        if (sb.charAt(index - 3) != 'e' && index - 2 >= _R2) {
+          index -= 2;
+          sb.delete(index, index + 2);
+        }
+      } else {
+        unDouble(sb, index);
+      }
+      return;
+    }
+    if (s.endsWith("ig") &&
+        (index = s.length() - 2) >= _R2
+    ) {
+      if (sb.charAt(index - 1) != 'e')
+        sb.delete(index, index + 2);
+      return;
+    }
+    if (s.endsWith("lijk") &&
+        (index = s.length() - 4) >= _R2
+    ) {
+      sb.delete(index, index + 4);
+      step2(sb);
+      return;
+    }
+    if (s.endsWith("baar") &&
+        (index = s.length() - 4) >= _R2
+    ) {
+      sb.delete(index, index + 4);
+      return;
+    }
+    if (s.endsWith("bar") &&
+        (index = s.length() - 3) >= _R2
+    ) {
+      if (_removedE)
+        sb.delete(index, index + 3);
+      return;
+    }
+  }
+
+  /**
+   * undouble vowel
+   * If the word ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
+   *
+   * @param sb String being stemmed
+   */
+  private void step4(StringBuffer sb) {
+    if (sb.length() < 4)
+      return;
+    String end = sb.substring(sb.length() - 4, sb.length());
+    char c = end.charAt(0);
+    char v1 = end.charAt(1);
+    char v2 = end.charAt(2);
+    char d = end.charAt(3);
+    if (v1 == v2 &&
+        d != 'I' &&
+        v1 != 'i' &&
+        isVowel(v1) &&
+        !isVowel(d) &&
+        !isVowel(c)) {
+      sb.delete(sb.length() - 2, sb.length() - 1);
+    }
+  }
+
+  /**
+   * Checks if a term could be stemmed.
+   *
+   * @return true if, and only if, the given term consists only of letters.
+   */
+  private boolean isStemmable(String term) {
+    for (int c = 0; c < term.length(); c++) {
+      if (!Character.isLetter(term.charAt(c))) return false;
+    }
+    return true;
+  }
+
+  /**
+   * Substitute ä, ë, ï, ö, ü, á, é, í, ó, ú
+   */
+  private void substitute(StringBuffer buffer) {
+    for (int i = 0; i < buffer.length(); i++) {
+      switch (buffer.charAt(i)) {
+        case 'ä':
+        case 'á':
+          {
+            buffer.setCharAt(i, 'a');
+            break;
+          }
+        case 'ë':
+        case 'é':
+          {
+            buffer.setCharAt(i, 'e');
+            break;
+          }
+        case 'ü':
+        case 'ú':
+          {
+            buffer.setCharAt(i, 'u');
+            break;
+          }
+        case 'ï':
+        case 'í':
+          {
+            buffer.setCharAt(i, 'i');
+            break;
+          }
+        case 'ö':
+        case 'ó':
+          {
+            buffer.setCharAt(i, 'o');
+            break;
+          }
+      }
+    }
+  }
+
+  /*private boolean isValidSEnding(StringBuffer sb) {
+    return isValidSEnding(sb, sb.length() - 1);
+  }*/
+
+  private boolean isValidSEnding(StringBuffer sb, int index) {
+    char c = sb.charAt(index);
+    if (isVowel(c) || c == 'j')
+      return false;
+    return true;
+  }
+
+  /*private boolean isValidEnEnding(StringBuffer sb) {
+    return isValidEnEnding(sb, sb.length() - 1);
+  }*/
+
+  private boolean isValidEnEnding(StringBuffer sb, int index) {
+    char c = sb.charAt(index);
+    if (isVowel(c))
+      return false;
+    if (index < 3)
+      return false;
+    // ends with "gem"?
+    if (c == 'm' && sb.charAt(index - 2) == 'g' && sb.charAt(index - 1) == 'e')
+      return false;
+    return true;
+  }
+
+  private void unDouble(StringBuffer sb) {
+    unDouble(sb, sb.length());
+  }
+
+  private void unDouble(StringBuffer sb, int endIndex) {
+    String s = sb.substring(0, endIndex);
+    if (s.endsWith("kk") || s.endsWith("tt") || s.endsWith("dd") || s.endsWith("nn") || s.endsWith("mm") || s.endsWith("ff")) {
+      sb.delete(endIndex - 1, endIndex);
+    }
+  }
+
+  private int getRIndex(StringBuffer sb, int start) {
+    if (start == 0)
+      start = 1;
+    int i = start;
+    for (; i < sb.length(); i++) {
+      //first non-vowel preceded by a vowel
+      if (!isVowel(sb.charAt(i)) && isVowel(sb.charAt(i - 1))) {
+        return i + 1;
+      }
+    }
+    return i + 1;
+  }
+
+  private void storeYandI(StringBuffer sb) {
+    if (sb.charAt(0) == 'y')
+      sb.setCharAt(0, 'Y');
+
+    int last = sb.length() - 1;
+
+    for (int i = 1; i < last; i++) {
+      switch (sb.charAt(i)) {
+        case 'i':
+          {
+            if (isVowel(sb.charAt(i - 1)) &&
+                isVowel(sb.charAt(i + 1))
+            )
+              sb.setCharAt(i, 'I');
+            break;
+          }
+        case 'y':
+          {
+            if (isVowel(sb.charAt(i - 1)))
+              sb.setCharAt(i, 'Y');
+            break;
+          }
+      }
+    }
+    if (last > 0 && sb.charAt(last) == 'y' && isVowel(sb.charAt(last - 1)))
+      sb.setCharAt(last, 'Y');
+  }
+
+  private void reStoreYandI(StringBuffer sb) {
+    String tmp = sb.toString();
+    sb.delete(0, sb.length());
+    sb.insert(0, tmp.replaceAll("I", "i").replaceAll("Y", "y"));
+  }
+
+  private boolean isVowel(char c) {
+    switch (c) {
+      case 'e':
+      case 'a':
+      case 'o':
+      case 'i':
+      case 'u':
+      case 'y':
+      case 'è':
+        {
+          return true;
+        }
+    }
+    return false;
+  }
+
+  void setStemDictionary(Map dict) {
+    _stemDict = dict;
+  }
+
+}
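A minimal usage sketch for the Dutch stemmer above. It is illustrative only and not part of this changeset: it assumes DutchStemmer exposes a public no-arg constructor and a public stem(String) method, as the French, German and Russian stemmers added below do, and the wrapper class and word list are hypothetical.

    import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang.DutchStemmer;

    public class DutchStemmerSketch {
      public static void main(String[] args) {
        DutchStemmer stemmer = new DutchStemmer();
        // Per the step4 comment above, double vowels should be undoubled,
        // e.g. "maan" -> "man" and "brood" -> "brod".
        for (String word : new String[] { "maan", "brood", "lichamelijk" }) {
          System.out.println(word + " -> " + stemmer.stem(word));
        }
      }
    }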
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/FrenchStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,709 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for French words. The algorithm is based on the work of
+ * Dr Martin Porter on his snowball project<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
+ * (French stemming algorithm) for details
+ *
+ * @author    Patrick Talbot
+ */
+
+public class FrenchStemmer {
+
+    /**
+     * Buffer for the terms while stemming them.
+     */
+    private StringBuffer sb = new StringBuffer();
+
+    /**
+     * A temporary buffer, used to reconstruct R2
+     */
+     private StringBuffer tb = new StringBuffer();
+
+	/**
+	 * Region R0 is equal to the whole buffer
+	 */
+	private String R0;
+
+	/**
+	 * Region RV
+	 * "If the word begins with two vowels, RV is the region after the third letter,
+	 * otherwise the region after the first vowel not at the beginning of the word,
+	 * or the end of the word if these positions cannot be found."
+	 */
+    private String RV;
+
+	/**
+	 * Region R1
+	 * "R1 is the region after the first non-vowel following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"
+	 */
+    private String R1;
+
+	/**
+	 * Region R2
+	 * "R2 is the region after the first non-vowel in R1 following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"
+	 */
+    private String R2;
+
+
+	/**
+	 * Set to true if we need to perform step 2
+	 */
+    private boolean suite;
+
+	/**
+	 * Set to true if the buffer was modified
+	 */
+    private boolean modified;
+
+
+    /**
+     * Stems the given term to a unique <tt>discriminator</tt>.
+     *
+     * @param term  java.lang.String The term that should be stemmed
+     * @return java.lang.String  Discriminator for <tt>term</tt>
+     */
+    public String stem( String term ) {
+		if ( !isStemmable( term ) ) {
+			return term;
+		}
+
+		// Use lowercase for medium stemming.
+		term = term.toLowerCase();
+
+		// Reset the StringBuffer.
+		sb.delete( 0, sb.length() );
+		sb.insert( 0, term );
+
+		// reset the booleans
+		modified = false;
+		suite = false;
+
+		sb = treatVowels( sb );
+
+		setStrings();
+
+		step1();
+
+		if (!modified || suite)
+		{
+			if (RV != null)
+			{
+				suite = step2a();
+				if (!suite)
+					step2b();
+			}
+		}
+
+		if (modified || suite)
+			step3();
+		else
+			step4();
+
+		step5();
+
+		step6();
+
+		return sb.toString();
+    }
+
+	/**
+	 * Sets the search region Strings<br>
+	 * This needs to be done each time the buffer is modified
+	 */
+	private void setStrings() {
+		// set the strings
+		R0 = sb.toString();
+		RV = retrieveRV( sb );
+		R1 = retrieveR( sb );
+		if ( R1 != null )
+		{
+			tb.delete( 0, tb.length() );
+			tb.insert( 0, R1 );
+			R2 = retrieveR( tb );
+		}
+		else
+			R2 = null;
+	}
+
+	/**
+	 * First step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step1( ) {
+		String[] suffix = { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
+		deleteFrom( R2, suffix );
+
+		replaceFrom( R2, new String[] { "logies", "logie" }, "log" );
+		replaceFrom( R2, new String[] { "usions", "utions", "usion", "ution" }, "u" );
+		replaceFrom( R2, new String[] { "ences", "ence" }, "ent" );
+
+		String[] search = { "atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
+		deleteButSuffixFromElseReplace( R2, search, "ic",  true, R0, "iqU" );
+
+		deleteButSuffixFromElseReplace( R2, new String[] { "ements", "ement" }, "eus", false, R0, "eux" );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "ativ", false );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iv", false );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "abl", false );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iqU", false );
+
+		deleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", "issement" }, false, R0 );
+		deleteFrom( RV, new String[] { "ements", "ement" } );
+
+		deleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "abil", false, R0, "abl" );
+		deleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "ic", false, R0, "iqU" );
+		deleteButSuffixFrom( R2, new String[] { "ités", "ité" }, "iv", true );
+
+		String[] autre = { "ifs", "ives", "if", "ive" };
+		deleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, "iqU" );
+		deleteButSuffixFromElseReplace( R2, autre, "at", true, R2, "iqU" );
+
+		replaceFrom( R0, new String[] { "eaux" }, "eau" );
+
+		replaceFrom( R1, new String[] { "aux" }, "al" );
+
+		deleteButSuffixFromElseReplace( R2, new String[] { "euses", "euse" }, "", true, R1, "eux" );
+
+		deleteFrom( R2, new String[] { "eux" } );
+
+		// if one of the next steps is performed, we will need to perform step2a
+		boolean temp = false;
+		temp = replaceFrom( RV, new String[] { "amment" }, "ant" );
+		if (temp == true)
+			suite = true;
+		temp = replaceFrom( RV, new String[] { "emment" }, "ent" );
+		if (temp == true)
+			suite = true;
+		temp = deleteFromIfTestVowelBeforeIn( RV, new String[] { "ments", "ment" }, true, RV );
+		if (temp == true)
+			suite = true;
+
+	}
+
+	/**
+	 * Second step (A) of the Porter Algorithm<br>
+	 * Will be performed if nothing changed in the first step
+	 * or if changes were made to the amment, emment, ments or ment suffixes<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 *
+	 * @return boolean - true if something changed in the StringBuffer
+	 */
+	private boolean step2a() {
+		String[] search = { "îmes", "îtes", "iraIent", "irait", "irais", "irai", "iras", "ira",
+							"irent", "iriez", "irez", "irions", "irons", "iront",
+							"issaIent", "issais", "issantes", "issante", "issants", "issant",
+							"issait", "issais", "issions", "issons", "issiez", "issez", "issent",
+							"isses", "isse", "ir", "is", "ît", "it", "ies", "ie", "i" };
+		return deleteFromIfTestVowelBeforeIn( RV, search, false, RV );
+	}
+
+	/**
+	 * Second step (B) of the Porter Algorithm<br>
+	 * Will be performed if step 2 A did not succeed<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step2b() {
+		String[] suffix = { "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
+							"erons", "eront","erez", "èrent", "era", "ées", "iez",
+							"ée", "és", "er", "ez", "é" };
+		deleteFrom( RV, suffix );
+
+		String[] search = { "assions", "assiez", "assent", "asses", "asse", "aIent",
+							"antes", "aIent", "Aient", "ante", "âmes", "âtes", "ants", "ant",
+							"ait", "aît", "ais", "Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a" };
+		deleteButSuffixFrom( RV, search, "e", true );
+
+		deleteFrom( R2, new String[] { "ions" } );
+	}
+
+	/**
+	 * Third step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step3() {
+		if (sb.length()>0)
+		{
+			char ch = sb.charAt( sb.length()-1 );
+			if (ch == 'Y')
+			{
+				sb.setCharAt( sb.length()-1, 'i' );
+				setStrings();
+			}
+			else if (ch == 'ç')
+			{
+				sb.setCharAt( sb.length()-1, 'c' );
+				setStrings();
+			}
+		}
+	}
+
+	/**
+	 * Fourth step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step4() {
+		if (sb.length() > 1)
+		{
+			char ch = sb.charAt( sb.length()-1 );
+			if (ch == 's')
+			{
+				char b = sb.charAt( sb.length()-2 );
+				if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
+				{
+					sb.delete( sb.length() - 1, sb.length());
+					setStrings();
+				}
+			}
+		}
+		boolean found = deleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "s" );
+		if (!found)
+			found = deleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "t" );
+
+		replaceFrom( RV, new String[] { "Ière", "ière", "Ier", "ier" }, "i" );
+		deleteFrom( RV, new String[] { "e" } );
+		deleteFromIfPrecededIn( RV, new String[] { "ë" }, R0, "gu" );
+	}
+
+	/**
+	 * Fifth step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step5() {
+		if (R0 != null)
+		{
+			if (R0.endsWith("enn") || R0.endsWith("onn") || R0.endsWith("ett") || R0.endsWith("ell") || R0.endsWith("eill"))
+			{
+				sb.delete( sb.length() - 1, sb.length() );
+				setStrings();
+			}
+		}
+	}
+
+	/**
+	 * Sixth (and last!) step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step6() {
+		if (R0!=null && R0.length()>0)
+		{
+			boolean seenVowel = false;
+			boolean seenConson = false;
+			int pos = -1;
+			for (int i = R0.length()-1; i > -1; i--)
+			{
+				char ch = R0.charAt(i);
+				if (isVowel(ch))
+				{
+					if (!seenVowel)
+					{
+						if (ch == 'é' || ch == 'è')
+						{
+							pos = i;
+							break;
+						}
+					}
+					seenVowel = true;
+				}
+				else
+				{
+					if (seenVowel)
+						break;
+					else
+						seenConson = true;
+				}
+			}
+			if (pos > -1 && seenConson && !seenVowel)
+				sb.setCharAt(pos, 'e');
+		}
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param from java.lang.String - the secondary source zone for search
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @return boolean - true if modified
+	 */
+	private boolean deleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
+		boolean found = false;
+		if (source!=null )
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( search[i] ))
+				{
+					if (from!=null && from.endsWith( prefix + search[i] ))
+					{
+						sb.delete( sb.length() - search[i].length(), sb.length());
+						found = true;
+						setStrings();
+						break;
+					}
+				}
+			}
+		}
+		return found;
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param vowel boolean - true if we need a vowel before the search string
+	 * @param from java.lang.String - the secondary source zone for search (where vowel could be)
+	 * @return boolean - true if modified
+	 */
+	private boolean deleteFromIfTestVowelBeforeIn( String source, String[] search, boolean vowel, String from ) {
+		boolean found = false;
+		if (source!=null && from!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( search[i] ))
+				{
+					if ((search[i].length() + 1) <= from.length())
+					{
+						boolean test = isVowel(sb.charAt(sb.length()-(search[i].length()+1)));
+						if (test == vowel)
+						{
+							sb.delete( sb.length() - search[i].length(), sb.length());
+							modified = true;
+							found = true;
+							setStrings();
+							break;
+						}
+					}
+				}
+			}
+		}
+		return found;
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if preceded by the prefix
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @param without boolean - true if it will be deleted even without prefix found
+	 */
+	private void deleteButSuffixFrom( String source, String[] search, String prefix, boolean without ) {
+		if (source!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( prefix + search[i] ))
+				{
+					sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+				else if ( without && source.endsWith( search[i] ))
+				{
+					sb.delete( sb.length() - search[i].length(), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if preceded by prefix<br>
+	 * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
+	 * or delete the suffix if specified
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @param without boolean - true if it will be deleted even without prefix found
+	 */
+	private void deleteButSuffixFromElseReplace( String source, String[] search, String prefix, boolean without, String from, String replace ) {
+		if (source!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( prefix + search[i] ))
+				{
+					sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+				else if ( from!=null && from.endsWith( prefix + search[i] ))
+				{
+					sb.replace( sb.length() - (prefix.length() + search[i].length()), sb.length(), replace );
+					modified = true;
+					setStrings();
+					break;
+				}
+				else if ( without && source.endsWith( search[i] ))
+				{
+					sb.delete( sb.length() - search[i].length(), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Replace a search string with another within the source zone
+	 *
+	 * @param source java.lang.String - the source zone for search
+	 * @param search java.lang.String[] - the strings to search for replacement
+	 * @param replace java.lang.String - the replacement string
+	 */
+	private boolean replaceFrom( String source, String[] search, String replace ) {
+		boolean found = false;
+		if (source!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( search[i] ))
+				{
+					sb.replace( sb.length() - search[i].length(), sb.length(), replace );
+					modified = true;
+					found = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+		return found;
+	}
+
+	/**
+	 * Delete a search string within the source zone
+	 *
+	 * @param source the source zone for search
+	 * @param suffix the strings to search for suppression
+	 */
+	private void deleteFrom(String source, String[] suffix ) {
+		if (source!=null)
+		{
+			for (int i = 0; i < suffix.length; i++) {
+				if (source.endsWith( suffix[i] ))
+				{
+					sb.delete( sb.length() - suffix[i].length(), sb.length());
+					modified = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Test if a char is a french vowel, including accentuated ones
+	 *
+	 * @param ch the char to test
+	 * @return boolean - true if the char is a vowel
+	 */
+	private boolean isVowel(char ch) {
+		switch (ch)
+		{
+			case 'a':
+			case 'e':
+			case 'i':
+			case 'o':
+			case 'u':
+			case 'y':
+			case 'â':
+			case 'à':
+			case 'ë':
+			case 'é':
+			case 'ê':
+			case 'è':
+			case 'ï':
+			case 'î':
+			case 'ô':
+			case 'ü':
+			case 'ù':
+			case 'û':
+				return true;
+			default:
+				return false;
+		}
+	}
+
+	/**
+	 * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
+	 * "R is the region after the first non-vowel following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"<br>
+	 * @param buffer java.lang.StringBuffer - the input buffer
+	 * @return java.lang.String - the resulting string
+	 */
+	private String retrieveR( StringBuffer buffer ) {
+		int len = buffer.length();
+		int pos = -1;
+		for (int c = 0; c < len; c++) {
+			if (isVowel( buffer.charAt( c )))
+			{
+				pos = c;
+				break;
+			}
+		}
+		if (pos > -1)
+		{
+			int consonne = -1;
+			for (int c = pos; c < len; c++) {
+				if (!isVowel(buffer.charAt( c )))
+				{
+					consonne = c;
+					break;
+				}
+			}
+			if (consonne > -1 && (consonne+1) < len)
+				return buffer.substring( consonne+1, len );
+			else
+				return null;
+		}
+		else
+			return null;
+	}
+
+	/**
+	 * Retrieve the "RV zone" from a buffer an return the corresponding string<br>
+	 * "If the word begins with two vowels, RV is the region after the third letter,
+	 * otherwise the region after the first vowel not at the beginning of the word,
+	 * or the end of the word if these positions cannot be found."<br>
+	 * @param buffer java.lang.StringBuffer - the input buffer
+	 * @return java.lang.String - the resulting string
+	 */
+	private String retrieveRV( StringBuffer buffer ) {
+		int len = buffer.length();
+		if ( buffer.length() > 3)
+		{
+			if ( isVowel(buffer.charAt( 0 )) && isVowel(buffer.charAt( 1 ))) {
+				return buffer.substring(3,len);
+			}
+			else
+			{
+				int pos = 0;
+				for (int c = 1; c < len; c++) {
+					if (isVowel( buffer.charAt( c )))
+					{
+						pos = c;
+						break;
+					}
+				}
+				if ( pos+1 < len )
+					return buffer.substring( pos+1, len );
+				else
+					return null;
+			}
+		}
+		else
+			return null;
+	}
+
+
+
+    /**
+	 * Turns u and i preceded AND followed by a vowel to UpperCase<br>
+	 * Turns y preceded OR followed by a vowel to UpperCase<br>
+	 * Turns u preceded by q to UpperCase<br>
+     *
+     * @param buffer java.lang.StringBuffer - the buffer to treat
+     * @return java.lang.StringBuffer - the treated buffer
+     */
+    private StringBuffer treatVowels( StringBuffer buffer ) {
+		for ( int c = 0; c < buffer.length(); c++ ) {
+			char ch = buffer.charAt( c );
+
+			if (c == 0) // first char
+			{
+				if (buffer.length()>1)
+				{
+					if (ch == 'y' && isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'Y' );
+				}
+			}
+			else if (c == buffer.length()-1) // last char
+			{
+				if (ch == 'u' && buffer.charAt( c - 1 ) == 'q')
+					buffer.setCharAt( c, 'U' );
+				if (ch == 'y' && isVowel(buffer.charAt( c - 1 )))
+					buffer.setCharAt( c, 'Y' );
+			}
+			else // other cases
+			{
+				if (ch == 'u')
+				{
+					if (buffer.charAt( c - 1) == 'q')
+						buffer.setCharAt( c, 'U' );
+					else if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'U' );
+				}
+				if (ch == 'i')
+				{
+					if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'I' );
+				}
+				if (ch == 'y')
+				{
+					if (isVowel(buffer.charAt( c - 1 )) || isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'Y' );
+				}
+			}
+		}
+
+		return buffer;
+    }
+
+    /**
+     * Checks whether a term can be processed correctly.
+     *
+     * @return boolean - true if, and only if, the given term consists only of letters.
+     */
+    private boolean isStemmable( String term ) {
+		boolean upper = false;
+		int first = -1;
+		for ( int c = 0; c < term.length(); c++ ) {
+			// Discard terms that contain non-letter characters.
+			if ( !Character.isLetter( term.charAt( c ) ) ) {
+				return false;
+			}
+			// Discard terms that contain multiple uppercase letters.
+			if ( Character.isUpperCase( term.charAt( c ) ) ) {
+				if ( upper ) {
+					return false;
+				}
+			// First encountered uppercase letter, set flag and save
+			// position.
+				else {
+					first = c;
+					upper = true;
+				}
+			}
+		}
+		// Discard the term if it contains a single uppercase letter that
+		// is not starting the term.
+		if ( first > 0 ) {
+			return false;
+		}
+		return true;
+    }
+}
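A short usage sketch for the French stemmer above, illustrative only and not part of the changeset. It relies solely on the public stem(String) method shown in this file; the wrapper class and sample words are hypothetical. Per isStemmable(), terms containing non-letters or more than one uppercase letter are returned unchanged.

    import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang.FrenchStemmer;

    public class FrenchStemmerSketch {
      public static void main(String[] args) {
        FrenchStemmer stemmer = new FrenchStemmer();
        // Terms with non-letters or multiple capitals fall through isStemmable() unchanged.
        for (String word : new String[] { "continuellement", "nationaux", "majestueuse", "XVIIe" }) {
          System.out.println(word + " -> " + stemmer.stem(word));
        }
      }
    }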
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/GermanStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,267 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+// This file is encoded in UTF-8
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for German words. The algorithm is based on the report
+ * "A Fast and Simple Stemming Algorithm for German Words" by J&ouml;rg
+ * Caumanns (joerg.caumanns at isst.fhg.de).
+ *
+ *
+ * @version   $Id: GermanStemmer.java 564236 2007-08-09 15:21:19Z gsingers $
+ */
+public class GermanStemmer
+{
+    /**
+     * Buffer for the terms while stemming them.
+     */
+    private StringBuffer sb = new StringBuffer();
+
+    /**
+     * Amount of characters that are removed with <tt>substitute()</tt> while stemming.
+     */
+    private int substCount = 0;
+
+    /**
+     * Stems the given term to a unique <tt>discriminator</tt>.
+     *
+     * @param term  The term that should be stemmed.
+     * @return      Discriminator for <tt>term</tt>
+     */
+    public String stem( String term )
+    {
+      // Use lowercase for medium stemming.
+      term = term.toLowerCase();
+      if ( !isStemmable( term ) )
+        return term;
+      // Reset the StringBuffer.
+      sb.delete( 0, sb.length() );
+      sb.insert( 0, term );
+      // Stemming starts here...
+      substitute( sb );
+      strip( sb );
+      optimize( sb );
+      resubstitute( sb );
+      removeParticleDenotion( sb );
+      return sb.toString();
+    }
+
+    /**
+     * Checks if a term could be stemmed.
+     *
+     * @return  true if, and only if, the given term consists only of letters.
+     */
+    private boolean isStemmable( String term )
+    {
+      for ( int c = 0; c < term.length(); c++ ) {
+        if ( !Character.isLetter( term.charAt( c ) ) )
+          return false;
+      }
+      return true;
+    }
+
+    /**
+     * Suffix stripping (stemming) on the current term. The stripping is reduced
+     * to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and "nd",
+     * from which all regular suffixes are built. The simplification causes
+     * some overstemming and many more irregular stems, but still provides unique
+     * discriminators in most of those cases.
+     * The algorithm is context-free, except for the length restrictions.
+     */
+    private void strip( StringBuffer buffer )
+    {
+      boolean doMore = true;
+      while ( doMore && buffer.length() > 3 ) {
+        if ( ( buffer.length() + substCount > 5 ) &&
+          buffer.substring( buffer.length() - 2, buffer.length() ).equals( "nd" ) )
+        {
+          buffer.delete( buffer.length() - 2, buffer.length() );
+        }
+        else if ( ( buffer.length() + substCount > 4 ) &&
+          buffer.substring( buffer.length() - 2, buffer.length() ).equals( "em" ) ) {
+            buffer.delete( buffer.length() - 2, buffer.length() );
+        }
+        else if ( ( buffer.length() + substCount > 4 ) &&
+          buffer.substring( buffer.length() - 2, buffer.length() ).equals( "er" ) ) {
+            buffer.delete( buffer.length() - 2, buffer.length() );
+        }
+        else if ( buffer.charAt( buffer.length() - 1 ) == 'e' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        else if ( buffer.charAt( buffer.length() - 1 ) == 's' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        else if ( buffer.charAt( buffer.length() - 1 ) == 'n' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        // "t" occurs only as suffix of verbs.
+        else if ( buffer.charAt( buffer.length() - 1 ) == 't' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        else {
+          doMore = false;
+        }
+      }
+    }
+
+    /**
+     * Does some optimizations on the term. These optimizations are
+     * contextual.
+     */
+    private void optimize( StringBuffer buffer )
+    {
+      // Additional step for female plurals of professions and inhabitants.
+      if ( buffer.length() > 5 && buffer.substring( buffer.length() - 5, buffer.length() ).equals( "erin*" ) ) {
+        buffer.deleteCharAt( buffer.length() -1 );
+        strip( buffer );
+      }
+      // Additional step for irregular plural nouns like "Matrizen -> Matrix".
+      if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
+        buffer.setCharAt( buffer.length() - 1, 'x' );
+      }
+    }
+
+    /**
+     * Removes a particle denotation ("ge") from a term.
+     */
+    private void removeParticleDenotion( StringBuffer buffer )
+    {
+      if ( buffer.length() > 4 ) {
+        for ( int c = 0; c < buffer.length() - 3; c++ ) {
+          if ( buffer.substring( c, c + 4 ).equals( "gege" ) ) {
+            buffer.delete( c, c + 2 );
+            return;
+          }
+        }
+      }
+    }
+
+    /**
+     * Do some substitutions for the term to reduce overstemming:
+     *
+     * - Substitute Umlauts with their corresponding vowel: äöü -> aou,
+     *   "ß" is substituted by "ss"
+     * - Substitute a second char of a pair of equal characters with
+     *   an asterisk: ?? -> ?*
+     * - Substitute some common character combinations with a token:
+     *   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
+     */
+    private void substitute( StringBuffer buffer )
+    {
+      substCount = 0;
+      for ( int c = 0; c < buffer.length(); c++ ) {
+        // Replace the second char of a pair of equal characters with an asterisk
+        if ( c > 0 && buffer.charAt( c ) == buffer.charAt ( c - 1 )  ) {
+          buffer.setCharAt( c, '*' );
+        }
+        // Substitute Umlauts.
+        else if ( buffer.charAt( c ) == 'ä' ) {
+          buffer.setCharAt( c, 'a' );
+        }
+        else if ( buffer.charAt( c ) == 'ö' ) {
+          buffer.setCharAt( c, 'o' );
+        }
+        else if ( buffer.charAt( c ) == 'ü' ) {
+          buffer.setCharAt( c, 'u' );
+        }
+        // Fix bug so that 'ß' at the end of a word is replaced.
+        else if ( buffer.charAt( c ) == 'ß' ) {
+            buffer.setCharAt( c, 's' );
+            buffer.insert( c + 1, 's' );
+            substCount++;
+        }
+        // Make sure that at least one character remains to the right of the current one
+        if ( c < buffer.length() - 1 ) {
+          // Mask several common character combinations with a token
+          if ( ( c < buffer.length() - 2 ) && buffer.charAt( c ) == 's' &&
+            buffer.charAt( c + 1 ) == 'c' && buffer.charAt( c + 2 ) == 'h' )
+          {
+            buffer.setCharAt( c, '$' );
+            buffer.delete( c + 1, c + 3 );
+            substCount += 2;
+          }
+          else if ( buffer.charAt( c ) == 'c' && buffer.charAt( c + 1 ) == 'h' ) {
+            buffer.setCharAt( c, '§' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 'e' && buffer.charAt( c + 1 ) == 'i' ) {
+            buffer.setCharAt( c, '%' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'e' ) {
+            buffer.setCharAt( c, '&' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'g' ) {
+            buffer.setCharAt( c, '#' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 's' && buffer.charAt( c + 1 ) == 't' ) {
+            buffer.setCharAt( c, '!' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+        }
+      }
+    }
+
+    /**
+     * Undoes the changes made by substitute(), i.e. the character pairs and
+     * masked character combinations. Umlauts remain as their corresponding vowel,
+     * and "ß" remains as "ss".
+     */
+    private void resubstitute( StringBuffer buffer )
+    {
+      for ( int c = 0; c < buffer.length(); c++ ) {
+        if ( buffer.charAt( c ) == '*' ) {
+          char x = buffer.charAt( c - 1 );
+          buffer.setCharAt( c, x );
+        }
+        else if ( buffer.charAt( c ) == '$' ) {
+          buffer.setCharAt( c, 's' );
+          buffer.insert( c + 1, new char[]{'c', 'h'}, 0, 2 );
+        }
+        else if ( buffer.charAt( c ) == '§' ) {
+          buffer.setCharAt( c, 'c' );
+          buffer.insert( c + 1, 'h' );
+        }
+        else if ( buffer.charAt( c ) == '%' ) {
+          buffer.setCharAt( c, 'e' );
+          buffer.insert( c + 1, 'i' );
+        }
+        else if ( buffer.charAt( c ) == '&' ) {
+          buffer.setCharAt( c, 'i' );
+          buffer.insert( c + 1, 'e' );
+        }
+        else if ( buffer.charAt( c ) == '#' ) {
+          buffer.setCharAt( c, 'i' );
+          buffer.insert( c + 1, 'g' );
+        }
+        else if ( buffer.charAt( c ) == '!' ) {
+          buffer.setCharAt( c, 's' );
+          buffer.insert( c + 1, 't' );
+        }
+      }
+    }
+    
+}
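A small usage sketch for the German stemmer above, illustrative only and not part of the changeset; the wrapper class and sample words are hypothetical. The public stem(String) method returns a discriminator rather than a dictionary stem, so related inflected forms should collapse to the same string.

    import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang.GermanStemmer;

    public class GermanStemmerSketch {
      public static void main(String[] args) {
        GermanStemmer stemmer = new GermanStemmer();
        // The output is a discriminator, not a linguistic stem; inflected forms of
        // the same word are expected to collapse to the same value.
        for (String word : new String[] { "Haus", "Hauses", "Häuser", "häuslich" }) {
          System.out.println(word + " -> " + stemmer.stem(word));
        }
      }
    }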
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/analysis/lang/RussianStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,630 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
+ *
+ *
+ * @version $Id: RussianStemmer.java 564236 2007-08-09 15:21:19Z gsingers $
+ */
+public class RussianStemmer
+{
+    private char[] charset;
+
+    // positions of RV, R1 and R2 respectively
+    private int RV, R1, R2;
+
+    // letters (currently unused letters are commented out)
+    private final static char A = 0;
+    //private final static char B = 1;
+    private final static char V = 2;
+    private final static char G = 3;
+    //private final static char D = 4;
+    private final static char E = 5;
+    //private final static char ZH = 6;
+    //private final static char Z = 7;
+    private final static char I = 8;
+    private final static char I_ = 9;
+    //private final static char K = 10;
+    private final static char L = 11;
+    private final static char M = 12;
+    private final static char N = 13;
+    private final static char O = 14;
+    //private final static char P = 15;
+    //private final static char R = 16;
+    private final static char S = 17;
+    private final static char T = 18;
+    private final static char U = 19;
+    //private final static char F = 20;
+    private final static char X = 21;
+    //private final static char TS = 22;
+    //private final static char CH = 23;
+    private final static char SH = 24;
+    private final static char SHCH = 25;
+    //private final static char HARD = 26;
+    private final static char Y = 27;
+    private final static char SOFT = 28;
+    private final static char AE = 29;
+    private final static char IU = 30;
+    private final static char IA = 31;
+
+    // stem definitions
+    private static char[] vowels = { A, E, I, O, U, Y, AE, IU, IA };
+
+    private static char[][] perfectiveGerundEndings1 = {
+        { V },
+        { V, SH, I },
+        { V, SH, I, S, SOFT }
+    };
+
+    private static char[][] perfectiveGerund1Predessors = {
+        { A },
+        { IA }
+    };
+
+    private static char[][] perfectiveGerundEndings2 = {
+        { I, V },
+        { Y, V },
+        { I, V, SH, I },
+        { Y, V, SH, I },
+        { I, V, SH, I, S, SOFT },
+        { Y, V, SH, I, S, SOFT }
+    };
+
+    private static char[][] adjectiveEndings = {
+        { E, E },
+        { I, E },
+        { Y, E },
+        { O, E },
+        { E, I_ },
+        { I, I_ },
+        { Y, I_ },
+        { O, I_ },
+        { E, M },
+        { I, M },
+        { Y, M },
+        { O, M },
+        { I, X },
+        { Y, X },
+        { U, IU },
+        { IU, IU },
+        { A, IA },
+        { IA, IA },
+        { O, IU },
+        { E, IU },
+        { I, M, I },
+        { Y, M, I },
+        { E, G, O },
+        { O, G, O },
+        { E, M, U },
+        { O, M, U }
+    };
+
+    private static char[][] participleEndings1 = {
+        { SHCH },
+        { E, M },
+        { N, N },
+        { V, SH },
+        { IU, SHCH }
+    };
+
+    private static char[][] participleEndings2 = {
+        { I, V, SH },
+        { Y, V, SH },
+        { U, IU, SHCH }
+    };
+
+    private static char[][] participle1Predessors = {
+        { A },
+        { IA }
+    };
+
+    private static char[][] reflexiveEndings = {
+        { S, IA },
+        { S, SOFT }
+    };
+
+    private static char[][] verbEndings1 = {
+        { I_ },
+        { L },
+        { N },
+        { L, O },
+        { N, O },
+        { E, T },
+        { IU, T },
+        { L, A },
+        { N, A },
+        { L, I },
+        { E, M },
+        { N, Y },
+        { E, T, E },
+        { I_, T, E },
+        { T, SOFT },
+        { E, SH, SOFT },
+        { N, N, O }
+    };
+
+    private static char[][] verbEndings2 = {
+        { IU },
+        { U, IU },
+        { E, N },
+        { E, I_ },
+        { IA, T },
+        { U, I_ },
+        { I, L },
+        { Y, L },
+        { I, M },
+        { Y, M },
+        { I, T },
+        { Y, T },
+        { I, L, A },
+        { Y, L, A },
+        { E, N, A },
+        { I, T, E },
+        { I, L, I },
+        { Y, L, I },
+        { I, L, O },
+        { Y, L, O },
+        { E, N, O },
+        { U, E, T },
+        { U, IU, T },
+        { E, N, Y },
+        { I, T, SOFT },
+        { Y, T, SOFT },
+        { I, SH, SOFT },
+        { E, I_, T, E },
+        { U, I_, T, E }
+    };
+
+    private static char[][] verb1Predessors = {
+        { A },
+        { IA }
+    };
+
+    private static char[][] nounEndings = {
+        { A },
+        { U },
+        { I_ },
+        { O },
+        { U },
+        { E },
+        { Y },
+        { I },
+        { SOFT },
+        { IA },
+        { E, V },
+        { O, V },
+        { I, E },
+        { SOFT, E },
+        { IA, X },
+        { I, IU },
+        { E, I },
+        { I, I },
+        { E, I_ },
+        { O, I_ },
+        { E, M },
+        { A, M },
+        { O, M },
+        { A, X },
+        { SOFT, IU },
+        { I, IA },
+        { SOFT, IA },
+        { I, I_ },
+        { IA, M },
+        { IA, M, I },
+        { A, M, I },
+        { I, E, I_ },
+        { I, IA, M },
+        { I, E, M },
+        { I, IA, X },
+        { I, IA, M, I }
+    };
+
+    private static char[][] superlativeEndings = {
+        { E, I_, SH },
+        { E, I_, SH, E }
+    };
+
+    private static char[][] derivationalEndings = {
+        { O, S, T },
+        { O, S, T, SOFT }
+    };
+
+    /**
+     * RussianStemmer constructor comment.
+     */
+    public RussianStemmer()
+    {
+        super();
+    }
+
+    /**
+     * RussianStemmer constructor comment.
+     */
+    public RussianStemmer(char[] charset)
+    {
+        super();
+        this.charset = charset;
+    }
+
+    /**
+     * Adjectival ending is an adjective ending,
+     * optionally preceded by participle ending.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean adjectival(StringBuffer stemmingZone)
+    {
+        // look for adjective ending in a stemming zone
+        if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
+            return false;
+        // if adjective ending was found, try for participle ending.
+        // variable r is unused, we are just interested in the side effect of
+        // findAndRemoveEnding():
+        boolean r =
+            findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
+            ||
+            findAndRemoveEnding(stemmingZone, participleEndings2);
+        return true;
+    }
+
+    /**
+     * Derivational endings
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean derivational(StringBuffer stemmingZone)
+    {
+        int endingLength = findEnding(stemmingZone, derivationalEndings);
+        if (endingLength == 0)
+             // no derivational ending found
+            return false;
+        else
+        {
+            // Ensure that the ending locates in R2
+            if (R2 - RV <= stemmingZone.length() - endingLength)
+            {
+                stemmingZone.setLength(stemmingZone.length() - endingLength);
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Finds an ending among the given ending class and returns the length of the ending found (0 if not found).
+     * Creation date: (17/03/2002 8:18:34 PM)
+     */
+    private int findEnding(StringBuffer stemmingZone, int startIndex, char[][] theEndingClass)
+    {
+        boolean match = false;
+        for (int i = theEndingClass.length - 1; i >= 0; i--)
+        {
+            char[] theEnding = theEndingClass[i];
+            // check if the ending is bigger than stemming zone
+            if (startIndex < theEnding.length - 1)
+            {
+                match = false;
+                continue;
+            }
+            match = true;
+            int stemmingIndex = startIndex;
+            for (int j = theEnding.length - 1; j >= 0; j--)
+            {
+                if (stemmingZone.charAt(stemmingIndex--) != charset[theEnding[j]])
+                {
+                    match = false;
+                    break;
+                }
+            }
+            // check if ending was found
+            if (match)
+            {
+                return theEndingClass[i].length; // cut ending
+            }
+        }
+        return 0;
+    }
+
+    private int findEnding(StringBuffer stemmingZone, char[][] theEndingClass)
+    {
+        return findEnding(stemmingZone, stemmingZone.length() - 1, theEndingClass);
+    }
+
+    /**
+     * Finds the ending among the given class of endings and removes it from stemming zone.
+     * Creation date: (17/03/2002 8:18:34 PM)
+     */
+    private boolean findAndRemoveEnding(StringBuffer stemmingZone, char[][] theEndingClass)
+    {
+        int endingLength = findEnding(stemmingZone, theEndingClass);
+        if (endingLength == 0)
+            // not found
+            return false;
+        else {
+            stemmingZone.setLength(stemmingZone.length() - endingLength);
+            // cut the ending found
+            return true;
+        }
+    }
+
+    /**
+     * Finds the ending among the given class of endings, then checks if this ending was
+     * preceded by any of the given predecessors, and if so, removes it from the stemming zone.
+     * Creation date: (17/03/2002 8:18:34 PM)
+     */
+    private boolean findAndRemoveEnding(StringBuffer stemmingZone,
+        char[][] theEndingClass, char[][] thePredessors)
+    {
+        int endingLength = findEnding(stemmingZone, theEndingClass);
+        if (endingLength == 0)
+            // not found
+            return false;
+        else
+        {
+            int predessorLength =
+                findEnding(stemmingZone,
+                    stemmingZone.length() - endingLength - 1,
+                    thePredessors);
+            if (predessorLength == 0)
+                return false;
+            else {
+                stemmingZone.setLength(stemmingZone.length() - endingLength);
+                // cut the ending found
+                return true;
+            }
+        }
+
+    }
+
+    /**
+     * Marks positions of RV, R1 and R2 in a given word.
+     * Creation date: (16/03/2002 3:40:11 PM)
+     */
+    private void markPositions(String word)
+    {
+        RV = 0;
+        R1 = 0;
+        R2 = 0;
+        int i = 0;
+        // find RV
+        while (word.length() > i && !isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // RV zone is empty
+        RV = i;
+        // find R1
+        while (word.length() > i && isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // R1 zone is empty
+        R1 = i;
+        // find R2
+        while (word.length() > i && !isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // R2 zone is empty
+        while (word.length() > i && isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // R2 zone is empty
+        R2 = i;
+    }
+
+    /**
+     * Checks if a character is a vowel.
+     * Creation date: (16/03/2002 10:47:03 PM)
+     * @return boolean
+     * @param letter char
+     */
+    private boolean isVowel(char letter)
+    {
+        for (int i = 0; i < vowels.length; i++)
+        {
+            if (letter == charset[vowels[i]])
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * Noun endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean noun(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(stemmingZone, nounEndings);
+    }
+
+    /**
+     * Perfective gerund endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean perfectiveGerund(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(
+            stemmingZone,
+            perfectiveGerundEndings1,
+            perfectiveGerund1Predessors)
+            || findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
+    }
+
+    /**
+     * Reflexive endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean reflexive(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(stemmingZone, reflexiveEndings);
+    }
+
+    /**
+     * Removes a trailing I (as mapped by the charset) from the stemming zone.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean removeI(StringBuffer stemmingZone)
+    {
+        if (stemmingZone.length() > 0
+            && stemmingZone.charAt(stemmingZone.length() - 1) == charset[I])
+        {
+            stemmingZone.setLength(stemmingZone.length() - 1);
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Removes a trailing soft sign (SOFT, as mapped by the charset) from the stemming zone.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean removeSoft(StringBuffer stemmingZone)
+    {
+        if (stemmingZone.length() > 0
+            && stemmingZone.charAt(stemmingZone.length() - 1) == charset[SOFT])
+        {
+            stemmingZone.setLength(stemmingZone.length() - 1);
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Sets the charset that maps the letter constants to actual characters.
+     * Creation date: (16/03/2002 10:58:42 PM)
+     * @param newCharset char[]
+     */
+    public void setCharset(char[] newCharset)
+    {
+        charset = newCharset;
+    }
+
+    /**
+     * Finds the stem for given Russian word.
+     * Creation date: (16/03/2002 3:36:48 PM)
+     * @return java.lang.String
+     * @param input java.lang.String
+     */
+    public String stem(String input)
+    {
+        markPositions(input);
+        if (RV == 0)
+            return input; //RV wasn't detected, nothing to stem
+        StringBuffer stemmingZone = new StringBuffer(input.substring(RV));
+        // stemming goes on in RV
+        // Step 1
+
+        if (!perfectiveGerund(stemmingZone))
+        {
+            reflexive(stemmingZone);
+            // variable r is unused, we are just interested in the flow that gets
+            // created by logical expression: apply adjectival(); if that fails,
+            // apply verb() etc
+            boolean r =
+                adjectival(stemmingZone)
+                || verb(stemmingZone)
+                || noun(stemmingZone);
+        }
+        // Step 2
+        removeI(stemmingZone);
+        // Step 3
+        derivational(stemmingZone);
+        // Step 4
+        superlative(stemmingZone);
+        undoubleN(stemmingZone);
+        removeSoft(stemmingZone);
+        // return result
+        return input.substring(0, RV) + stemmingZone.toString();
+    }
+
+    /**
+     * Superlative endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean superlative(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(stemmingZone, superlativeEndings);
+    }
+
+    /**
+     * Undoubles N.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean undoubleN(StringBuffer stemmingZone)
+    {
+        char[][] doubleN = {
+            { N, N }
+        };
+        if (findEnding(stemmingZone, doubleN) != 0)
+        {
+            stemmingZone.setLength(stemmingZone.length() - 1);
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Verb endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean verb(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(
+            stemmingZone,
+            verbEndings1,
+            verb1Predessors)
+            || findAndRemoveEnding(stemmingZone, verbEndings2);
+    }
+
+    /**
+     * Static method for stemming with different charsets
+     */
+    public static String stem(String theWord, char[] charset)
+    {
+        RussianStemmer stemmer = new RussianStemmer();
+        stemmer.setCharset(charset);
+        return stemmer.stem(theWord);
+    }
+}
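A usage sketch for the Russian stemmer above, illustrative only and not part of the changeset. The letter constants (A = 0 ... IA = 31) index into a caller-supplied charset; the array below is an assumption, using the conventional 32-letter modern Russian lowercase ordering а...я (without ё), which is the ordering the constants appear to follow. Input words must be lowercase and drawn from the same charset.

    import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.lang.RussianStemmer;

    public class RussianStemmerSketch {
      public static void main(String[] args) {
        // Assumed mapping of the letter constants (A..IA) to Unicode Cyrillic letters.
        char[] unicodeRussian = {
          'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з', 'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п',
          'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч', 'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я'
        };
        // The static helper sets the charset and stems in one call.
        System.out.println(RussianStemmer.stem("вышивание", unicodeRussian));
      }
    }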
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ExampleDonatusHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,154 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.example;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusXmlRpcClient;
+
+/**
+ * Example handler for eXist XML documents (singleton). 
+ * Local document files can be stored into eXist collections over XML-RPC. 
+ * The eXistXmlRpcInterface cannot be used in a multi-threaded environment.
+ * Collections can be configured per language. You should ask your eXist
+ * administrator for the name of the document collection and for the language 
+ * collection names that can be used. 
+ * Then you have to set the instance variables: "serverName", "serverPort", 
+ * "userName", "pw", "documentCollectionName", "localDirectoryName" and 
+ * "languages" (see below). That's all.
+ * 
+ * For example your local directory structure could look like this:
+ * archimedesForEXist
+ *   documents
+ *     ar
+ *       yourDoc1.xml
+ *       yourDoc2.xml
+ *       ...
+ *     de
+ *       yourDoc1.xml
+ *       yourDoc2.xml
+ *       ...
+ *     el
+ *       yourDoc1.xml
+ *       yourDoc2.xml
+ *       ...
+ *     ...
+ */
+public class ExampleDonatusHandler {
+  private static ExampleDonatusHandler instance;
+  private DonatusXmlRpcClient donatusXmlRpcClient = null;
+
+  private String documentCollectionName = "/db/mpdl-example/archimedes/documents";
+  private String localDirectoryName = "/Users/jwillenborg/texts/archimedesForEXist/documents";
+  
+  private String exampleDocumentName = "achil_propo_087_la_1545.xml";
+  
+  private long beginOfOperation;
+  private long endOfOperation;
+  
+  public static ExampleDonatusHandler getInstance() {
+    if (instance == null) {
+      instance = new ExampleDonatusHandler();
+      instance.init();
+    }
+    return instance;
+  }
+
+  public static void main(String[] args) {
+    getInstance();
+    instance.beginOperation();
+    System.out.println("Start ... ");
+    String result = instance.analyzeExampleDocumentFile();  // example for analyzing one document
+    instance.endOperation();
+    System.out.println(result);
+    System.out.println("End of operation. Elapsed time: " + (instance.endOfOperation - instance.beginOfOperation) + " ms" );
+  }
+
+  private void init() {
+    donatusXmlRpcClient = new DonatusXmlRpcClient(); // default server is "archimedes.fas.harvard.edu"
+  }
+  
+  private String analyzeExampleDocument() {
+    String locator = "xxx";   // TODO take uri 
+    String language = "la";
+    String s1 = "<s id=\"id.0.1.01.02\">An recentiores Mathematici Aristotelem in errore deprehenderint proportionum regulas docentem, quibus motus invicem comparantur, disputandum.</s>";
+    String s2 = "<s id=\"id.0.1.01.03\">Praesens opus in quatuor secatur partes.</s>";
+    String s3 = "<s id=\"id.0.1.01.04\">Primo fundamenta quaedam subiiciam.</s>";
+    String s4 = "<s id=\"id.0.1.01.05\">Secundo regulas quasdam asseram.</s>";
+    String s5 = "<s id=\"id.0.1.01.06\">Tertio conclusiones aliquas probabo.</s>";
+    String s6 = "<s id=\"id.0.1.01.07\">Quarto ad obiecta respondebo.</s>";
+    String s7 = "<s id=\"id.0.1.01.08\">Hic deus lumen infundat.</s>";
+    String sentences = s1 + s2 + s3 + s4 + s5 + s6 + s7;
+    String doc = "<fragment>" + sentences + "</fragment>";
+    String morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, doc);
+    return morphDocTypeXmlStr;
+  }
+
+  private String analyzeExampleDocumentFile() {
+    String morphDocTypeXmlStr = null;
+    try {
+      String locator = "xxx";   // TODO take uri 
+      String language = "la";
+      String exampleLocalFile = instance.localDirectoryName + "/" + language + "/" + instance.exampleDocumentName;  // TODO example document with sentences
+      StringBuffer docStringArray = new StringBuffer("");
+      int chunkSize = 20000 * 1024;  // read the local file in roughly 20 MB chunks so that not too much RAM is consumed at once
+      InputStream localFileInputStream = new BufferedInputStream(new FileInputStream(exampleLocalFile));
+      byte[] chunk = new byte[chunkSize];
+      while ((chunk = readBytes(localFileInputStream, chunkSize)) != null) {
+        docStringArray.append(new String(chunk));
+      }
+      morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, docStringArray.toString());
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    }
+    return morphDocTypeXmlStr;
+  }
+
+  private String analyzeExampleSentence() {
+    String locator = "xxx";   // TODO take uri 
+    String language = "la";
+    String s = "<s id=\"id.0.1.01.05\"><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>";
+    // String s = "<s><w>Secundo</w><w>regulas</w><w>quasdam</w><w>asseram</w></s>";
+    String morphDocTypeXmlStr = instance.donatusXmlRpcClient.analyze(locator, language, s);
+    return morphDocTypeXmlStr;
+  }
+
+  /**
+   *  Reads a chunk of data from an input stream.
+   *  Does not close the stream until the last bytes are read.
+   *  @param in the input stream to be read
+   *  @param chunkSize length of the chunk which is read
+   *  @return the bytes read, or null if the end of the stream was reached
+   */
+  private byte[] readBytes(InputStream in, int chunkSize) {
+    byte[] resultBytes = new byte[chunkSize];
+    try {
+      int len = in.read(resultBytes, 0, chunkSize);
+      if (len == -1) {
+        try { in.close(); } catch (Exception e) { }  // close the stream if end of file is reached
+        resultBytes = null;
+      } else if (len < chunkSize) {  // last chunk of the file: return only the bytes actually read
+        byte[] tmp = new byte[len];
+        System.arraycopy(resultBytes, 0, tmp, 0, len);
+        resultBytes = tmp;
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    } 
+    return resultBytes;  
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date().getTime();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date().getTime();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/ParseDonatusResult.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,63 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.example;
+
+import org.xml.sax.*;
+
+public class ParseDonatusResult implements ContentHandler {
+  public String value = null;
+  
+  public void startDocument() throws SAXException {
+    System.out.println("<?xml version=\"1.0\"?>");
+  }
+
+  public void endDocument() throws SAXException {
+    
+  }
+
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    //System.out.print(new String(c, start, length));
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+    //characters(c, start, length);
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+    /*System.out.print("<?");
+    System.out.print(target);
+    if (data != null && data.length() > 0)
+      System.out.print(" " + data);
+    System.out.println("?>");*/
+  }
+
+  public void setDocumentLocator(org.xml.sax.Locator arg1) {
+    
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    // TODO Auto-generated method stub
+  }
+
+  public void endPrefixMapping(String prefix) throws SAXException {
+    // TODO Auto-generated method stub
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+    // TODO Auto-generated method stub
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    if (name.equals("lemma"))
+      if (attrs != null) {
+        int length = attrs.getLength();
+        for (int i = 0; (i < length) && (value == null); i++) {
+          if (attrs.getLocalName(i).equals("form"))
+            value = attrs.getValue(i);
+        }
+      }
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+    // TODO Auto-generated method stub
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/example/TestDonatus.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,179 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.example;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Hashtable;
+
+import org.apache.xmlrpc.XmlRpcException;
+import org.apache.xmlrpc.client.XmlRpcClient;
+import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import java.util.Vector;
+
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusMorphologyDocument;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusMorphologyDocumentContentHandler;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusWtagContainer;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusWtagDocument;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusWtagSection;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusXmlRpcClient;
+
+public class TestDonatus {
+  private DonatusXmlRpcClient donatusXmlRpcClient = null;
+
+  public static void main(String[] args) {
+    TestDonatus testDonatus = new TestDonatus();
+    // testDonatus.lineInputAnalyzer();
+    DonatusMorphologyDocument result = testDonatus.analyzeMonte037();
+    
+  }
+
+  public TestDonatus() {
+    init();
+  }
+  
+  private void init() {
+    donatusXmlRpcClient = new DonatusXmlRpcClient();
+  }
+  
+  private DonatusMorphologyDocument analyze(DonatusWtagDocument doc) {
+    DonatusMorphologyDocument resultDoc = donatusXmlRpcClient.analyze(doc);
+    return resultDoc;
+  }
+      
+  private DonatusMorphologyDocument analyzeMonte037() {
+    String locator = "http://archimedes/037.xml";
+    int docId = 4711;
+    String languageIt = "it";
+    ArrayList<String> s1It = new ArrayList<String>();
+    s1It.add("LE"); 
+    s1It.add("MECHANICHE");
+    s1It.add("DELL");
+    s1It.add("ILLVSTRISS");
+    s1It.add("SIG");
+    s1It.add("GVIDO");
+    s1It.add("VBALDO");
+    s1It.add("DE");
+    s1It.add("MARCHESI");
+    s1It.add("DEL");
+    s1It.add("MONTE");
+    s1It.add("LE");
+    ArrayList<String> s2It = new ArrayList<String>();
+    s2It.add("TRADOTTE"); 
+    s2It.add("IN"); 
+    s2It.add("VOLGARE"); 
+    s2It.add("DAL"); 
+    s2It.add("SIG"); 
+    s2It.add("FILIPPO"); 
+    s2It.add("PIGAFETTA"); 
+    s2It.add("Nellequali"); 
+    s2It.add("&#x17F;i"); 
+    s2It.add("contiene"); 
+    s2It.add("la");
+    DonatusWtagContainer sIt1 = new DonatusWtagContainer("s", "1", s1It);
+    DonatusWtagContainer sIt2 = new DonatusWtagContainer("s", "2", s2It);
+    DonatusWtagSection sectionIt = new DonatusWtagSection(languageIt);
+    sectionIt.addContainer(sIt1);
+    sectionIt.addContainer(sIt2);
+    String languageLa = "la";
+    ArrayList<String> s1La = new ArrayList<String>();
+    s1La.add("GVIDIVBALDI"); 
+    s1La.add("E"); 
+    s1La.add("MARCHIONIBVS"); 
+    s1La.add("MONTIS"); 
+    s1La.add("MECHANICORVM"); 
+    s1La.add("LIBER"); 
+    DonatusWtagContainer sLa1 = new DonatusWtagContainer("s", "1", s1La);
+    DonatusWtagSection sectionLa = new DonatusWtagSection(languageLa);
+    sectionLa.addContainer(sLa1);
+    DonatusWtagDocument donatusWtagDocument = new DonatusWtagDocument(locator);
+    donatusWtagDocument.addSection(sectionIt);
+    donatusWtagDocument.addSection(sectionLa);    
+    DonatusMorphologyDocument donatusXmlMorphologyDoc = analyze(donatusWtagDocument);
+    return donatusXmlMorphologyDoc;
+  }
+
+  private String analyze(String locator, String language, String documentString) {
+    String resultDoc = donatusXmlRpcClient.analyze(locator, language, documentString);
+    return resultDoc;
+  }
+      
+  private String askDonatus(String term) throws XmlRpcException, IOException {
+    String WTAG_START="<wtag locator=\"xxx\"><section lang=\"de\"><s id=\"3.2\"><w>";
+    String WTAG_END="</w></s></section></wtag>";
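+    // For illustration, with the (hypothetical) term "regulas" the assembled request would be:
+    //   <wtag locator="xxx"><section lang="de"><s id="3.2"><w>regulas</w></s></section></wtag>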
+    ParseDonatusResult ch = new ParseDonatusResult();
+    try {
+      URL url = new URL("http://archimedes.fas.harvard.edu/cgi-bin/donatus-rpc");
+
+      XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
+      config.setServerURL(url);
+      XmlRpcClient xmlClient = new XmlRpcClient();
+      xmlClient.setConfig(config);
+
+      String wtag = WTAG_START + term + WTAG_END;
+      Vector<Object> params = new Vector<Object>();
+      params.add(wtag.getBytes("UTF8"));
+      Hashtable donatusReturn = (Hashtable) xmlClient.execute("donatus.analyze", params);
+      Object s = donatusReturn.get("morphData");
+      String st = new String((byte[])s);
+      XMLReader parser = new SAXParser();
+      parser.setContentHandler(ch);
+      Reader reader = new StringReader(st);
+      InputSource input = new InputSource(reader);
+      parser.parse(input);
+    } catch (SAXException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    } catch (MalformedURLException e) {
+      e.printStackTrace();
+    }
+    return ch.value;
+  }
+      
+  private void lineInputAnalyzer() {
+    BufferedReader in = null;
+    try {
+      in = new BufferedReader(new InputStreamReader(System.in, "UTF8"));
+    } catch (UnsupportedEncodingException e) {
+      // TODO Auto-generated catch block
+      e.printStackTrace();
+    }
+    while(true) {
+      // prompt the user
+      System.out.println("Enter query: ");
+      String line = null;
+      try {
+        line = in.readLine();
+      } catch (IOException e) {
+        // TODO Auto-generated catch block
+        e.printStackTrace();
+      }
+      if (line == null)
+        break;
+      line = line.trim();
+      if (line.length() == 0)
+        break;
+      try {
+        String donatusResponse = askDonatus(line);
+        System.out.println(donatusResponse);
+      } catch (XmlRpcException e) {
+        // TODO Auto-generated catch block
+        e.printStackTrace();
+      } catch (IOException e) {
+        // TODO Auto-generated catch block
+        e.printStackTrace();
+      }
+    }
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusAnalysis.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,22 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+public class DonatusAnalysis {
+  private String desc;
+  private String xlinkType;
+
+  public DonatusAnalysis(String desc, String xlinkType) {
+    this.desc = desc;
+    this.xlinkType = xlinkType;
+  }
+  
+  public String getDesc() {
+    return desc;
+  }
+  
+  public String getXmlString() {
+    String xmlString = "<analysis desc=\"" + desc + "\" xlink:type=\"" + xlinkType + "\">";
+    xmlString += "</analysis>";
+    return xmlString;
+  }
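+  // For illustration, with the made-up values desc "3rd pl pres ind act" and xlinkType "simple",
+  // getXmlString() would return: <analysis desc="3rd pl pres ind act" xlink:type="simple"></analysis>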
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusBerkeleyDbEnv.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,75 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.io.File;
+
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseConfig;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.Environment;
+import com.sleepycat.je.EnvironmentConfig;
+
+public class DonatusBerkeleyDbEnv {
+  private File envPath;
+  private Environment env;
+  private Database lemmaDb;
+  private Database variantDb;
+
+  public DonatusBerkeleyDbEnv() {
+  }
+
+  public void setup(boolean readOnly) throws DatabaseException {
+    EnvironmentConfig envConfig = new EnvironmentConfig();
+    DatabaseConfig dbConfig = new DatabaseConfig();
+    // If the environment is read-only, then make the databases read-only too.
+    envConfig.setReadOnly(readOnly);
+    dbConfig.setReadOnly(readOnly);
+    // If the environment is opened for write, then we want to be able to create the environment and databases if they do not exist.
+    envConfig.setAllowCreate(!readOnly);
+    dbConfig.setAllowCreate(!readOnly);
+    // Allow transactions if we are writing to the database
+    envConfig.setTransactional(!readOnly);
+    dbConfig.setTransactional(!readOnly);
+    // Open the environment
+    envPath = new File(DonatusConstants.BERKELEY_DB_DIR);
+    env = new Environment(envPath, envConfig);
+    // open databases (and create them if not existent)
+    lemmaDb = env.openDatabase(null, "LemmaDB", dbConfig);
+    variantDb = env.openDatabase(null, "VariantDB", dbConfig);
+  }
+
+  public void removeDatabases() {
+    try {
+      lemmaDb.close();
+      variantDb.close();
+      env.removeDatabase(null, "LemmaDB");    
+      env.removeDatabase(null, "VariantDB");    
+    } catch (DatabaseException e) {
+      e.printStackTrace();  // TODO
+    }
+  }
+  
+  public Environment getEnv() {
+    return env;
+  }
+
+  public Database getLemmaDB() {
+    return lemmaDb;
+  }
+
+  public Database getVariantDB() {
+    return variantDb;
+  }
+
+  public void close() {
+    if (env != null) {
+      try {
+        lemmaDb.close();
+        variantDb.close();
+        env.close();
+      } catch(DatabaseException e) {
+        e.printStackTrace();  // TODO
+      }
+    }
+  }
+}
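+// Typical usage sketch (illustrative only; assumes the caller handles DatabaseException):
+//   DonatusBerkeleyDbEnv dbEnv = new DonatusBerkeleyDbEnv();
+//   dbEnv.setup(true);                      // open the environment and both databases read-only
+//   Database lemmaDb = dbEnv.getLemmaDB();  // ... read lemma records ...
+//   dbEnv.close();                          // always close the databases and the environment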
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusCache.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,375 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Date;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sleepycat.je.Cursor;
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseEntry;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.LockMode;
+import com.sleepycat.je.OperationStatus;
+import com.sleepycat.je.Transaction;
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+
+public class DonatusCache {
+  private static DonatusCache instance;
+  private DonatusBerkeleyDbEnv berkeleyDBEnv = null;
+  private Date state = null;  // last time the cache was written
+
+  // for performance reasons these variables are needed
+  public static int QUERY_MODE = 0;
+  public static int DOCUMENT_MODE = 1;
+  protected int mode = QUERY_MODE;
+  // for performance reasons the cache contains a donatusMorphologyDocument which 
+  // caches all lemmas for one document (in DOCUMENT_MODE) 
+  private DonatusMorphologyDocument donatusMorphologyDocument = null;
+
+  public static DonatusCache getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new DonatusCache();
+      instance.init();
+    }
+    return instance;
+  }
+
+  private void init() throws ApplicationException {
+    try {
+      berkeleyDBEnv = new DonatusBerkeleyDbEnv();
+      berkeleyDBEnv.setup(false); // open databases in read/write mode
+      state = new Date();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public int getMode() {
+    return mode;  
+  }
+  
+  public void setMode(int newMode) {
+    this.mode = newMode;
+    if (newMode == QUERY_MODE)    
+      donatusMorphologyDocument = null; // reset the morphology document
+  }
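+  // Illustrative mode handling: a document run would switch to DOCUMENT_MODE before analyzing
+  // and caching (analyze / addVariant / saveLemmas) and back to QUERY_MODE afterwards, e.g.:
+  //   cache.setMode(DonatusCache.DOCUMENT_MODE);
+  //   ... cache.analyze(...); cache.addVariant(...); cache.saveLemmas(); ...
+  //   cache.setMode(DonatusCache.QUERY_MODE);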
+  
+  public void close() {
+    berkeleyDBEnv.close();
+  }
+  
+  // TODO invoke via the RPC API: execute(String path, HashMap parameters); dedicated MPDL function for administering BerkeleyDB: org.exist.xquery.modules.mpdldb.BerkeleyDBAdmin
+  public void deleteCache() {
+    berkeleyDBEnv.removeDatabases();
+    state = new Date();
+  }
+  
+  public void analyze(DonatusAnalyzer analyzer, String docUri, ArrayList<String> sentences) throws ApplicationException {
+    DonatusHandler donatusHandler = new DonatusHandler(analyzer);
+    donatusMorphologyDocument = donatusHandler.analyze(docUri, sentences);
+  }
+  
+  public void addVariant(String language, String lemmaForm, String type, String variantForm) throws ApplicationException {
+    DonatusLemma lemma = getLemmaByVariantForm(language, variantForm);
+    // if the variantForm is already attached to a cached lemma, reuse that lemma
+    if (lemma == null) {
+      // if the lemmaForm is already cached as a lemma, reuse it; otherwise build a new lemma for the variant
+      lemma = getLemmaByLemmaForm(language, lemmaForm);
+      if (lemma == null) {
+        lemma = new DonatusLemma(donatusMorphologyDocument, language, type, lemmaForm);
+        donatusMorphologyDocument.putLemma(lemma);
+      } else {
+        // nothing
+      }
+    }
+    DonatusVariant v = new DonatusVariant(lemma, type, variantForm);
+    lemma.addVariant(v);
+  }
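+  // Example (illustrative values; assumes analyze(...) has been called so the in-memory morphology
+  // document exists): addVariant("la", "regula", DonatusConstants.TYPE_SNOWBALL, "regulas") attaches
+  // the variant "regulas" to the cached lemma "regula", creating the lemma first if neither form is cached yet.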
+  
+  public void saveLemmas() throws ApplicationException {
+    try {
+      String docUri = donatusMorphologyDocument.getDocUri();
+      URL url = new URL(docUri);
+      String path = url.getPath();
+      writeLemmas(donatusMorphologyDocument);
+      Date endOfOperation2 = new Date();
+      String donMorphPath = path.replaceFirst(".xml", "-donatus-morph-v" + endOfOperation2.getTime() + ".xml");
+      String morphDocFilePathStr = DonatusConstants.BERKELEY_DB_DIR + "/donatusAnalyzedFiles" + donMorphPath;
+      FileUtil fileUtil = new FileUtil();
+      byte[] morphDocBytes = donatusMorphologyDocument.getDocumentBytes();
+      fileUtil.saveFile(morphDocBytes, morphDocFilePathStr);
+      String donWtagPath = path.replaceFirst(".xml", "-donatus-wtag-v" + endOfOperation2.getTime() + ".xml");
+      String wtagFilePathStr = DonatusConstants.BERKELEY_DB_DIR + "/donatusAnalyzedFiles" + donWtagPath;
+      byte[] wtagBytes = donatusMorphologyDocument.getWtagBytes();
+      fileUtil.saveFile(wtagBytes, wtagFilePathStr);
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    state = new Date();
+  }
+  
+  public DonatusLemma getLemmaByVariantForm(String language, String variantForm) throws ApplicationException {
+    DonatusLemma lemma = null;
+    if (mode == QUERY_MODE) {
+      lemma = readVariantLemma(null, language, variantForm);
+    } else {
+      if (donatusMorphologyDocument != null) {
+        DonatusVariant v = donatusMorphologyDocument.getVariant(variantForm);
+        if (v != null) {
+          DonatusLemma l = v.getLemma();
+          lemma = donatusMorphologyDocument.getLemma(l.getForm());
+        }
+      }
+    }
+    return lemma;
+  }
+  
+  public DonatusLemma getLemmaByLemmaForm(String language, String lemmaForm) throws ApplicationException {
+    DonatusLemma lemma = null;
+    if (mode == QUERY_MODE) {
+      lemma = readLemma(null, language, lemmaForm);
+    } else {
+      if (donatusMorphologyDocument != null) {
+        lemma = donatusMorphologyDocument.getLemma(lemmaForm);
+      }
+    }
+    return lemma;
+  }
+  
+  public ArrayList<DonatusVariant> getQueryVariants(String language, String luceneQueryString) throws ApplicationException {
+    ArrayList<DonatusVariant> result = new ArrayList<DonatusVariant>();
+    ArrayList<String> variantsFromQuery = getVariantsFromLuceneQuery(luceneQueryString);
+    if (! (variantsFromQuery == null || variantsFromQuery.isEmpty())) {
+      for (int i=0; i<variantsFromQuery.size(); i++) {
+        String variantStr = variantsFromQuery.get(i);
+        DonatusLemma lemma = getLemmaByVariantForm(language, variantStr);
+        if (lemma != null) {
+          ArrayList<DonatusVariant> lemmaVariants = lemma.getVariants();
+          result.addAll(lemmaVariants);
+        }
+      }
+    }
+    return result;
+  }
+
+  private void writeLemmas(DonatusMorphologyDocument donatusMorphologyDocument) throws ApplicationException {
+    Transaction txn = null;  // without txn
+    // Transaction txn = berkeleyDBEnv.getEnv().beginTransaction(null, null);
+    // delivers all variants of all lemmas - there may be more than one variant with the same form, but belonging to different lemmas
+    ArrayList<DonatusVariant> variants = donatusMorphologyDocument.getVariants();  
+    for (int i=0; i<variants.size(); i++) {
+      DonatusVariant newVariant = variants.get(i);
+      String newVariantForm = newVariant.getForm();
+      String language = newVariant.getLemma().getLanguage();
+      if (newVariantForm != null && language != null && ! newVariantForm.equals("") && ! language.equals("")) {
+        DonatusLemma newVariantLemma = newVariant.getLemma();
+        // check whether this variant is already contained in the variantDB and, if so, whether the stored lemma is the same as the new variant's lemma
+        DonatusLemma dbVariantLemma = readVariantLemma(txn, language, newVariantForm);
+        if (dbVariantLemma != null) {
+          if (dbVariantLemma.getForm().equals(newVariantLemma.getForm())) {
+            // the variants of newVariantLemma are added to the existing variantLemma and this lemma is saved
+            ArrayList<DonatusVariant> newVariantLemmaVariants = newVariantLemma.getVariants();
+            for (int j=0; j<newVariantLemmaVariants.size(); j++) {
+              DonatusVariant v = newVariantLemmaVariants.get(j);
+              dbVariantLemma.addVariant(v);
+            }
+            writeLemmaByVariantKey(txn, newVariant, dbVariantLemma);
+          } else {
+            // the two lemmas of the new and existing variant are not the same: nothing should be saved
+          }
+        } else {
+          writeLemmaByVariantKey(txn, newVariant, newVariantLemma);
+        }
+      }
+    }
+    // the lemmaDB is only filled here; it is not read or tested yet (reserved for future use)
+    ArrayList<DonatusLemma> lemmas = donatusMorphologyDocument.getLemmas();
+    for (int i=0; i<lemmas.size(); i++) {
+      DonatusLemma lemma = lemmas.get(i);
+      String lemmaForm = lemma.getForm();
+      String language = lemma.getLanguage();
+      if (lemmaForm != null && language != null && ! lemmaForm.equals("") && ! language.equals("")) {
+        writeLemmaByLemmaKey(txn, lemma);
+      }
+    }
+    state = new Date();
+  }
+  
+  // TODO method is simplistic: handle all Lucene query cases properly
+  private ArrayList<String> getVariantsFromLuceneQuery(String queryString) {
+    ArrayList<String> variants = new ArrayList<String>();
+    String[] variantTokens = queryString.split(" ");  // TODO throw the phrases away (e.g.: "bla bla bla")
+    for (int i = 0; i < variantTokens.length; i++) {
+      String token = variantTokens[i];
+      if (! (token.contains("*") || token.contains("?") || token.contains("~") || token.contains("-") || token.contains("+") || token.contains("^") || token.contains("OR") || token.contains("AND") || token.contains("NOT"))) {
+        variants.add(token);
+      }
+    }
+    return variants;
+  }
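+  // Example (illustrative query): "regulas motus AND proporti*" yields ["regulas", "motus"];
+  // "AND" and "proporti*" are skipped because they contain operator keywords or wildcard characters
+  // (note that the contains() checks also skip ordinary words containing "AND", "OR", "NOT" or "-").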
+
+  private void writeLemmaByVariantKey(Transaction txn, DonatusVariant variantKey, DonatusLemma lemma) throws ApplicationException {
+    try {
+      String variantKeyStr = variantKey.getLemma().getLanguage() + "###" + variantKey.getForm();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(variantKeyStr.getBytes("UTF-8"));
+      String lemmaXmlValue = lemma.getXmlString();
+      DatabaseEntry dbEntryValue = new DatabaseEntry(lemmaXmlValue.getBytes("UTF-8"));
+      Database variantDB = berkeleyDBEnv.getVariantDB();
+      variantDB.put(txn, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  private void writeLemmaByLemmaKey(Transaction txn, DonatusLemma lemma) throws ApplicationException {
+    try {
+      String lemmaKeyStr = lemma.getLanguage() + "###" + lemma.getForm();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(lemmaKeyStr.getBytes("UTF-8"));
+      String lemmaXmlValue = lemma.getXmlString();
+      DatabaseEntry dbEntryValue = new DatabaseEntry(lemmaXmlValue.getBytes("UTF-8"));
+      Database lemmaDB = berkeleyDBEnv.getLemmaDB();
+      lemmaDB.put(txn, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  private DonatusLemma readVariantLemma(Transaction txn, String language, String variantForm) throws ApplicationException {
+    DonatusLemma lemma = null;
+    String hashKey = language + "###" + variantForm;
+    try {
+      Database variantDB = berkeleyDBEnv.getVariantDB();
+      Cursor cursor = variantDB.openCursor(txn, null);
+      byte[] bHashKey = hashKey.getBytes("UTF-8");
+      DatabaseEntry dbEntryKey = new DatabaseEntry(bHashKey);
+      DatabaseEntry foundXmlLemmaValue = new DatabaseEntry();
+      OperationStatus operationStatus = variantDB.get(null, dbEntryKey, foundXmlLemmaValue, LockMode.DEFAULT);
+      if (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundXmlLemmaValueBytes = foundXmlLemmaValue.getData();
+        String foundXmlLemmaStr = new String(foundXmlLemmaValueBytes, "UTF-8");
+        lemma = parseXmlLemmaString(language, foundXmlLemmaStr);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return lemma;
+  }
+  
+  private DonatusLemma readLemma(Transaction txn, String language, String lemmaForm) throws ApplicationException {
+    DonatusLemma lemma = null;
+    String hashKey = language + "###" + lemmaForm;
+    try {
+      Database lemmaDB = berkeleyDBEnv.getLemmaDB();
+      Cursor cursor = lemmaDB.openCursor(txn, null);
+      byte[] bHashKey = hashKey.getBytes("UTF-8");
+      DatabaseEntry dbEntryKey = new DatabaseEntry(bHashKey);
+      DatabaseEntry foundXmlLemmaValue = new DatabaseEntry();
+      OperationStatus operationStatus = lemmaDB.get(null, dbEntryKey, foundXmlLemmaValue, LockMode.DEFAULT);
+      if (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundXmlLemmaValueBytes = foundXmlLemmaValue.getData();
+        String foundXmlLemmaStr = new String(foundXmlLemmaValueBytes, "UTF-8");
+        lemma = parseXmlLemmaString(language, foundXmlLemmaStr);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return lemma;
+  }
+  
+  private DonatusLemma parseXmlLemmaString(String language, String xmlLemmaString) throws ApplicationException {
+    DonatusLemma lemma = null;
+    DonatusMorphologyDocument morphologyDoc = parseDonatusMorphDoc(language, xmlLemmaString);
+    ArrayList<DonatusLemma> lemmas = morphologyDoc.getLemmas();
+    if (lemmas.size() > 0)
+      lemma = lemmas.get(0);
+    return lemma;
+  }
+
+  private DonatusMorphologyDocument parseDonatusMorphDoc(String language, String xmlString) throws ApplicationException {
+    DonatusMorphologyDocument morphologyDoc = null;
+    try {
+      XMLReader xmlParser = new SAXParser();
+      DonatusMorphologyDocumentContentHandler donatusMorphContentHandler = new DonatusMorphologyDocumentContentHandler("tempDummyUri", language);
+      xmlParser.setContentHandler(donatusMorphContentHandler);
+      String morphDocDefXml = getDonatusMorphDocDefXml();
+      String morphDocMorphStartXml = "<morphology xmlns=\"http://archimedes.fas.harvard.edu/ns/morphology/3\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n";
+      String morphDocMorphEndXml = "</morphology>";
+      String morphDocXml = morphDocDefXml + morphDocMorphStartXml + xmlString + morphDocMorphEndXml;
+      Reader reader = new StringReader(morphDocXml);
+      InputSource input = new InputSource(reader);
+      xmlParser.parse(input);
+      morphologyDoc = donatusMorphContentHandler.getResult();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return morphologyDoc;
+  }
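+  // The xmlString parameter is a cached lemma fragment as written by DonatusLemma.getXmlString(),
+  // e.g. (illustrative): <lemma form="regula" lang="la"><definition></definition><variant .../></lemma>;
+  // it is wrapped with the DTD and a morphology root element before parsing.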
+
+  private static String getDonatusMorphDocDefXml() {
+    String defXml = 
+    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
+    "<!DOCTYPE morphology [\n" +
+    "<!ELEMENT morphology (lemma*, context-form*)>\n" +
+    "<!ELEMENT lemma (definition?, variant*)>\n" +
+    "<!ELEMENT context-form (tokens, analysis)>\n" +
+    "<!ELEMENT definition (#PCDATA)>\n" +
+    "<!ELEMENT variant (analysis)*>\n" +
+    "<!ELEMENT analysis EMPTY>\n" +
+    "<!ELEMENT tokens (token+)>\n" +
+    "<!ELEMENT token EMPTY>\n" +
+    "<!ATTLIST morphology\n" +
+    "   xmlns           CDATA           #FIXED \"http://archimedes.fas.harvard.edu/ns/morphology/3\"\n" +
+    "   xmlns:xlink     CDATA           #FIXED \"http://www.w3.org/1999/xlink\">\n" +
+    "<!ATTLIST lemma\n" +
+    "   form            CDATA           #REQUIRED\n" +
+    "   lang            CDATA           #REQUIRED>\n" +
+    "<!ATTLIST definition\n" +
+    "   lang            CDATA           #IMPLIED>\n" +
+    "<!ATTLIST variant\n" +
+    "   form            CDATA           #REQUIRED\n" +
+    "   modified        (y|n)           #IMPLIED>\n" +
+    "<!ATTLIST analysis\n" +
+    "   desc            CDATA           #IMPLIED\n" +
+    "   xlink:href      CDATA           #IMPLIED\n" +
+    "   xlink:type      (simple)        #FIXED \"simple\"\n" +
+    "   form            CDATA           #IMPLIED\n" +
+    "   id              ID              #IMPLIED>\n" +
+    "<!ATTLIST context-form\n" +
+    "   lang            CDATA           #REQUIRED\n" +
+    "   xlink:href      CDATA           #REQUIRED\n" +
+    "   xlink:type      (simple)        #FIXED \"simple\">\n" +
+    "<!ATTLIST token\n" +
+    "   form            CDATA           #REQUIRED\n" +
+    "   count           CDATA           #REQUIRED>\n" +
+    "]>\n";
+    return defXml;
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusCacheOld.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,327 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Date;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sleepycat.je.Cursor;
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseEntry;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.LockMode;
+import com.sleepycat.je.OperationStatus;
+import com.sleepycat.je.Transaction;
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+
+public class DonatusCacheOld {
+  private static DonatusCacheOld instance;
+  private DonatusBerkeleyDbEnv berkeleyDBEnv = null;
+  private Date state = null;  // last time the cache was written
+
+  // for performance reasons these variables are needed
+  public static int QUERY_MODE = 0;
+  public static int DOCUMENT_MODE = 1;
+  protected int mode = QUERY_MODE;
+  // for performance reasons the cache contains a donatusMorphologyDocument which 
+  // caches all lemmas for one document (in DOCUMENT_MODE) 
+  private DonatusMorphologyDocument donatusMorphologyDocument = null;
+
+  public static DonatusCacheOld getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new DonatusCacheOld();
+      instance.init();
+    }
+    return instance;
+  }
+
+  private void init() throws ApplicationException {
+    try {
+      berkeleyDBEnv = new DonatusBerkeleyDbEnv();
+      berkeleyDBEnv.setup(false); // open databases in read/write mode
+      state = new Date();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public int getMode() {
+    return mode;  
+  }
+  
+  public void setMode(int newMode) {
+    this.mode = newMode;
+    if (newMode == QUERY_MODE)    
+      donatusMorphologyDocument = null; // reset the morphology document
+  }
+  
+  public void close() {
+    berkeleyDBEnv.close();
+  }
+  
+  // TODO invoke via the RPC API: execute(String path, HashMap parameters); dedicated MPDL function for administering BerkeleyDB: org.exist.xquery.modules.mpdldb.BerkeleyDBAdmin
+  public void deleteCache() {
+    berkeleyDBEnv.removeDatabases();
+    state = new Date();
+  }
+  
+  public void cacheLemmas(DonatusAnalyzer analyzer, String docUri, ArrayList<String> sentences) throws ApplicationException {
+    try {
+      Date beginOfOperation1 = new Date();
+      URL url = new URL(docUri);
+      String path = url.getPath();
+      System.out.print("Indexing: " + path + " Donatus-Analyze ... ");
+      DonatusHandler donatusHandler = new DonatusHandler(analyzer);
+      donatusMorphologyDocument = donatusHandler.analyze(docUri, sentences);
+      Date endOfOperation1 = new Date();
+      Double elapsedTime1 = new Util().getSecondWithMillisecondsBetween(beginOfOperation1, endOfOperation1);
+      System.out.print(elapsedTime1 + " sec ... Writing lemmas to BerkeleyDB ... ");
+      Date beginOfOperation2 = new Date();
+      writeLemmas(donatusMorphologyDocument);
+      Date endOfOperation2 = new Date();
+      Double elapsedTime2 = new Util().getSecondWithMillisecondsBetween(beginOfOperation2, endOfOperation2);
+      System.out.print(elapsedTime2 + " sec ... Stemming ... ");
+      String donMorphPath = path.replaceFirst(".xml", "-donatus-morph-v" + endOfOperation2.getTime() + ".xml");
+      String morphDocFilePathStr = DonatusConstants.BERKELEY_DB_DIR + "/donatusAnalyzedFiles" + donMorphPath;
+      FileUtil fileUtil = new FileUtil();
+      byte[] morphDocBytes = donatusMorphologyDocument.getDocumentBytes();
+      fileUtil.saveFile(morphDocBytes, morphDocFilePathStr);
+      String donWtagPath = path.replaceFirst(".xml", "-donatus-wtag-v" + endOfOperation2.getTime() + ".xml");
+      String wtagFilePathStr = DonatusConstants.BERKELEY_DB_DIR + "/donatusAnalyzedFiles" + donWtagPath;
+      byte[] wtagBytes = donatusMorphologyDocument.getWtagBytes();
+      fileUtil.saveFile(wtagBytes, wtagFilePathStr);
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    state = new Date();
+  }
+  
+  public DonatusLemma getLemma(String language, String variantForm) throws ApplicationException {
+    DonatusLemma lemma = null;
+    if (mode == QUERY_MODE) {
+      lemma = readVariantLemma(null, language, variantForm);
+    } else {
+      if (donatusMorphologyDocument != null) {
+        DonatusVariant v = donatusMorphologyDocument.getVariant(variantForm);
+        if (v != null) {
+          DonatusLemma l = v.getLemma();
+          lemma = donatusMorphologyDocument.getLemma(l.getForm());
+        }
+      }
+    }
+    return lemma;
+  }
+  
+  public ArrayList<DonatusVariant> getQueryVariants(String language, String luceneQueryString) throws ApplicationException {
+    ArrayList<DonatusVariant> result = new ArrayList<DonatusVariant>();
+    ArrayList<String> variantsFromQuery = getVariantsFromLuceneQuery(luceneQueryString);
+    if (! (variantsFromQuery == null || variantsFromQuery.isEmpty())) {
+      for (int i=0; i<variantsFromQuery.size(); i++) {
+        String variantStr = variantsFromQuery.get(i);
+        DonatusLemma lemma = getLemma(language, variantStr);
+        if (lemma != null) {
+          ArrayList<DonatusVariant> lemmaVariants = lemma.getVariants();
+          result.addAll(lemmaVariants);
+        }
+      }
+    }
+    return result;
+  }
+
+  private void writeLemmas(DonatusMorphologyDocument donatusMorphologyDocument) throws ApplicationException {
+    Transaction txn = null;  // without txn
+    // Transaction txn = berkeleyDBEnv.getEnv().beginTransaction(null, null);
+    // delivers all variants of all lemmas - there may be more than one variant with the same form, but belonging to different lemmas
+    ArrayList<DonatusVariant> variants = donatusMorphologyDocument.getVariants();  
+    for (int i=0; i<variants.size(); i++) {
+      DonatusVariant newVariant = variants.get(i);
+      String newVariantForm = newVariant.getForm();
+      String language = newVariant.getLemma().getLanguage();
+      if (newVariantForm != null && language != null && ! newVariantForm.equals("") && ! language.equals("")) {
+        DonatusLemma newVariantLemma = newVariant.getLemma();
+        // check whether this variant is already contained in the variantDB and, if so, whether the stored lemma is the same as the new variant's lemma
+        DonatusLemma dbVariantLemma = readVariantLemma(txn, language, newVariantForm);
+        if (dbVariantLemma != null) {
+          if (dbVariantLemma.getForm().equals(newVariantLemma.getForm())) {
+            // the variants of newVariantLemma are added to the existing variantLemma and this lemma is saved
+            ArrayList<DonatusVariant> newVariantLemmaVariants = newVariantLemma.getVariants();
+            for (int j=0; j<newVariantLemmaVariants.size(); j++) {
+              DonatusVariant v = newVariantLemmaVariants.get(j);
+              dbVariantLemma.addVariant(v);
+            }
+            writeLemmaByVariantKey(txn, newVariant, dbVariantLemma);
+          } else {
+            // the two lemmas of the new and existing variant are not the same: nothing should be saved
+          }
+        } else {
+          writeLemmaByVariantKey(txn, newVariant, newVariantLemma);
+        }
+      }
+    }
+    // the lemmaDB is only filled here; it is not read or tested yet (reserved for future use)
+    ArrayList<DonatusLemma> lemmas = donatusMorphologyDocument.getLemmas();
+    for (int i=0; i<lemmas.size(); i++) {
+      DonatusLemma lemma = lemmas.get(i);
+      String lemmaForm = lemma.getForm();
+      String language = lemma.getLanguage();
+      if (lemmaForm != null && language != null && ! lemmaForm.equals("") && ! language.equals("")) {
+        writeLemmaByLemmaKey(txn, lemma);
+      }
+    }
+    state = new Date();
+  }
+  
+  // TODO method is simplistic: handle all Lucene query cases properly
+  private ArrayList<String> getVariantsFromLuceneQuery(String queryString) {
+    ArrayList<String> variants = new ArrayList<String>();
+    String[] variantTokens = queryString.split(" ");  // TODO throw the phrases away (e.g.: "bla bla bla")
+    for (int i = 0; i < variantTokens.length; i++) {
+      String token = variantTokens[i];
+      if (! (token.contains("*") || token.contains("?") || token.contains("~") || token.contains("-") || token.contains("+") || token.contains("^") || token.contains("OR") || token.contains("AND") || token.contains("NOT"))) {
+        variants.add(token);
+      }
+    }
+    return variants;
+  }
+
+  private void writeLemmaByVariantKey(Transaction txn, DonatusVariant variantKey, DonatusLemma lemma) throws ApplicationException {
+    try {
+      String variantKeyStr = variantKey.getLemma().getLanguage() + "###" + variantKey.getForm();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(variantKeyStr.getBytes("UTF-8"));
+      String lemmaXmlValue = lemma.getXmlString();
+      DatabaseEntry dbEntryValue = new DatabaseEntry(lemmaXmlValue.getBytes("UTF-8"));
+      Database variantDB = berkeleyDBEnv.getVariantDB();
+      variantDB.put(txn, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  private void writeLemmaByLemmaKey(Transaction txn, DonatusLemma lemma) throws ApplicationException {
+    try {
+      String lemmaKeyStr = lemma.getLanguage() + "###" + lemma.getForm();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(lemmaKeyStr.getBytes("UTF-8"));
+      String lemmaXmlValue = lemma.getXmlString();
+      DatabaseEntry dbEntryValue = new DatabaseEntry(lemmaXmlValue.getBytes("UTF-8"));
+      Database lemmaDB = berkeleyDBEnv.getLemmaDB();
+      lemmaDB.put(txn, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  private DonatusLemma readVariantLemma(Transaction txn, String language, String variantForm) throws ApplicationException {
+    DonatusLemma lemma = null;
+    String hashKey = language + "###" + variantForm;
+    try {
+      Database variantDB = berkeleyDBEnv.getVariantDB();
+      Cursor cursor = variantDB.openCursor(txn, null);
+      byte[] bHashKey = hashKey.getBytes("UTF-8");
+      DatabaseEntry dbEntryKey = new DatabaseEntry(bHashKey);
+      DatabaseEntry foundXmlLemmaValue = new DatabaseEntry();
+      OperationStatus operationStatus = variantDB.get(null, dbEntryKey, foundXmlLemmaValue, LockMode.DEFAULT);
+      if (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundXmlLemmaValueBytes = foundXmlLemmaValue.getData();
+        String foundXmlLemmaStr = new String(foundXmlLemmaValueBytes, "UTF-8");
+        lemma = parseXmlLemmaString(language, foundXmlLemmaStr);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return lemma;
+  }
+  
+  private DonatusLemma parseXmlLemmaString(String language, String xmlLemmaString) throws ApplicationException {
+    DonatusLemma lemma = null;
+    DonatusMorphologyDocument morphologyDoc = parseDonatusMorphDoc(language, xmlLemmaString);
+    ArrayList<DonatusLemma> lemmas = morphologyDoc.getLemmas();
+    if (lemmas.size() > 0)
+      lemma = lemmas.get(0);
+    return lemma;
+  }
+
+  private DonatusMorphologyDocument parseDonatusMorphDoc(String language, String xmlString) throws ApplicationException {
+    DonatusMorphologyDocument morphologyDoc = null;
+    try {
+      XMLReader xmlParser = new SAXParser();
+      DonatusMorphologyDocumentContentHandler donatusMorphContentHandler = new DonatusMorphologyDocumentContentHandler("tempDummyUri", language);
+      xmlParser.setContentHandler(donatusMorphContentHandler);
+      String morphDocDefXml = getDonatusMorphDocDefXml();
+      String morphDocMorphStartXml = "<morphology xmlns=\"http://archimedes.fas.harvard.edu/ns/morphology/3\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n";
+      String morphDocMorphEndXml = "</morphology>";
+      String morphDocXml = morphDocDefXml + morphDocMorphStartXml + xmlString + morphDocMorphEndXml;
+      Reader reader = new StringReader(morphDocXml);
+      InputSource input = new InputSource(reader);
+      xmlParser.parse(input);
+      morphologyDoc = donatusMorphContentHandler.getResult();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return morphologyDoc;
+  }
+
+  private static String getDonatusMorphDocDefXml() {
+    String defXml = 
+    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
+    "<!DOCTYPE morphology [\n" +
+    "<!ELEMENT morphology (lemma*, context-form*)>\n" +
+    "<!ELEMENT lemma (definition?, variant*)>\n" +
+    "<!ELEMENT context-form (tokens, analysis)>\n" +
+    "<!ELEMENT definition (#PCDATA)>\n" +
+    "<!ELEMENT variant (analysis)*>\n" +
+    "<!ELEMENT analysis EMPTY>\n" +
+    "<!ELEMENT tokens (token+)>\n" +
+    "<!ELEMENT token EMPTY>\n" +
+    "<!ATTLIST morphology\n" +
+    "   xmlns           CDATA           #FIXED \"http://archimedes.fas.harvard.edu/ns/morphology/3\"\n" +
+    "   xmlns:xlink     CDATA           #FIXED \"http://www.w3.org/1999/xlink\">\n" +
+    "<!ATTLIST lemma\n" +
+    "   form            CDATA           #REQUIRED\n" +
+    "   lang            CDATA           #REQUIRED>\n" +
+    "<!ATTLIST definition\n" +
+    "   lang            CDATA           #IMPLIED>\n" +
+    "<!ATTLIST variant\n" +
+    "   form            CDATA           #REQUIRED\n" +
+    "   modified        (y|n)           #IMPLIED>\n" +
+    "<!ATTLIST analysis\n" +
+    "   desc            CDATA           #IMPLIED\n" +
+    "   xlink:href      CDATA           #IMPLIED\n" +
+    "   xlink:type      (simple)        #FIXED \"simple\"\n" +
+    "   form            CDATA           #IMPLIED\n" +
+    "   id              ID              #IMPLIED>\n" +
+    "<!ATTLIST context-form\n" +
+    "   lang            CDATA           #REQUIRED\n" +
+    "   xlink:href      CDATA           #REQUIRED\n" +
+    "   xlink:type      (simple)        #FIXED \"simple\">\n" +
+    "<!ATTLIST token\n" +
+    "   form            CDATA           #REQUIRED\n" +
+    "   count           CDATA           #REQUIRED>\n" +
+    "]>\n";
+    return defXml;
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusConstants.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,11 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+public class DonatusConstants {
+  public static String DEFAULT_LANGUAGE = "en";
+  public static String SERVER_NAME = "archimedes.fas.harvard.edu";
+  public static int PORT = 80;
+  public static String URI_RPC_CALL = "/cgi-bin/donatus-rpc";
+  public static String BERKELEY_DB_DIR = System.getProperty("exist.home") + "/webapp/WEB-INF/data/berkeleyDB";  // other call would be: ConfigurationHelper.getExistHome()
+  public static String TYPE_DONATUS = "donatus";
+  public static String TYPE_SNOWBALL = "snowball";
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusContextForm.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,24 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.util.ArrayList;
+
+public class DonatusContextForm {
+  private String language;
+  private String xlinkHref;
+  private ArrayList<DonatusToken> tokens;
+  private DonatusAnalysis analysis;
+
+  public DonatusContextForm(String language) {
+    this.language = language;
+  }
+  
+  public String getLanguage() {
+    return language;
+  }
+
+  // TODO rest
+  public String getXmlString() {
+    String xmlString = "<context-form lang=\"" + language + "\"></context-form>";  
+    return xmlString;
+  }
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,31 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.util.ArrayList;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.analysis.DonatusAnalyzer;
+
+public class DonatusHandler {
+  private DonatusXmlRpcClient xmlClient = null;
+  private DonatusAnalyzer donatusAnalyzer;
+
+  public DonatusHandler(DonatusAnalyzer donatusAnalyzer) {
+    this.xmlClient = new DonatusXmlRpcClient();
+    this.donatusAnalyzer = donatusAnalyzer;
+  }
+
+  public DonatusMorphologyDocument analyze(String docUri, ArrayList<String> sentences) {
+    DonatusWtagDocument donatusWtagDoc = new DonatusWtagDocument(docUri);
+    String language = donatusAnalyzer.getLanguage();
+    DonatusWtagSection donatusWtagSection = new DonatusWtagSection(language);
+    donatusWtagDoc.addSection(donatusWtagSection);
+    for (int i = 0; i < sentences.size(); i++) {
+      String sentence = sentences.get(i);
+      ArrayList<String> tokens = donatusAnalyzer.getToken(sentence);
+      DonatusWtagContainer donatusWtagContainer = new DonatusWtagContainer("s", Integer.toString(i), tokens); // a sentence container
+      donatusWtagSection.addContainer(donatusWtagContainer);
+    }
+    DonatusMorphologyDocument donatusMorphDocument = xmlClient.analyze(donatusWtagDoc);
+    return donatusMorphDocument;
+  }
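+  // Usage sketch (illustrative): given a configured DonatusAnalyzer for the document language,
+  //   DonatusHandler handler = new DonatusHandler(analyzer);
+  //   DonatusMorphologyDocument morphDoc = handler.analyze(docUri, sentences);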
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusLemma.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,83 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.util.ArrayList;
+
+public class DonatusLemma {
+  private DonatusMorphologyDocument morphDocument;
+  private String language;
+  private String type = DonatusConstants.TYPE_DONATUS;
+  private String form;
+  private String definition = "";
+  private ArrayList<DonatusVariant> variants;
+
+  public DonatusLemma(DonatusMorphologyDocument morphDoc, String language, String type, String form) {
+    this.morphDocument = morphDoc;
+    this.language = language;
+    this.type = type;
+    this.form = form;
+    this.variants = new ArrayList<DonatusVariant>();
+    // the lemma form is always a variant of itself
+    DonatusVariant variant = new DonatusVariant(this, type, form);
+    variants.add(variant);
+  }
+  
+  public String getLanguage() {
+    return language;
+  }
+
+  public String getForm() {
+    return form;
+  }
+
+  public ArrayList<DonatusVariant> getVariants() {
+    return variants;
+  }
+  
+  public ArrayList<DonatusVariant> getVariants(String type) {
+    ArrayList<DonatusVariant> result = new ArrayList<DonatusVariant>();
+    for (int i=0; i<variants.size(); i++) {
+      DonatusVariant variant = variants.get(i);
+      String vType = variant.getType();
+      if (vType.equals(type))
+        result.add(variant);
+    }
+    return result;
+  }
+  
+  public void setDefinition(String def) {
+    this.definition = def;  
+  }
+  
+  public void addVariant(DonatusVariant variant) {
+    morphDocument.putVariant(variant);
+    if (! variants.contains(variant) && ! variantsContainsForm(variant.getForm()))
+      variants.add(variant);  
+  }
+  
+  public String getXmlString() {
+    String xmlString = "<lemma form=\"" + form + "\" lang=\"" + language + "\">";
+    xmlString += "<definition>" + definition + "</definition>";
+    for (int i=0; i<variants.size(); i++) {
+      DonatusVariant variant = variants.get(i); 
+      String variantXmlString = variant.getXmlString();
+      xmlString += variantXmlString;
+    }
+    xmlString += "</lemma>\n";
+    return xmlString;
+  }
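+  // For illustration, a lemma "regula" (lang "la") with variants "regula" and "regulas" would serialize to:
+  //   <lemma form="regula" lang="la"><definition></definition><variant .../><variant .../></lemma>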
+  
+  public String toString() {
+    return getXmlString();
+  }
+  
+  private boolean variantsContainsForm(String variantForm) {
+    boolean contains = false;
+    for (int i=0; i<variants.size(); i++) {
+      DonatusVariant v = variants.get(i); 
+      String vForm = v.getForm();
+      if (vForm.equals(variantForm))
+        contains = true;
+    }
+    return contains;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusMorphologyDocument.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,141 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class DonatusMorphologyDocument {
+  private String docUri;
+  private String language = DonatusConstants.DEFAULT_LANGUAGE;
+  private HashMap<String, DonatusVariant> variants; // kept for performance reasons: variants indexed by language and variant form
+  private HashMap<String, DonatusLemma> lemmas;
+  private ArrayList<DonatusContextForm> contextForms;
+  private byte[] documentBytes;
+  private byte[] wtagBytes;
+
+  public DonatusMorphologyDocument(String docUri) {
+    this.docUri = docUri;
+    this.variants = new HashMap<String, DonatusVariant>();
+    this.lemmas = new HashMap<String, DonatusLemma>();
+    this.contextForms = new ArrayList<DonatusContextForm>();
+  }
+
+  public ArrayList<DonatusLemma> getLemmas() {
+    Collection<DonatusLemma> values = lemmas.values();
+    ArrayList<DonatusLemma> retArrayList = new ArrayList<DonatusLemma>();
+    Iterator<DonatusLemma> iter = values.iterator(); 
+    while(iter.hasNext()) {
+      DonatusLemma lemma = iter.next();
+      retArrayList.add(lemma);
+    } 
+    return retArrayList;
+  }
+  
+  public String getDocUri() {
+    return docUri;
+  }
+  
+  public ArrayList<DonatusContextForm> getContextForms() {
+    return contextForms;
+  }
+  
+  public void setWtagBytes(byte[] wtagBytes) {
+    this.wtagBytes = wtagBytes;
+  }
+  
+  public byte[] getWtagBytes() {
+    return wtagBytes;  
+  }
+  
+  public byte[] getDocumentBytes() throws ApplicationException {
+    String contentXml = getXmlString();
+    try {
+      documentBytes = contentXml.getBytes("UTF-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return documentBytes;  
+  }
+  
+  public String getLemmaFormsString() {
+    String result = "";
+    ArrayList<DonatusLemma> lemmas = getLemmas();
+    for (int i=0; i<lemmas.size(); i++) {
+      DonatusLemma lemma = lemmas.get(i);
+      String lemmaFormString = lemma.getForm();
+      result += lemmaFormString + " ";
+    }
+    return result.trim();
+  }
+  
+  public ArrayList<DonatusVariant> getVariants() {
+    ArrayList<DonatusVariant> result = new ArrayList<DonatusVariant>();
+    ArrayList<DonatusLemma> lemmas = getLemmas();
+    for (int i=0; i<lemmas.size(); i++) {
+      DonatusLemma lemma = lemmas.get(i);
+      ArrayList<DonatusVariant> lemmaVariants = lemma.getVariants();
+      result.addAll(lemmaVariants);
+    }
+    return result;
+  }
+  
+  public String getLanguage() {
+    return language;  
+  }
+  
+  public void setLanguage(String language) {
+    this.language = language;  
+  }
+  
+  public DonatusVariant getVariant(String variantForm) {
+    String hashKey = language + "###" + variantForm;
+    DonatusVariant variant = variants.get(hashKey);
+    return variant;
+  }
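+  // Note: variant and lemma hash keys combine the document language and the form, e.g. "la###regulas" (illustrative value).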
+  
+  public DonatusVariant putVariant(DonatusVariant variant) {
+    String variantForm = variant.getForm();
+    String hashKey = language + "###" + variantForm;
+    DonatusVariant putReturn = this.variants.put(hashKey, variant);
+    return putReturn;
+  }
+  
+  public DonatusLemma getLemma(String lemmaForm) {
+    String hashKey = language + "###" + lemmaForm;
+    DonatusLemma lemma = lemmas.get(hashKey);
+    return lemma;
+  }
+  
+  public void putLemma(DonatusLemma lemma) { 
+    String lemmaForm = lemma.getForm();
+    String lang = lemma.getLanguage();
+    String hashKey = lang + "###" + lemmaForm;
+    lemmas.put(hashKey, lemma);
+  }
+  
+  public void addContextForm(DonatusContextForm form) {
+    contextForms.add(form);  
+  }
+  
+  public String getXmlString() {
+    StringBuffer xmlString = new StringBuffer("<morphology>");
+    ArrayList<DonatusLemma> lemmas = getLemmas();
+    for (int i=0; i<lemmas.size(); i++) {
+      DonatusLemma lemma = lemmas.get(i);
+      String lemmaXmlString = lemma.getXmlString();
+      xmlString.append(lemmaXmlString);
+    }
+    for (int i=0; i<contextForms.size(); i++) {
+      DonatusContextForm contextForm = contextForms.get(i);
+      String contextFormXmlString = contextForm.getXmlString();
+      xmlString.append(contextFormXmlString);
+    }
+    xmlString.append("</morphology>");
+    return xmlString.toString();
+  }
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusMorphologyDocumentContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,132 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import org.xml.sax.*;
+
+public class DonatusMorphologyDocumentContentHandler implements ContentHandler {
+  private String docUri;
+  private DonatusMorphologyDocument result;
+  private String language;
+  private Element currentElement;
+  private DonatusLemma currentLemma;
+  private DonatusVariant currentVariant;
+  
+  public DonatusMorphologyDocumentContentHandler(String docUri, String language) {
+    this.docUri = docUri;
+    this.language = language;
+  }
+  
+  public DonatusMorphologyDocument getResult() {
+    return result;
+  }
+  
+  public void startDocument() throws SAXException {
+  }
+
+  public void endDocument() throws SAXException {
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    if (currentElement != null && currentElement.name.equals("definition")) {
+      char[] cCopy = new char[length];
+      System.arraycopy(c, start, cCopy, 0, length);
+      currentLemma.setDefinition(String.valueOf(cCopy));
+    }
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(org.xml.sax.Locator arg1) {
+  }
+
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    currentElement = new Element(name);
+    if (name.equals("morphology")) {
+      result = new DonatusMorphologyDocument(docUri);
+      result.setLanguage(language);
+    } else if (name.equals("lemma")) {
+      String language = "";
+      String form = "";
+      if (attrs != null) {
+        int length = attrs.getLength();
+        for (int i = 0; i < length; i++) {
+          String attrName = attrs.getLocalName(i);
+          if (attrName.equals("form")) {
+            form = attrs.getValue(i);
+            if (form.matches(".*#\\d*"))
+              form = form.replaceFirst("#\\d*", "");  // remove #number in the lemma form 
+          } else if (attrName.equals("lang")) {
+            language = attrs.getValue(i);
+          }
+        }
+      }
+      DonatusLemma morphDocLemma = result.getLemma(form);
+      if (morphDocLemma == null) {
+        DonatusLemma newLemma = new DonatusLemma(result, language, DonatusConstants.TYPE_DONATUS, form);
+        currentLemma = newLemma;
+        result.putLemma(currentLemma);
+      } else {
+        currentLemma = morphDocLemma;  // lemma with same form exists already in morphology document e.g. a lemma with a different #number in its form
+      }
+    } else if (name.equals("variant")) {
+      String form = "";
+      if (attrs != null) {
+        int length = attrs.getLength();
+        for (int i = 0; i < length; i++) {
+          String attrName = attrs.getLocalName(i);
+          if (attrName.equals("form"))
+            form = attrs.getValue(i);
+        }
+      }
+      DonatusVariant variant = new DonatusVariant(currentLemma, DonatusConstants.TYPE_DONATUS, form);
+      currentVariant = variant;
+      if (currentLemma != null)
+        currentLemma.addVariant(variant);
+    } else if (name.equals("analysis")) {
+      String desc = "";
+      String xlink = "";
+      if (attrs != null) {
+        int length = attrs.getLength();
+        for (int i = 0; i < length; i++) {
+          String attrName = attrs.getQName(i);
+          if (attrName.equals("desc"))
+            desc = attrs.getValue(i);
+          else if (attrName.equals("xlink:type"))
+            xlink = attrs.getValue(i);
+        }
+      }
+      DonatusAnalysis analysis = new DonatusAnalysis(desc, xlink);
+      if(currentVariant != null)
+        currentVariant.addAnalysis(analysis);
+    }
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+  }
+  
+  private class Element {
+    String name;
+    String value;
+    
+    Element(String name) {
+      this.name = name;
+    }
+
+    Element(String name, String value) {
+      this.name = name;
+      this.value = value;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusToken.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,19 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
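+// Simple value object: a token form together with its occurrence count.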
+public class DonatusToken {
+  private int count;
+  private String form;
+
+  public DonatusToken(int count, String form) {
+    this.count = count;
+    this.form = form;
+  }
+  
+  public String getForm() {
+    return form;
+  }
+
+  public int getCount() {
+    return count;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusVariant.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,45 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.util.ArrayList;
+
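+// A variant (surface form) belonging to a lemma, together with the analyses
+// delivered by Donatus; serialized as a <variant> element.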
+public class DonatusVariant {
+  private DonatusLemma lemma; // lemma to which this variant belongs
+  private String form;
+  private String type = DonatusConstants.TYPE_DONATUS;
+  private ArrayList<DonatusAnalysis> analysis;
+
+  public DonatusVariant(DonatusLemma lemma, String type, String form) {
+    this.lemma = lemma;
+    this.type = type;
+    this.form = form;
+    this.analysis = new ArrayList<DonatusAnalysis>();
+  }
+  
+  public String getForm() {
+    return form;
+  }
+  
+  public DonatusLemma getLemma() {
+    return lemma;
+  }
+  
+  public String getType() {
+    return type;
+  }
+  
+  public void addAnalysis(DonatusAnalysis an) {
+    analysis.add(an);  
+  }
+  
+  public String getXmlString() {
+    StringBuffer xmlString = new StringBuffer("<variant form=\"" + form + "\" type=\"" + type + "\">");
+    for (int i=0; i<analysis.size(); i++) {
+      DonatusAnalysis a = analysis.get(i);
+      xmlString.append(a.getXmlString());
+    }
+    xmlString.append("</variant>");
+    return xmlString.toString();
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusWtagContainer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,38 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.util.ArrayList;
+
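+// One container element of a wtag document (e.g. a sentence element "s" with an id)
+// holding the word forms it contains; serialized as <name id="..."><w>...</w>...</name>.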
+public class DonatusWtagContainer {
+  private String name; // element name e.g. s
+  private String id; // id string e.g. 1
+  private ArrayList<String> words;
+
+  public DonatusWtagContainer(String name, String id, ArrayList<String> words) {
+    this.name = name;
+    this.id = id;
+    this.words = words;
+  }
+  
+  public String getName() {
+    return name;
+  }
+
+  public String getId() {
+    return id;
+  }
+  
+  public ArrayList<String> getWords() {
+    return words;
+  }
+
+  public String getXmlString() {
+    StringBuffer xmlString = new StringBuffer("<" + name + " id=\"" + id + "\">");
+    for (int i=0; i<words.size(); i++) {
+      String w = words.get(i);
+      xmlString.append("<w>" + w + "</w>");
+    }
+    xmlString.append("</" + name + ">");
+    return xmlString.toString();
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusWtagDocument.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,72 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.util.ArrayList;
+
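+// In-memory representation of a Donatus wtag request document: a locator plus
+// one section per language.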
+public class DonatusWtagDocument {
+  private String locator;  // e.g. http://archimedes/037.xml
+  private ArrayList<DonatusWtagSection> donatusWtagSections;
+
+  public DonatusWtagDocument(String locator) {
+    this.locator = locator;
+    donatusWtagSections = new ArrayList<DonatusWtagSection>();
+  }
+
+  public DonatusWtagDocument(String locator, ArrayList<DonatusWtagSection> donatusWtagSections) {
+    this.locator = locator;
+    this.donatusWtagSections = donatusWtagSections;
+  }
+
+  public String getLocator() {
+    return locator;  
+  }
+  
+  public ArrayList<DonatusWtagSection> getSections() {
+    return donatusWtagSections;
+  }
+  
+  public String getLanguage() {
+    if (donatusWtagSections != null && donatusWtagSections.size() > 0)
+      return donatusWtagSections.get(0).getLanguage();
+    else
+      return DonatusConstants.DEFAULT_LANGUAGE;
+  }
+  
+  public DonatusWtagContainer getContainer(String language, int position) {
+    DonatusWtagContainer retContainer = null;
+    DonatusWtagSection s = getSection(language);
+    if (s != null) {
+      ArrayList<DonatusWtagContainer> containerArray = s.getContainer();
+      if (containerArray != null)
+        retContainer = containerArray.get(position);
+    }
+    return retContainer;
+  }
+  
+  public DonatusWtagSection getSection(String language) {
+    if (donatusWtagSections == null)
+      return null;
+    for (int i=0; i<donatusWtagSections.size(); i++) {
+      DonatusWtagSection s = donatusWtagSections.get(i);
+      if (s.getLanguage().equalsIgnoreCase(language))
+        return s;
+    }
+    return null;
+  }
+  
+  public String getXmlString() {
+    StringBuffer xmlString = new StringBuffer("<wtag locator=\"" + locator + "\">");
+    ArrayList<DonatusWtagSection> sections = getSections();
+    for (int i=0; i<sections.size(); i++) {
+      DonatusWtagSection section = sections.get(i);
+      String sectionXmlString = section.getXmlString();
+      xmlString.append(sectionXmlString);
+    }
+    xmlString.append("</wtag>");
+    return xmlString.toString();
+  }
+  
+  public void addSection(DonatusWtagSection section) {
+    donatusWtagSections.add(section);  
+  }
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusWtagSection.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,45 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.util.ArrayList;
+
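+// A language-specific section of a wtag document holding its containers.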
+public class DonatusWtagSection {
+  private String language = "en";
+  private ArrayList<DonatusWtagContainer> container;
+
+  public DonatusWtagSection() {
+    this.container = new ArrayList<DonatusWtagContainer>();
+  }
+  
+  public DonatusWtagSection(String language) {
+    this.language = language;
+    this.container = new ArrayList<DonatusWtagContainer>();
+  }
+  
+  public void addContainer(DonatusWtagContainer c) {
+    container.add(c);  
+  }
+  
+  public String getLanguage() {
+    return language;
+  }
+
+  public void setLanguage(String language) {
+    this.language = language;  
+  }
+  
+  public ArrayList<DonatusWtagContainer> getContainer() {
+    return container;
+  }
+  
+  public String getXmlString() {
+    StringBuffer xmlString = new StringBuffer("<section lang=\"" + language + "\">");
+    for (int i=0; i<container.size(); i++) {
+      DonatusWtagContainer c = container.get(i);
+      xmlString.append(c.getXmlString());
+    }
+    xmlString.append("</section>");
+    return xmlString.toString();
+  }
+  
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/donatus/xmlrpc/DonatusXmlRpcClient.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,106 @@
+package de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.Vector;
+
+import org.apache.xmlrpc.XmlRpcException;
+import org.apache.xmlrpc.client.XmlRpcClient;
+import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.XMLReaderFactory;
+
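+// XML-RPC client for the Donatus morphological analysis service: sends a wtag
+// document as UTF-8 bytes to the "donatus.analyze" method and parses the returned
+// morphology data into a DonatusMorphologyDocument.
+//
+// Usage sketch (host and port are placeholders, not actual deployment values):
+//   DonatusXmlRpcClient client = new DonatusXmlRpcClient("donatus.example.org", 8080);
+//   DonatusMorphologyDocument morphDoc = client.analyze(wtagDocument);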
+public class DonatusXmlRpcClient {
+  private String serverName = DonatusConstants.SERVER_NAME;
+  private int port = DonatusConstants.PORT;
+  private String uriRpcCall = DonatusConstants.URI_RPC_CALL;
+  private String xmlRpcUri = "http://" + serverName + ":" + port + uriRpcCall;
+  private XmlRpcClient xmlClient = null;
+
+  public DonatusXmlRpcClient() {
+    this.xmlRpcUri = "http://" + serverName + uriRpcCall;
+    init();
+  }
+
+  public DonatusXmlRpcClient(String serverName) {
+    this.serverName = serverName;
+    this.xmlRpcUri = "http://" + serverName + uriRpcCall;
+    init();
+  }
+
+  public DonatusXmlRpcClient(String serverName, int port) {
+    this.serverName = serverName;
+    this.port = port;
+    this.xmlRpcUri = "http://" + serverName + ":" + port + uriRpcCall;
+    init();
+  }
+
+  private void init() {
+    try {
+      XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
+      config.setServerURL(new URL(xmlRpcUri));
+      xmlClient = new XmlRpcClient();
+      xmlClient.setConfig(config);
+    } catch (MalformedURLException e) {
+      e.printStackTrace();
+    }    
+  }
+  
+  public DonatusMorphologyDocument analyze(DonatusWtagDocument donatusWtagDocument) {
+    DonatusMorphologyDocument morphologyDoc = null;
+    try {
+      String language = donatusWtagDocument.getLanguage();
+      String wtagString = donatusWtagDocument.getXmlString();
+      Vector params = new Vector();
+      byte[] wtagBytes = wtagString.getBytes("UTF-8");
+      params.add(wtagBytes);
+      HashMap donatusReturn = (HashMap) xmlClient.execute("donatus.analyze", params);
+      Object morphologyDocTypeObject = donatusReturn.get("morphData");
+      byte[] bytesMorphologyDocTypeObject = (byte[]) morphologyDocTypeObject;
+      String morphologyDocTypeString = new String(bytesMorphologyDocTypeObject, "UTF-8");
+      XMLReader xmlParser = XMLReaderFactory.createXMLReader();
+      String docUri = donatusWtagDocument.getLocator();
+      DonatusMorphologyDocumentContentHandler donatusMorphContentHandler = new DonatusMorphologyDocumentContentHandler(docUri, language);
+      xmlParser.setContentHandler(donatusMorphContentHandler);
+      Reader reader = new StringReader(morphologyDocTypeString);
+      InputSource input = new InputSource(reader);
+      xmlParser.parse(input);
+      morphologyDoc = donatusMorphContentHandler.getResult();
+      morphologyDoc.setWtagBytes(wtagBytes);
+    } catch (SAXException e) {
+      e.printStackTrace();
+    } catch (XmlRpcException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return morphologyDoc;
+  }
+  
+  public String analyze(String locator, String language, String documentString) {
+    String morphologyDocTypeString = null;
+    try {
+      String wtagStart = "<wtag locator=\"" + locator + "\"><section lang=\"" + language + "\">";
+      String wtagEnd = "</section></wtag>";
+      String wtagString = wtagStart + documentString + wtagEnd;
+      Vector params = new Vector();
+      params.add(wtagString.getBytes("UTF-8"));
+      HashMap donatusReturn = (HashMap) xmlClient.execute("donatus.analyze", params);
+      Object morphologyDocTypeObject = donatusReturn.get("morphData");
+      morphologyDocTypeString = new String((byte[])morphologyDocTypeObject, "UTF-8");
+    } catch (XmlRpcException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return morphologyDocTypeString;
+  }
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Component.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,39 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
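+// One component of an eSciDoc item: a content reference (URL and storage type)
+// plus properties such as valid-status, visibility, content category and mime type;
+// rendered as an escidocComponents:component XML fragment.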
+public class Component {
+  private String validStatus;  // e.g. valid
+  private String visibility;  // e.g. public
+  // private String fileName; // original file name: e.g. blabla.xml
+  private String contentCategory;  // e.g. "pre-print" or "JPEG_DEFAULT" or ...
+  private String mimeType;  // e.g. "text/xml"
+  private String url;
+  private String storage; // e.g. "internal-managed" or "external-url" or "external-managed"
+  
+  public Component(String validStatus, String visibility, String contentCategory, String mimeType, String url, String storage) {
+    this.validStatus = validStatus;
+    this.visibility = visibility;
+    this.contentCategory = contentCategory;
+    this.mimeType = mimeType;
+    this.url = url;
+    this.storage = storage;
+  }
+
+  public String toXmlString() {
+    StringBuilder str = new StringBuilder();
+    str.append("    <escidocComponents:component>\n");
+    str.append("      <escidocComponents:properties>\n");
+    str.append("        <prop:valid-status>" + validStatus + "</prop:valid-status>\n");
+    str.append("        <prop:visibility>" + visibility + "</prop:visibility>\n");
+    // if (componentFileName != null)
+    //   str.append("        <prop:file-name>" + componentFileName + "</prop:file-name>\n");
+    if (contentCategory != null)
+      str.append("        <prop:content-category>" + contentCategory + "</prop:content-category>\n");
+    if (mimeType != null)
+      str.append("        <prop:mime-type>" + mimeType + "</prop:mime-type>\n");
+    str.append("      </escidocComponents:properties>\n");
+    str.append("      <escidocComponents:content xlink:href=\"" + url + "\" storage=\"" + storage + "\"/>\n");
+    str.append("    </escidocComponents:component>\n");
+    
+    return str.toString();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Container.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,74 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.util.Date;
+
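+// An eSciDoc container; toXmlString() renders the container XML with its context,
+// content model, optional PID and metadata record.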
+public class Container {
+  private String id;
+  private String contentModelId;  
+  private String contextId;
+  private String pid;
+  private MetadataRecord mdRecord;
+  private Date lastModificationDate;
+  
+  public Container(String id, Date lastModificationDate) {
+    this.id = id;
+    this.lastModificationDate = lastModificationDate;
+  }
+
+  public Container(String contentModelId, String contextId, String pid, MetadataRecord mdRecord) {
+    this.contentModelId = contentModelId;
+    this.contextId = contextId;
+    this.pid = pid;
+    this.mdRecord = mdRecord;
+  }
+  
+  public String getId() {
+    return id;
+  }
+  
+  public Date getLastModificationDate() {
+    return lastModificationDate;  
+  }
+
+  public String toXmlString() {
+    StringBuilder str = new StringBuilder();
+    str.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+    str.append("<container:container xml:base=\"http://www.escidoc.de/\"\n");
+    str.append("  xmlns=\"http://www.escidoc.de/schemas/container/0.8\"\n");
+    str.append("  xmlns:container=\"http://www.escidoc.de/schemas/container/0.8\"\n");
+    str.append("  xmlns:prop=\"http://escidoc.de/core/01/properties/\"\n");
+    str.append("  xmlns:srel=\"http://escidoc.de/core/01/structural-relations/\"\n");
+    str.append("  xmlns:escidocMetadataRecords=\"http://www.escidoc.de/schemas/metadatarecords/0.5\"\n");
+    str.append("  xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n");
+    str.append("  xmlns:mpiwg=\"http://www.mpiwg-berlin.mpg.de/ns/mpiwg\"\n");
+    str.append("  xmlns:escidocComponents=\"http://www.escidoc.de/schemas/components/0.9\">\n");
+
+    str.append("  <container:properties>\n");
+    str.append("    <srel:context xlink:href=\"" + contextId + "\"/>\n");
+    str.append("    <srel:content-model xlink:href=\"" + contentModelId + "\"/>\n");
+    if (pid != null) {
+      str.append("    <prop:pid>" + pid + "</prop:pid>\n");
+    }
+    str.append("    <prop:content-model-specific>\n");
+    str.append("      <my-content-model-value-structure />\n");
+    str.append("    </prop:content-model-specific>\n");
+    str.append("  </container:properties>\n");
+
+    str.append("  <!-- A Container could have multiple meta data records. -->\n");
+    str.append("  <escidocMetadataRecords:md-records>\n");
+    str.append("    <!-- At least one meta data record with name escidoc is needed. -->\n");
+    str.append("    <escidocMetadataRecords:md-record name=\"escidoc\">\n");
+    if (mdRecord != null) {
+      str.append(mdRecord.toXmlString());
+    } else {
+      str.append("      <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
+      str.append("      </metadata>\n");
+    }
+    str.append("    </escidocMetadataRecords:md-record>\n");
+        
+    str.append("  </escidocMetadataRecords:md-records>\n");
+
+    str.append("</container:container>\n");
+    return str.toString();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Context.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,40 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
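+// An eSciDoc context; toXmlString() renders the context XML including its name,
+// description, type and owning organizational unit.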
+public class Context {
+  private String organizationalUnit = "/oum/organizational-unit/escidoc:ex3";
+  private String name = "MPDL-XML-Test";
+  private String description = "MPDL-XML-Test";
+  private String type = "MpdlType";
+  
+  public Context(String organizationalUnit, String name, String description, String type) {
+    this.organizationalUnit = organizationalUnit;
+    this.name = name;
+    this.description = description;
+    this.type = type;
+  }
+  
+  public String toXmlString() {
+    StringBuilder str = new StringBuilder();
+    str.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+    str.append("<context:context\n");
+    str.append("  xmlns=\"http://www.escidoc.de/schemas/context/0.7\"\n");
+    str.append("  xmlns:context=\"http://www.escidoc.de/schemas/context/0.7\"\n");
+    str.append("  xmlns:prop=\"http://escidoc.de/core/01/properties/\"\n");
+    str.append("  xmlns:srel=\"http://escidoc.de/core/01/structural-relations/\"\n");
+    str.append("  xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n");
+    str.append("  xmlns:xml=\"http://www.w3.org/XML/1998/namespace\">\n");
+    str.append("  <context:properties>\n");
+    str.append("  <prop:name>" + name + "</prop:name>\n");
+    str.append("  <prop:description>" + description + "</prop:description>\n");
+    str.append("  <prop:type>" + type + "</prop:type>\n");
+    if (organizationalUnit != null) {
+      str.append("  <prop:organizational-units>\n");
+      str.append("    <srel:organizational-unit\n");
+      str.append("      xlink:href=\"" + organizationalUnit + "\" />\n");
+      str.append("  </prop:organizational-units>\n");
+    }
+    str.append("  </context:properties>\n");
+    str.append("</context:context>\n");
+    return str.toString();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocIngestor.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,213 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.util.ArrayList;
+import java.util.Date;
+
+import javax.xml.namespace.NamespaceContext;
+
+import org.w3c.dom.Node;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;
+
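+// Executes document operations (create, update, delete) against the eSciDoc
+// repository via an ESciDocRestSession: the fulltext is uploaded to the eSciDoc
+// stage area, and each document gets its own container with a fulltext item and
+// one item per page image reference.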
+public class ESciDocIngestor {
+  private ESciDocRestSession eSciDocSession;
+  
+  public ESciDocIngestor(ESciDocRestSession eSciDocSession) {
+    this.eSciDocSession = eSciDocSession;
+  }
+
+  public String execute(MpdlDocOperation docOperation) throws ApplicationException {
+    String performedContainerId = null;
+    String operationName = docOperation.getName();
+    if (operationName.equals("create")) {
+      performedContainerId = createDocument(docOperation);
+    } else if (operationName.equals("update")) {
+      performedContainerId = updateDocument(docOperation);
+    } else if (operationName.equals("delete")) {
+      performedContainerId = deleteDocument(docOperation);
+    }
+    String performedESciDocUrl = "http://" + MpdlConstants.MPDL_ESCIDOC_HOST_NAME + ":" + MpdlConstants.MPDL_ESCIDOC_PORT + performedContainerId;
+    docOperation.setESciDocDestUrl(performedESciDocUrl);
+    return performedESciDocUrl;
+  }
+  
+  private String createDocument(MpdlDocOperation docOperation) throws ApplicationException {
+    String pid = eSciDocSession.getPid();
+    String docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ECHO_CONTAINER_ID;
+    String eXistIdentifier = docOperation.getDestUrl();  // e.g. /echo/la/Benedetti_1585.xml
+    String docBase = docOperation.getDocBase();
+    if (docBase != null && docBase.equals("archimedes"))
+      docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID;
+    docOperation.setStatus("create document: " + eXistIdentifier + " on eSciDoc server");
+    String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
+    // upload the file to the eSciDoc stage area
+    String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName);
+    Node docNode = docOperation.getDocNode();
+    MetadataRecord mdRecord = docOperation.getMdRecord();
+    if (mdRecord != null) {
+      mdRecord.setMediaType("fulltext");
+    }
+    // create document container for all items
+    Container newContainer = eSciDocSession.createContainerInContainer(pid, mdRecord, docBaseContainerId);
+    String newContainerId = newContainer.getId();
+    Date lastModificationDate = newContainer.getLastModificationDate();
+    eSciDocSession.submitContainer(newContainerId, lastModificationDate, "create document");
+    // create the fulltext item
+    String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=text";
+    ArrayList<Component> components = new ArrayList<Component>();
+    String contentCategory = "fulltext XML - ECHO";
+    if (docBase != null && docBase.equals("archimedes"))
+      contentCategory = "fulltext XML - Archimedes";
+    Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed");
+    Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url");
+    components.add(componentXmlFulltext);
+    components.add(componentExistViewer);
+    Item fulltextItem = eSciDocSession.createItemInContainer(newContainerId, pid, mdRecord, components);
+    String fulltextItemId = fulltextItem.getId();
+    Date fulltextItemLastModificationDate = fulltextItem.getLastModificationDate();
+    eSciDocSession.submitItem(fulltextItemId, fulltextItemLastModificationDate, "create document");
+    // page items: for each page create one item
+    SchemaHandler schemaHandler = new SchemaHandler();
+    ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase); 
+    createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, newContainerId);
+    return newContainerId;
+  }
+  
+  private String updateDocument(MpdlDocOperation docOperation) throws ApplicationException {
+    String docBase = docOperation.getDocBase();
+    String eXistIdentifier = docOperation.getDestUrl();
+    String pid = eSciDocSession.getPid();
+    String destFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifier;
+    String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier);
+    if (documentContainerId == null)
+      throw new ApplicationException("Document:" + eXistIdentifier + " does not exist.");
+    docOperation.setStatus("update document: " + eXistIdentifier + " on eSciDoc server");
+    // first: upload file to eSciDoc stage area and validate it
+    String eSciDocStageAreaUrl = eSciDocSession.uploadFileToESciDocStageArea(destFileName);
+    // RelaxNG schema validation
+    Node docNode = docOperation.getDocNode();
+    MetadataRecord mdRecord = docOperation.getMdRecord();
+    if (mdRecord != null) {
+      mdRecord.setMediaType("fulltext");
+    }
+    // second: delete all members of the container (page image reference items)
+    String deleteFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "image" + "</filter>";  // filter to find items of type image
+    deleteContainerItems(docOperation, documentContainerId, deleteFilter);
+    // third: update the fulltext item
+    String fulltextItemFilter = "<filter name=\"/md-records/md-record/metadata/mediaType\">" + "fulltext" + "</filter>";  // filter to find items of type fulltext
+    String fulltextItemsXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(documentContainerId, fulltextItemFilter);
+    String fulltextItemId = eSciDocSession.getFirstItemId(fulltextItemsXmlStr);
+    if (fulltextItemId == null || fulltextItemId.trim().equals(""))
+      throw new ApplicationException("Update of document is not possible: there is no fulltext item in the document container.");
+    Date fulltextItemVersionDate = eSciDocSession.getVersionDate(fulltextItemsXmlStr);
+    ArrayList<Component> components = new ArrayList<Component>();
+    String existViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=text";
+    String contentCategory = "fulltext XML - ECHO";
+    if (docBase != null && docBase.equals("archimedes"))
+      contentCategory = "fulltext XML - Archimedes";
+    Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed");
+    Component componentExistViewer = new Component("valid", "public", contentCategory, "text/html", existViewerUrl, "external-url");
+    components.add(componentXmlFulltext);
+    components.add(componentExistViewer);
+    eSciDocSession.updateItem(fulltextItemId, fulltextItemVersionDate, pid, mdRecord, components);
+    // fourth: page items: for each page create one item
+    SchemaHandler schemaHandler = new SchemaHandler();
+    ArrayList<String> pbFileNamesArrayStr = schemaHandler.getPBFileNames(docNode, docBase); 
+    createPageItemsInContainer(docOperation, mdRecord, pbFileNamesArrayStr, documentContainerId);
+    return documentContainerId;    
+  }
+  
+  private String deleteDocument(MpdlDocOperation docOperation) throws ApplicationException {
+    String eXistIdentifier = docOperation.getDestUrl();  // e.g. /echo/la/bla.xml
+    String documentContainerId = eSciDocSession.getContainerIdByEXistId(eXistIdentifier);
+    if (documentContainerId == null)
+      throw new ApplicationException("eSciDoc: Deletion of eSciDoc container is not possible. There is no eSciDoc container for your document: " + eXistIdentifier);
+    deleteContainer(docOperation, documentContainerId);
+    return documentContainerId;
+  }
+
+  public void deleteContainer(MpdlDocOperation docOperation, String containerId) throws ApplicationException {
+    String eXistIdentifier = docOperation.getDestUrl();  // e.g. /echo/la/bla.xml
+    docOperation.setStatus("delete document: " + eXistIdentifier + " on eSciDoc server");
+    // first: delete all members
+    deleteContainerItems(docOperation, containerId, null);
+    // second: delete container itself
+    eSciDocSession.deleteContainer(containerId);
+  }
+  
+  public void deleteContainerItems(MpdlDocOperation docOperation, String containerId, String filter) throws ApplicationException {
+    String operationName = docOperation.getName();
+    String eXistIdentifier = docOperation.getDestUrl(); 
+    NamespaceContext nsContext = ESciDocRestSession.getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String containerXmlStr = eSciDocSession.getContainer(containerId);
+    Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr);
+    String membersXmlStr = eSciDocSession.getMembersByContainerIdAndFilter(containerId, filter);
+    ArrayList<String> itemMemberIds = xmlUtil.evaluateToStringArray(membersXmlStr, "//escidocItem:item/@xlink:href", nsContext);
+    if (itemMemberIds != null) {
+      eSciDocSession.removeMembers(containerId, lastModificationDate, itemMemberIds);
+      for (int i=0; i< itemMemberIds.size(); i++) {
+        String itemId = itemMemberIds.get(i);
+        int pageNumber = i + 1;
+        docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (delete " + pageNumber + " of " + itemMemberIds.size() + " fulltext and image reference items)");
+        eSciDocSession.deleteItem(itemId);
+      }
+    }
+  }
+  
+  private void createPageItemsInContainer(MpdlDocOperation docOperation, MetadataRecord mdRecord, ArrayList<String> pbFileNamesArrayStr, String containerId) throws ApplicationException {
+    if (mdRecord == null || pbFileNamesArrayStr == null || containerId == null)
+      return;
+    String operationName = docOperation.getName();
+    String eXistIdentifier = docOperation.getDestUrl(); 
+    ArrayList<String> memberIds = new ArrayList<String>();
+    SchemaHandler schemaHandler = new SchemaHandler();
+    String pageImgDir = schemaHandler.getPageImgDir(mdRecord);
+    String docBase = docOperation.getDocBase();
+    for (int i=0; i< Math.min(10, pbFileNamesArrayStr.size()); i++) {  // TODO: temporarily limited to the first 10 page breaks
+    // for (int i=0; i< pbFileNamesArrayStr.size(); i++) {
+      String pid = eSciDocSession.getPid();
+      int pageNumber = i + 1;
+      docOperation.setStatus(operationName + " document: " + eXistIdentifier + " on eSciDoc server (create " + pageNumber + " of " + pbFileNamesArrayStr.size() + " image references)");
+      String fileName = pbFileNamesArrayStr.get(i);
+      fileName = StringUtilEscapeChars.deresolveXmlEntities(fileName);
+      MetadataRecord mdRecordImage = new MetadataRecord();
+      mdRecordImage.setIdentifier(fileName);
+      mdRecordImage.setTitle("Page: " + pageNumber);
+      mdRecordImage.setMediaType("image");
+      ArrayList<Component> components = new ArrayList<Component>();
+      String imageEchoViewerUrl = "http://echo.mpiwg-berlin.mpg.de/zogilib?fn=" + pageImgDir + "/" + fileName + "&amp;pn=" + pageNumber;
+      String imageExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=image" + "&amp;pn=" + pageNumber;
+      String fulltextExistViewerUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/page-query-result.xql?document=" + eXistIdentifier + "&amp;mode=text" + "&amp;pn=" + pageNumber;
+      Component componentImageEchoViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageEchoViewerUrl, "external-url");
+      Component componentImageExistViewer = new Component("valid", "public", "JPEG_DEFAULT", "text/html", imageExistViewerUrl, "external-url");
+      String contentCategory = "fulltext XML - ECHO";
+      if (docBase != null && docBase.equals("archimedes"))
+        contentCategory = "fulltext XML - Archimedes";
+      Component componentFulltextExistViewer = new Component("valid", "public", contentCategory, "text/html", fulltextExistViewerUrl, "external-url");
+      components.add(componentImageEchoViewer);
+      components.add(componentImageExistViewer);
+      components.add(componentFulltextExistViewer);
+      Item item = eSciDocSession.createItem(pid, mdRecordImage, components);
+      String itemId = item.getId();
+      Date lastModificationDate = item.getLastModificationDate();
+      eSciDocSession.submitItem(itemId, lastModificationDate, "create document");
+      String memberId = null;
+      if (itemId != null) {
+        int index = itemId.indexOf(":");
+        if (index > 0) {
+          memberId = itemId.substring(index + 1);
+          memberIds.add(memberId);
+        }
+      }
+    }
+    String containerXmlStr = eSciDocSession.getContainer(containerId);
+    Date lastModificationDate = eSciDocSession.getLastModificationDate(containerXmlStr);
+    eSciDocSession.addMembers(containerId, lastModificationDate, memberIds);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocRESTServlet.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,364 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import javax.servlet.ServletException;
+import javax.servlet.ServletInputStream;
+import javax.servlet.http.Cookie;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
+import org.apache.commons.fileupload.FileItem;
+import org.apache.commons.fileupload.disk.DiskFileItemFactory;
+import org.apache.commons.fileupload.servlet.ServletFileUpload;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpException;
+import org.apache.commons.httpclient.methods.GetMethod;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler;
+
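+// Servlet that accepts eSciDoc-style REST URLs and maps them to XQuery calls on
+// the eXist server and to item/container operations of the eSciDoc repository;
+// uploaded files are buffered in a temporary upload directory.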
+public class ESciDocRESTServlet extends HttpServlet {
+  private static final long serialVersionUID = -4889427839010526185L;
+  private static int FILE_UPLOAD_MAX_MEMORY_SIZE = 100000;  // 100 KB
+  private static String FILE_UPLOAD_TEMP_DIRECTORY = MpdlConstants.MPDL_EXIST_DATA_DIR + "/tmpUploadDir";
+  
+  protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
+    try {
+      String escidocUrl = req.getParameter("escidocUrl");
+      String query = req.getParameter("query");
+      String queryPath = req.getParameter("queryPath");
+      String parameters = req.getParameter("parameters");
+      String startRecord = req.getParameter("startRecord");
+      String maximumRecords = req.getParameter("maximumRecords");
+      Cookie[] cookies = req.getCookies();
+      String eSciDocCookie = getCookieValue(cookies, "escidocCookie");
+      int startRecordInt = -1;
+      if (startRecord != null && ! startRecord.equals(""))
+        startRecordInt = Integer.parseInt(startRecord);
+      int maximumRecordsInt = -1;
+      if (maximumRecords != null && ! maximumRecords.equals(""))
+        maximumRecordsInt = Integer.parseInt(maximumRecords);
+      resp.setContentType("text/xml;charset=\"utf-8\"");  // important: otherwise the response is sent as iso-8859-1  
+      PrintWriter out = resp.getWriter();
+      // execute xquery script on eXist server
+      if (escidocUrl != null && escidocUrl.startsWith("/exist:xquery/execute")) {
+        if (query != null && ! query.equals("")) {
+          String xqueryResult = xquery("string", query, null, startRecordInt, maximumRecordsInt);
+          out.print(xqueryResult);
+        } else if (queryPath != null && ! queryPath.equals("")) {
+          String xqueryResult = xquery("uri", queryPath, parameters, startRecordInt, maximumRecordsInt);
+          out.print(xqueryResult);
+        }
+      } else if (escidocUrl != null && escidocUrl.startsWith("/ir/item/") && escidocUrl.contains("/exist:xquery/execute")) {
+        String existDocUrl = getExistDocUrl(eSciDocCookie, escidocUrl);
+        String xQueryParamDocument = "<param name=\"document\">" + existDocUrl + "</param>";
+        String params = "";
+        if (parameters != null && ! parameters.equals("")) {
+          int index = parameters.indexOf("</params>");
+          params = parameters.substring(0, index) + xQueryParamDocument + parameters.substring(index);
+        } else {
+          params = "<params>" + xQueryParamDocument + "</params>";
+        }
+        if (query != null && ! query.equals("")) {
+          String xqueryResult = xquery("string", query, params, startRecordInt, maximumRecordsInt);
+          out.print(xqueryResult);
+        } else if (queryPath != null && ! queryPath.equals("")) {
+          String xqueryResult = xquery("uri", queryPath, params, startRecordInt, maximumRecordsInt);
+          out.print(xqueryResult);
+        }
+      } else {
+        out.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+        out.println("<error>");
+        out.print("eSciDoc does not support your URL: " + escidocUrl);
+        out.print("</error>");
+      }
+    } catch (ApplicationException e) {
+      throw new ServletException(e);
+    }
+  }
+  
+  protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
+    try {
+      String escidocUrl = req.getParameter("escidocUrl");
+      Cookie[] cookies = req.getCookies();
+      String eSciDocCookie = getCookieValue(cookies, "escidocCookie");
+      PrintWriter out = resp.getWriter();
+      boolean isMultipart = ServletFileUpload.isMultipartContent(req);
+      if (! isMultipart) { // if not multipartContent (e.g. set by client with setRequestBody or setRequestEntity)
+        ServletInputStream is = req.getInputStream();
+        File tmpUploadItemFile = File.createTempFile("item", ".xml", new File(FILE_UPLOAD_TEMP_DIRECTORY));
+        String tmpUploadItemFileName = tmpUploadItemFile.getPath();
+        FileUtil.getInstance().saveInputStreamToLocalFile(is, tmpUploadItemFileName);
+        String containerId = getESciDocContainerId(escidocUrl);
+        String newItemXmlStr = createItemInContainer(eSciDocCookie, containerId, tmpUploadItemFileName);  // create item and eXist document
+        out.println(newItemXmlStr);
+      } else {  // multipart content (each file item is uploaded) 
+        DiskFileItemFactory factory = new DiskFileItemFactory();
+        factory.setSizeThreshold(FILE_UPLOAD_MAX_MEMORY_SIZE);  // default is 100 KB
+        File tmpFileUploadDir = new File(FILE_UPLOAD_TEMP_DIRECTORY);
+        factory.setRepository(tmpFileUploadDir);  // for files which are bigger than the threshold; files are deleted when they are garbage collected
+        ServletFileUpload upload = new ServletFileUpload(factory);
+        List<FileItem> items = upload.parseRequest(req);
+        Iterator<FileItem> iter = items.iterator();
+        while (iter.hasNext()) {
+          /* fetch an item from the iterator above */
+          FileItem item = iter.next();
+          if (!item.isFormField()) {
+            String fileName = item.getName(); 
+            long sizeInBytes = item.getSize(); 
+            File uploadedFile = new File(FILE_UPLOAD_TEMP_DIRECTORY + "/" + fileName); 
+            item.write(uploadedFile); 
+            out.println("<h1>File: " + uploadedFile.getAbsolutePath() + " (size: " + sizeInBytes + ") written</h1><br>");
+          } 
+        }
+      }
+    } catch (Exception e ) {
+      throw new ServletException(e);
+    }
+  }
+
+  protected void doPut(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
+    PrintWriter out = resp.getWriter();
+    // TODO
+    out.println("<html>");
+    out.println("<body>");
+    out.println("do put");
+    out.println("</body>");
+    out.println("</html>");
+  }
+
+  protected void doDelete(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
+    try {
+      String escidocUrl = req.getParameter("escidocUrl");
+      Cookie[] cookies = req.getCookies();
+      String cookieId = getCookieValue(cookies, "escidocCookie");
+      PrintWriter out = resp.getWriter();
+      if (escidocUrl != null && escidocUrl.startsWith("/ir/item/escidoc:")) {
+        String existId = getExistDocUrl(cookieId, escidocUrl);
+        ESciDocRestSession escidocRestSession = ESciDocRestSession.getInstance(cookieId);
+        String itemId = escidocUrl;
+        escidocRestSession.deleteItem(itemId);
+        // delete eXist document
+        MpdlXmlRpcDocHandler docHandler = MpdlXmlRpcDocHandler.getInstance();
+        docHandler.deleteDocumentFile(existId);
+        out.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+        out.println("item: ");
+        out.print(itemId);
+        out.print(" successfully deleted in eSciDoc and eXist");
+      } else {
+        out.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+        out.println("<error>");
+        out.print("eSciDoc does not support your URL: " + escidocUrl);
+        out.print("</error>");
+      }
+    } catch (ApplicationException e) {
+      throw new ServletException(e);
+    }
+  }
+  
+  private void doDeleteOld(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException {
+    try {
+      String escidocUrl = req.getParameter("escidocUrl");
+      Cookie[] cookies = req.getCookies();
+      String eSciDocCookie = getCookieValue(cookies, "escidocCookie");
+      PrintWriter out = resp.getWriter();
+      if (escidocUrl != null && escidocUrl.startsWith("/ir/item")) {
+        String xQueryPath = "/mpdl/doc/rest-doc-operation.xql";
+        String existDocUrl = getExistDocUrl(eSciDocCookie, escidocUrl);
+        String strTmp = existDocUrl.substring(1);
+        int index = strTmp.indexOf("/");
+        String docBase = strTmp.substring(0, index);
+        strTmp = strTmp.substring(index + 1);
+        index = strTmp.indexOf("/");
+        String language = strTmp.substring(0, index);
+        String fileName = strTmp.substring(index + 1);
+        String xQueryParamOperation = "<param name=\"operation\">delete</param>";
+        String xQueryParamDocBase = "<param name=\"docBase\">" + docBase + "</param>";
+        String xQueryParamLanguage = "<param name=\"language\">" + language + "</param>";
+        String xQueryParamFileName = "<param name=\"fileName\">" + fileName + "</param>";
+        String xQueryParams = "<params>" + xQueryParamOperation + xQueryParamDocBase + xQueryParamLanguage + xQueryParamFileName + "</params>";
+        String xqueryResult = xqueryByPath(xQueryPath, xQueryParams, 1, 1);
+        out.print(xqueryResult);
+      } else {
+        out.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
+        out.println("<error>");
+        out.print("eSciDoc does not support your URL: " + escidocUrl);
+        out.print("</error>");
+      }
+    } catch (ApplicationException e) {
+      throw new ServletException(e);
+    }
+  }
+  
+  private String createItemInContainer(String cookieId, String containerId, String localFileName) throws ApplicationException {
+    String retItemXmlStr = null;
+    try {
+      // create item in eSciDoc
+      byte[] itemXmlBytes = FileUtil.getInstance().readBytes(localFileName);
+      String itemXmlStr = new String(itemXmlBytes, "utf-8");
+      ESciDocRestSession escidocRestSession = ESciDocRestSession.getInstance(cookieId);
+      Item retItem = escidocRestSession.createItemInContainer(containerId, itemXmlStr);
+      retItemXmlStr = retItem.toXmlString();
+      // create/update eXist document
+      MpdlXmlRpcDocHandler docHandler = MpdlXmlRpcDocHandler.getInstance();
+      String existId = escidocRestSession.getFirstEXistId(itemXmlStr);
+      String componentContentId = escidocRestSession.getFirstComponentId(retItemXmlStr);
+      File tmpUploadExistFile = File.createTempFile("exist", ".xml", new File(FILE_UPLOAD_TEMP_DIRECTORY));
+      String tmpUploadExistFileName = tmpUploadExistFile.getPath();
+      escidocRestSession.saveComponentContentToLocalFile(componentContentId, tmpUploadExistFileName); 
+      docHandler.saveDocumentFile(tmpUploadExistFileName, existId);
+      FileUtil.getInstance().deleteFile(localFileName);
+      FileUtil.getInstance().deleteFile(tmpUploadExistFileName);
+    } catch (Exception e) {
+      throw new ApplicationException(e);      
+    }
+    return retItemXmlStr;
+  }
+  
+  private String getExistDocUrl(String cookieId, String escidocUrl) throws ApplicationException {
+    ESciDocRestSession escidocRestSession = ESciDocRestSession.getInstance(cookieId);
+    String itemId = getESciDocItemId(escidocUrl);
+    String itemXmlStr = escidocRestSession.getItem(itemId);
+    String existDocUrl = getEXistDocUrl(itemXmlStr);
+    return existDocUrl;
+  }
+  
+  private String xquery(String queryType, String xQuery, String xmlParameters, int startRecord, int maximumRecords) throws ApplicationException {
+    String xQueryMethod = "/mpdl/interface/xquery.xql";
+    String xQueryPath = "";
+    String xqueryResult = null;
+    if (queryType.equals("uri")) {
+      xQueryPath = xQuery;
+      xqueryResult = xqueryByPath(xQueryPath, xmlParameters, startRecord, maximumRecords);
+    } else if (queryType.equals("string")) {
+      xQueryPath = xQueryMethod;
+      String paramXQuery = "<param name=\"xquery\">" + xQuery + "</param>";
+      String params = "<params>" + paramXQuery + "</params>";
+      if (xmlParameters != null) {
+        int index = xmlParameters.indexOf("</params>");
+        params = xmlParameters.substring(0, index) + paramXQuery + xmlParameters.substring(index);
+      }
+      xqueryResult = xqueryByPath(xQueryPath, params, startRecord, maximumRecords);
+    }
+    return xqueryResult;
+  }
+  
+  private String xqueryByPath(String xQueryPath, String xmlParameters, int startRecord, int maximumRecords) throws ApplicationException {
+    String requestName = xQueryPath;
+    String parameters = getUrlStringByXmlParams(xmlParameters);
+    if (parameters != null && ! parameters.equals("")) {
+      if (startRecord != -1)
+        parameters = parameters + "&startRecord=" + startRecord;
+      if (maximumRecords != -1)
+        parameters = parameters + "&maximumRecords=" + maximumRecords;
+      requestName = requestName + "?" + parameters;
+    } 
+    String xqueryResult = performGetRequest(requestName, null);
+    return xqueryResult;
+  }
+  
+  private String getUrlStringByXmlParams(String xmlParameters) throws ApplicationException {
+    String parameters = "";
+    if (xmlParameters != null) {
+      XmlUtil xmlUtil = XmlUtil.getInstance();
+      ArrayList<String> paramNames = xmlUtil.evaluateToStringArray(xmlParameters, "//param/@name", null);
+      ArrayList<String> paramValues = xmlUtil.evaluateToStringArray(xmlParameters, "//param", null);
+      int size = paramValues.size();
+      for (int i=0; i<size; i++) {
+        String paramName = paramNames.get(i);
+        String paramValue = paramValues.get(i);
+        String paramValueEncoded = encode(paramValue);
+        parameters = parameters + paramName + "=" + paramValueEncoded + "&";
+      }
+      if (parameters.length() > 0)
+        parameters = parameters.substring(0, parameters.length() - 1); // without the last "&"
+    }
+    return parameters;    
+  }
+  
+  private String getEXistDocUrl(String itemXmlStr) throws ApplicationException {
+    String retStr = null;
+    if (itemXmlStr != null) {
+      ESciDocRestSession escidocRestSession = ESciDocRestSession.getInstance(null);
+      retStr = escidocRestSession.getFirstEXistId(itemXmlStr);
+    }
+    return retStr;    
+  }
+  
+  private String getESciDocContainerId(String escidocUrl) {
+    String retStr = null;
+    int index = escidocUrl.indexOf("/ir/container/escidoc:");
+    int indexNum = index + 22;
+    int indexNext = escidocUrl.indexOf("/", indexNum);
+    if (indexNext == -1)
+      indexNext = escidocUrl.length();
+    retStr = escidocUrl.substring(index, indexNext);
+    return retStr;
+  }
+  
+  private String getESciDocItemId(String escidocUrl) {
+    String retStr = null;
+    int index = escidocUrl.indexOf("/ir/item/escidoc:");
+    int indexNum = index + 17;
+    int indexNext = escidocUrl.indexOf("/", indexNum);
+    if (indexNext == -1)
+      indexNext = escidocUrl.length();
+    retStr = escidocUrl.substring(index, indexNext);
+    return retStr;
+  }
+  
+  private String encode(String str) throws ApplicationException {
+    String retStr = null;
+    try {
+      retStr = URLEncoder.encode(str, "utf-8");
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);      
+    }
+    return retStr;    
+  }
+
+  private String getCookieValue(Cookie[] cookies, String name) {
+    String value = null;
+    if (cookies == null)
+      return value;
+    for (int i=0; i<cookies.length; i++) {
+      Cookie c = cookies[i];
+      String n = c.getName();
+      if (n.equals(name)) {
+        value = c.getValue();
+        break;
+      }
+    }
+    return value;
+  }
+  
+  private String performGetRequest(String requestName, String cookieId) throws ApplicationException {
+    String resultStr = "";
+    try {
+      String urlStr = "http://" + MpdlConstants.MPDL_EXIST_HOST_NAME + ":" + MpdlConstants.MPDL_EXIST_PORT + requestName;
+      GetMethod method = new GetMethod(urlStr);
+      if (cookieId != null)
+        method.setRequestHeader("Cookie", cookieId);
+      HttpClient httpClient = new HttpClient();
+      httpClient.executeMethod(method);
+      byte[] responseBody = method.getResponseBody();
+      resultStr = new String(responseBody, "utf-8");
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);      
+    } catch (IOException e) {
+      throw new ApplicationException(e);      
+    }
+    return resultStr;
+  } 
+  
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/ESciDocRestSession.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,823 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.namespace.NamespaceContext;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.httpclient.Cookie;
+import org.apache.commons.httpclient.Header;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpException;
+import org.apache.commons.httpclient.cookie.CookiePolicy;
+import org.apache.commons.httpclient.cookie.CookieSpec;
+import org.apache.commons.httpclient.methods.DeleteMethod;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.httpclient.methods.PostMethod;
+import org.apache.commons.httpclient.methods.PutMethod;
+import org.apache.commons.httpclient.params.HttpMethodParams;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+
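+// REST session against the eSciDoc framework: login via the Spring security
+// check, plus helper methods for contexts, grants, containers and items based
+// on HTTP GET/PUT/POST/DELETE requests.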
+public class ESciDocRestSession {
+  private static String CONTENT_ENCODING = "utf-8";
+  private static String STAGE_PATH = "/st/staging-file";
+  private static String LINE_SEPARATOR = System.getProperty("line.separator");
+  private static Pattern PATTERN_XML_BASE_ATTRIBUTE = Pattern.compile("xml:base=\"([^\"]*)\"");
+  private static Pattern PATTERN_XLINK_HREF_ATTRIBUTE = Pattern.compile("xlink:href=\"([^\"]*)\"");
+  private String protocol = "http";
+  private String host = "escidoc-dev.mpiwg-berlin.mpg.de";
+  private int port = 8080;
+  private String contentModelId;  
+  private String contextId;
+  private HttpClient httpClient;
+  private String cookieId;
+  
+  public static ESciDocRestSession getInstance(String cookieId) throws ApplicationException {
+    ESciDocRestSession instance = new ESciDocRestSession();
+    instance.protocol = "http";
+    instance.host = MpdlConstants.MPDL_ESCIDOC_HOST_NAME;
+    instance.port = MpdlConstants.MPDL_ESCIDOC_PORT;
+    instance.contentModelId = MpdlConstants.MPDL_ESCIDOC_CMM_ID;
+    instance.contextId = MpdlConstants.MPDL_ESCIDOC_CONTEXT_ID;
+    instance.cookieId = cookieId;
+    instance.httpClient = new HttpClient();
+    return instance;
+  }
+
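+  // Logs in at the eSciDoc framework and returns the user handle (cookie id),
+  // which is extracted from the base64-encoded part of the redirect Location
+  // header; "-10" is returned if the user lacks the system-administrator role.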
+  public static String login(String userName, String pw) throws ApplicationException {
+    String protocol = "http";
+    String host = MpdlConstants.MPDL_ESCIDOC_HOST_NAME;
+    int port = MpdlConstants.MPDL_ESCIDOC_PORT;
+    String cookieId = null;
+    try {
+      String frameworkUrl = protocol + "://" + host + ":" + port;
+      HttpClient client = new HttpClient();
+      client.getHostConfiguration().setHost(host, port, "http");
+      client.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
+     
+      PostMethod login = new PostMethod( frameworkUrl + "/aa/j_spring_security_check");
+      login.addParameter("j_username", userName);
+      login.addParameter("j_password", pw);
+      client.executeMethod(login);
+      login.releaseConnection();
+
+      CookieSpec cookiespec = CookiePolicy.getDefaultSpec();
+      Cookie[] logoncookies = cookiespec.match(host, port, "/", false, client.getState().getCookies());
+      if (logoncookies == null || logoncookies.length == 0)
+        return null;  // login failed: no session cookie was set
+      Cookie sessionCookie = logoncookies[0];
+     
+      PostMethod postMethod = new PostMethod("/aa/login");
+      postMethod.addParameter("target", frameworkUrl);
+      client.getState().addCookie(sessionCookie);
+      client.executeMethod(postMethod);
+      Header headers[] = postMethod.getResponseHeaders();
+      for (int i = 0; i < headers.length; ++i) {
+        if ("Location".equals(headers[i].getName())) {
+          String location = headers[i].getValue();
+          int index = location.indexOf('=');
+          Base64 base64 = new Base64();
+          String locationTemp = location.substring(index + 1, location.length());
+          cookieId = new String(base64.decode(locationTemp.getBytes()));
+        }
+      }
+      // if login is possible but the grants are not enough
+      if (cookieId != null) {
+        ESciDocRestSession session = getInstance(cookieId);
+        String grantAdminHref = session.getGrantHrefByUserNameAndRoleName(userName, "escidoc:role-system-administrator");
+        if (grantAdminHref == null)
+          cookieId = "-10";
+      }
+      postMethod.releaseConnection();
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return cookieId;
+  }
+
+  // validation service of eSciDoc (TODO: not implemented yet)
+  public void validate(String pid, MetadataRecord mdRecord, String srcUrl) throws ApplicationException {
+    Component component = new Component("valid", "public", "any fulltext", "text/xml", srcUrl, "internal-managed");
+    ArrayList<Component> components = new ArrayList<Component>();
+    components.add(component);
+    Item xmlTemplate = new Item(contextId, pid, mdRecord, contentModelId, components);
+    String itemXmlStr = xmlTemplate.toXmlString();
+    String uri = "/validation/rest/validateItemXmlBySchema";
+    HttpMethodParams parameter = new HttpMethodParams();
+    parameter.setParameter("validation-point", "");  // None (Pick the validation schema from the context provided with the item)
+    parameter.setParameter("validation-schema", "");  // None (Default)
+    String valAnswer = performPostRequest(uri, itemXmlStr, parameter);
+  }
+  
+  public String getCookieId() {
+    return cookieId;  
+  }
+  
+  public void openContext(String contextId) throws ApplicationException {
+    String contextXmlStr = getContextById(contextId);
+    Date lastModificationDate = getLastModificationDate(contextXmlStr);
+    String lastModificationDateStr = XmlUtil.getInstance().toXsDate(lastModificationDate);
+    String bodyContentStr = "<param last-modification-date=\"" + lastModificationDateStr + "\"/>";
+    String uri = "/ir/context/" + contextId + "/open";
+    performPostRequestByBody(uri, bodyContentStr);
+  }
+  
+  public String createContext(String organizationalUnit, String name, String description, String type) throws ApplicationException {
+    Context xmlTemplate = new Context(organizationalUnit, name, description, type);
+    String bodyContentXmlStr = xmlTemplate.toXmlString();
+    String contextXmlStr = performPutRequestByBody("/ir/context", bodyContentXmlStr);
+    String contextId = getFirstContextId(contextXmlStr);
+    return contextId;
+  }
+  
+  public String getContextById(String contextId) throws ApplicationException {
+    String bodyContent = "<param><filter><id>" + contextId + "</id></filter></param>";
+    String requestUrlStr = "/ir/contexts/filter";
+    String resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+    return resultXmlStr;
+  }
+  
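+  // Grants a role to a user; currently only "admin" is mapped (to escidoc:role-system-administrator).
+  // The grant is only created if the user does not already hold it.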
+  public void grant(String userName, String roleName) throws ApplicationException {
+    String grantXmlStr = null;
+    String userId = null;
+    String internalRoleName = null;
+    if (roleName != null && roleName.equals("admin")) {
+      internalRoleName = "escidoc:role-system-administrator";
+      userId = getUserId(userName);
+      Grant grant = new Grant(userName, userId, "System-Administrator", "/aa/role/" + internalRoleName);
+      grantXmlStr = grant.toXmlString();
+    }
+    String grantHref = getGrantHrefByUserNameAndRoleName(userName, internalRoleName);
+    if (grantHref == null || grantHref.equals(""))
+      performPutRequestByBody(userId + "/resources/grants/grant", grantXmlStr);
+  }
+  
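+  // Resolves the user name to its eSciDoc user id and queries /aa/grants/filter for the given role;
+  // returns the xlink:href of the first matching grant, or null if the user has no such grant.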
+  public String getGrantHrefByUserNameAndRoleName(String userName, String roleName) throws ApplicationException {
+    String resultXmlStr = null;
+    String fullUserId = getUserId(userName);  // e.g. fullUserId=/aa/user-account/escidoc:22650
+    if (fullUserId != null) {
+      int userIdIndex = fullUserId.lastIndexOf("/");
+      if (userIdIndex != -1) {
+        String userId = fullUserId.substring(userIdIndex + 1);
+        String filterUserName = "<filter name=\"http://escidoc.de/core/01/properties/user\">" + userId + "</filter>"; // e.g. userId=escidoc:22650
+        String filterRoleName = "<filter name=\"http://escidoc.de/core/01/properties/role\">" + roleName + "</filter>";  // e.g. roleName=escidoc:role-system-administrator
+        String bodyContent = "<param>" + filterUserName + filterRoleName + "</param>";  
+        String requestUrlStr = "/aa/grants/filter";
+        resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+        resultXmlStr = getFirstGrantId(resultXmlStr);
+      }
+    }
+    return resultXmlStr;
+  }
+  
+  public String getGrantsByUserName(String userName) throws ApplicationException {
+    String resultXmlStr = null;
+    String fullUserId = getUserId(userName);  // e.g. fullUserId=/aa/user-account/escidoc:22650
+    if (fullUserId != null) {
+      int userIdIndex = fullUserId.lastIndexOf("/");
+      if (userIdIndex != -1) {
+        String userId = fullUserId.substring(userIdIndex + 1);
+        String filterUserName = "<filter name=\"http://escidoc.de/core/01/properties/user\">" + userId + "</filter>"; // e.g. userId=escidoc:22650
+        String bodyContent = "<param>" + filterUserName + "</param>";  
+        String requestUrlStr = "/aa/grants/filter";
+        resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+      }
+    }
+    return resultXmlStr;
+  }
+  
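+  // Creates a container from a Container XML template via PUT /ir/container and returns the
+  // xlink:href of the new container.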
+  public String createContainer(String pid, MetadataRecord mdRecord) throws ApplicationException {
+    Container xmlTemplate = new Container(contentModelId, contextId, pid, mdRecord);
+    String bodyContentXmlStr = xmlTemplate.toXmlString();
+    String containerXmlStr = performPutRequestByBody("/ir/container", bodyContentXmlStr);
+    String containerId = getFirstContainerId(containerXmlStr);
+    return containerId;
+  }
+  
+  public Container createContainerInContainer(String pid, MetadataRecord mdRecord, String containerId) throws ApplicationException {
+    Container xmlTemplate = new Container(contentModelId, contextId, pid, mdRecord);
+    String bodyContentXmlStr = xmlTemplate.toXmlString();
+    String uri = containerId + "/create-container";
+    String containerXmlStr = performPostRequestByBody(uri, bodyContentXmlStr);
+    String retContainerId = getFirstContainerId(containerXmlStr);
+    Date lastModificationDate = getLastModificationDate(containerXmlStr);
+    Container container = new Container(retContainerId, lastModificationDate);
+    return container;
+  }
+  
+  public Item createItemInContainer(String containerId, String pid, MetadataRecord mdRecord, ArrayList<Component> components) throws ApplicationException {
+    Item xmlTemplate = new Item(contextId, pid, mdRecord, contentModelId, components);
+    String bodyContentXmlStr = xmlTemplate.toXmlString();
+    String uri = containerId + "/create-item";
+    String itemXmlStr = performPostRequestByBody(uri, bodyContentXmlStr);
+    String itemId = getFirstItemId(itemXmlStr);
+    Date lastModificationDate = getLastModificationDate(itemXmlStr);
+    Item item = new Item(itemId, lastModificationDate);
+    return item;
+  }
+  
+  public Item createItemInContainer(String containerId, String itemXmlStr) throws ApplicationException {
+    String uri = containerId + "/create-item";
+    String retItemXmlStr = performPostRequestByBody(uri, itemXmlStr);
+    String itemId = getFirstItemId(retItemXmlStr);
+    Date lastModificationDate = getLastModificationDate(retItemXmlStr);
+    String validStatus = ""; // TODO 
+    String visibility = ""; // TODO
+    String contentCategory = ""; // TODO
+    String mimeType = ""; // TODO
+    String url = getFirstComponentId(retItemXmlStr);
+    String storage = ""; // TODO
+    Component component = new Component(validStatus, visibility, contentCategory, mimeType, url, storage);
+    Item item = new Item(itemId, lastModificationDate);
+    item.addComponent(component);
+    return item;
+  }
+  
+  public void submitContainer(String containerId, Date lastModificationDate, String comment) throws ApplicationException {
+    String uri = containerId + "/submit";
+    String dateStr = XmlUtil.getInstance().toXsDate(lastModificationDate);
+    String xmlStr = "<param last-modification-date=\"" + dateStr + "\"><comment>" + comment + "</comment></param>";
+    performPostRequestByBody(uri, xmlStr);
+  }
+  
+  public void submitItem(String itemId, Date lastModificationDate, String comment) throws ApplicationException {
+    String uri = itemId + "/submit";
+    String dateStr = XmlUtil.getInstance().toXsDate(lastModificationDate);
+    String xmlStr = "<param last-modification-date=\"" + dateStr + "\"><comment>" + comment + "</comment></param>";
+    performPostRequestByBody(uri, xmlStr);
+  }
+  
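+  // Adds members to a container: builds a <param> body with the container's last-modification-date
+  // and one <id>escidoc:...</id> element per member, POSTs it to {containerId}/members/add and
+  // returns the new last-modification-date.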
+  public Date addMembers(String containerId, Date lastModificationDate, ArrayList<String> memberIds) throws ApplicationException {
+    if (containerId == null || lastModificationDate == null || memberIds == null)
+      return null;
+    String dateStr = XmlUtil.getInstance().toXsDate(lastModificationDate);
+    String membersXmlStr = "<param last-modification-date=\"" + dateStr + "\">";
+    for (int i=0; i< memberIds.size(); i++) {
+      String memberId = memberIds.get(i);
+      membersXmlStr = membersXmlStr + "<id>" + "escidoc:" + memberId +"</id>";
+    }
+    membersXmlStr += "</param>";
+    String lastModDateXmlStr = performPostRequestByBody(containerId + "/members/add", membersXmlStr);
+    Date lastModDate = getLastModificationDate(lastModDateXmlStr);
+    return lastModDate;
+  }
+  
+  public Date removeMembers(String containerId, Date lastModificationDate, ArrayList<String> memberIds) throws ApplicationException {
+    if (containerId == null || lastModificationDate == null || memberIds == null)
+      return null;
+    String dateStr = XmlUtil.getInstance().toXsDate(lastModificationDate);
+    String membersXmlStr = "<param last-modification-date=\"" + dateStr + "\">";
+    for (int i=0; i< memberIds.size(); i++) {
+      String memberId = memberIds.get(i);
+      // if memberId is a full id, strip all non-digit characters: e.g. /ir/item/escidoc:4711 becomes 4711
+      if (! memberId.matches("[0-9]+")) {
+        memberId = memberId.replaceAll("[^0-9]+", "");
+      }
+      membersXmlStr = membersXmlStr + "<id>" + "escidoc:" + memberId +"</id>";
+    }
+    membersXmlStr += "</param>";
+    String lastModDateXmlStr = performPostRequestByBody(containerId + "/members/remove", membersXmlStr);
+    Date lastModDate = getLastModificationDate(lastModDateXmlStr);
+    return lastModDate;
+  }
+  
+  public Item createItem(String pid, MetadataRecord mdRecord, ArrayList<Component> components) throws ApplicationException {
+    Item xmlTemplate = new Item(contextId, pid, mdRecord, contentModelId, components);
+    String xmlStr = xmlTemplate.toXmlString();
+    String itemXmlStr = performPutRequestByBody("/ir/item", xmlStr);
+    String itemId = getFirstItemId(itemXmlStr);
+    Date lastModificationDate = getLastModificationDate(itemXmlStr);
+    Item item = new Item(itemId, lastModificationDate);
+    return item;
+  }
+  
+  public Date updateItem(String itemId, Date lastModificationDate, String pid, MetadataRecord mdRecord, ArrayList<Component> components) throws ApplicationException {
+    if (itemId == null || lastModificationDate == null)
+      return null;
+    Item xmlTemplate = new Item(contextId, pid, mdRecord, contentModelId, components);
+    xmlTemplate.setLastModificationDate(lastModificationDate);
+    String xmlStr = xmlTemplate.toXmlString();
+    String itemXmlStr = performPutRequestByBody(itemId, xmlStr);
+    Date newVersionDate = getVersionDate(itemXmlStr);
+    return newVersionDate;
+  }
+  
+  public void deleteItem(String itemId) {
+    if (itemId != null) {
+      performDeleteRequest(itemId);
+    }
+  }
+  
+  public void deleteContainer(String containerId) {
+    if (containerId != null) {
+      performDeleteRequest(containerId);
+    }
+  }
+  
+  public Date getContainerLastModificationDate(String containerId) throws ApplicationException {
+    Date lastModificationDate = null;
+    String resultXmlStr = getContainer(containerId);
+    if (resultXmlStr != null) {
+      lastModificationDate = getLastModificationDate(resultXmlStr);
+    }
+    return lastModificationDate;
+  }
+
+  public String getContainer(String containerId) throws ApplicationException {
+    String resultXmlStr = null;
+    if (containerId != null) {
+      resultXmlStr = performGetRequest(containerId);
+    }
+    return resultXmlStr;
+  }
+
+  public String getItem(String itemId) throws ApplicationException {
+    String resultXmlStr = null;
+    if (itemId != null) {
+      resultXmlStr = performGetRequest(itemId);
+    }
+    return resultXmlStr;
+  }
+  
+  public void saveComponentContentToLocalFile(String componentContentId, String localFileName) throws ApplicationException {
+    if (componentContentId != null) {
+      performGetRequestToLocalFile(componentContentId, localFileName);
+    }
+  }
+  
+  public String getUserId(String userName) throws ApplicationException {
+    String userId = null;
+    if (userName != null) {
+      String userNameAccessStr = userName + ",uid=" + userName + ",ou=users,dc=wisges,dc=rz-berlin,dc=mpg,dc=de";
+      String resultXmlStr = performGetRequest("/aa/user-account/" + userNameAccessStr);
+      userId = getFirstUserId(resultXmlStr);
+    }
+    return userId;
+  }
+  
+  public String getUser(String userName) throws ApplicationException {
+    String resultXmlStr = null;
+    if (userName != null) {
+      String userNameAccessStr = userName + ",uid=" + userName + ",ou=users,dc=wisges,dc=rz-berlin,dc=mpg,dc=de";
+      resultXmlStr = performGetRequest("/aa/user-account/" + userNameAccessStr);
+    }
+    return resultXmlStr;
+  }
+  
+  public String getAllUsers() throws ApplicationException {
+    String bodyContent = "<param><filter></filter></param>";
+    String requestUrlStr = "/aa/user-accounts/filter";
+    String resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+    return resultXmlStr;
+  }
+  
+  public String getMembersByContainerIdAndFilter(String containerId, String filter) throws ApplicationException {
+    String bodyContent = "<param><filter></filter></param>";
+    if (filter != null)
+      bodyContent = "<param>" + filter + "</param>";
+    String requestUrlStr = containerId + "/members/filter";
+    String resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+    return resultXmlStr;
+  }
+
+  public String getAllItems() throws ApplicationException {
+    String bodyContent = "<param><filter></filter></param>";
+    String requestUrlStr = "/ir/items/filter";
+    String resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+    return resultXmlStr;
+  }
+  
+  private String getContainerByEXistId(String existId) throws ApplicationException {
+    String bodyContent = "<param><filter name=\"/md-records/md-record/metadata/exist-identifier\">" + existId + "</filter></param>";  // e.g. existId = /echo/la/alvarus_1509_lat_V40_10.xml
+    String requestUrlStr = "/ir/containers/filter";
+    String resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+    return resultXmlStr;
+  }
+  
+  public String getContainerIdByEXistId(String existId) throws ApplicationException {
+    String containerXmlStr = getContainerByEXistId(existId);
+    String eScidDocContainerId = null;
+    if (containerXmlStr != null && !containerXmlStr.equals("")) {
+      eScidDocContainerId = getFirstContainerId(containerXmlStr);
+    }
+    return eScidDocContainerId;
+  }
+  
+  public String getItemByPid(String pid) throws ApplicationException {
+    String bodyContent = "<param><filter name=\"/properties/pid\">" + pid + "</filter></param>";  
+    String requestUrlStr = "/ir/items/filter";
+    String resultXmlStr = performPostRequestByBody(requestUrlStr, bodyContent);
+    return resultXmlStr;
+  }
+  
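+  // Streams a local file to the eSciDoc stage area with a plain HttpURLConnection PUT (authenticated
+  // via the escidocCookie) and returns the URL of the staged file (xml:base + xlink:href of the response XML).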
+  public String uploadFileToESciDocStageArea(String filePath) throws ApplicationException {
+    StringBuffer result = new StringBuffer();
+    try {
+      URL createUrl = new URL(protocol + "://" + host + ":" + port + STAGE_PATH);
+      HttpURLConnection uploadConnection = (HttpURLConnection) createUrl.openConnection();
+      uploadConnection.setRequestProperty("Cookie", "escidocCookie=" + cookieId);
+      uploadConnection.setRequestMethod("PUT");
+      uploadConnection.setDoOutput(true);
+      // open POST Request
+      OutputStream out = uploadConnection.getOutputStream();
+      // access binary content
+      InputStream in = new FileInputStream(filePath);
+      // write template to POST Request
+      byte[] bytes = new byte[4096];
+      int l = in.read(bytes);
+      while (l > -1) {
+        out.write(bytes, 0, l);
+        l = in.read(bytes);
+      }
+      in.close();
+      out.close();
+      uploadConnection.connect();
+      // connect response reader
+      BufferedReader createdReader = null;
+      String contentEncoding = uploadConnection.getContentEncoding();
+      if (contentEncoding == null) {
+        contentEncoding = CONTENT_ENCODING;
+      }
+      createdReader = new BufferedReader(new InputStreamReader(uploadConnection.getInputStream(), contentEncoding));
+      // read response
+      String line = createdReader.readLine();
+      while (line != null) {
+        result.append(line);
+        result.append(LINE_SEPARATOR);
+        line = createdReader.readLine();
+      }
+      createdReader.close();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    String stageUrl = obtainResourceHref(result.toString());
+    return stageUrl;
+  } 
+  
+  
+  public String getPid() throws ApplicationException {
+    return "mpiwg:47114711";    // TODO
+    /*
+    try {
+      XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
+      XmlRpcClient client = new XmlRpcClient();
+      String zopeUrlStr = "http://xserve07.mpiwg-berlin.mpg.de:18080";
+      config.setServerURL(new URL(zopeUrlStr + "/idGenerator"));
+      client.setConfig(config);
+      Object[] params = new Object[]{};
+      String pid = (String) client.execute("generateId", params);
+      return pid;
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    } catch (XmlRpcException e) {
+      throw new ApplicationException(e);
+    }
+    */
+  }
+
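+  // XPath helpers: the following methods extract ids, hrefs, dates and metadata fields from eSciDoc
+  // response XML (see getNsContext() for the prefix-to-namespace bindings).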
+  public ArrayList<String> getContainerIds(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    ArrayList<String> containerIds = xmlUtil.evaluateToStringArray(xmlStr, "//srel:container/@xlink:href", nsContext);
+    return containerIds;
+  }
+  
+  public ArrayList<String> getContainerTitles(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    ArrayList<String> containerTitles = xmlUtil.evaluateToStringArray(xmlStr, "//srel:container/@xlink:title", nsContext);
+    return containerTitles;
+  }
+  
+  public String getLatestVersionId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String id = xmlUtil.evaluateToString(xmlStr, "//prop:latest-version/@xlink:href", nsContext);
+    return id;
+  }
+  
+  public String getFirstUserId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String id = xmlUtil.evaluateToString(xmlStr, "//user-account:user-account/@xlink:href", nsContext);
+    return id;
+  }
+  
+  public String getFirstGrantId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String g = xmlUtil.evaluateToString(xmlStr, "//grants:grant/@xlink:href", nsContext);
+    return g;
+  }
+  
+  public String getFirstContextId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String id = xmlUtil.evaluateToString(xmlStr, "//context:context/@xlink:href", nsContext);
+    return id;
+  }
+  
+  public String getFirstContainerId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String id = xmlUtil.evaluateToString(xmlStr, "//container:container/@xlink:href", nsContext);
+    return id;
+  }
+  
+  public String getFirstItemId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String id = xmlUtil.evaluateToString(xmlStr, "//escidocItem:item/@xlink:href", nsContext);
+    return id;
+  }
+  
+  public String getFirstStageAreaURL(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String eSciDocStageAreaUrl = xmlUtil.evaluateToString(xmlStr, "//escidocComponents:content/@xlink:href", nsContext);
+    return eSciDocStageAreaUrl;
+  }
+  
+  public String getFirstComponentId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String componentId = xmlUtil.evaluateToString(xmlStr, "//escidocComponents:content/@xlink:href", nsContext);
+    return componentId;
+  }
+  
+  public String getFirstEXistId(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String id = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/mpiwg:exist-identifier", nsContext);
+    return id;
+  }
+  
+  public MetadataRecord getFirstMdRecord(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String id = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/dc:identifier", nsContext);
+    String language = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/dc:language", nsContext);
+    String creator = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/dc:creator", nsContext);
+    String title = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/dc:title", nsContext);
+    String type = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/dc:type", nsContext);
+    String rights = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/dc:rights", nsContext);
+    String dateStr = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/dc:date", nsContext);
+    Date date = null;
+    if (dateStr != null && !dateStr.equals(""))
+      date = new Date(dateStr);
+    String license = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/mpiwg:license", nsContext);
+    String accessRights = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/mpiwg:accessRights", nsContext);
+    String mediaType = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/mpiwg:mediaType", nsContext);
+    String existId = xmlUtil.evaluateToString(xmlStr, "/escidocItem:item/escidocMetadataRecords:md-records/escidocMetadataRecords:md-record/metadata/mpiwg:exist-identifier", nsContext);
+    MetadataRecord mdRecord = new MetadataRecord(id, language, creator, title, null, null, type, rights, date);  // TODO complete and test
+    mdRecord.setLicense(license);
+    mdRecord.setAccessRights(accessRights);
+    mdRecord.setMediaType(mediaType);
+    mdRecord.setEXistIdentifier(existId);
+    return mdRecord;
+  }
+  
+  public Date getVersionDate(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String dateStr = xmlUtil.evaluateToString(xmlStr, "//version:date", nsContext);
+    Date lastModificationDate = xmlUtil.toDate(dateStr);
+    return lastModificationDate;
+  }
+  
+  public Date getLastModificationDate(String xmlStr) throws ApplicationException {
+    NamespaceContext nsContext = getNsContext();
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String dateStr = xmlUtil.evaluateToString(xmlStr, "//*/@last-modification-date", nsContext);
+    Date lastModificationDate = xmlUtil.toDate(dateStr);
+    return lastModificationDate;
+  }
+
+  private String obtainResourceHref(String xml) {
+    // base
+    String base = "";
+    Matcher baseMatcher = PATTERN_XML_BASE_ATTRIBUTE.matcher(xml);
+    if (baseMatcher.find()) {
+      base = baseMatcher.group(1);
+    }
+    // href
+    String href = null;
+    Matcher hrefMatcher = PATTERN_XLINK_HREF_ATTRIBUTE.matcher(xml);
+    if (hrefMatcher.find()) {
+      href = hrefMatcher.group(1);
+    } else {
+      throw new UnsupportedOperationException("Can not obtain href for resources without xlink:href attribute.");
+    }
+    return base + href;
+  }
+  
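+  // Low-level HTTP helpers (POST/PUT/DELETE/GET): each request goes to protocol://host:port + path,
+  // is authenticated by sending the eSciDoc handle as the "escidocCookie" cookie, and response
+  // bodies are read as UTF-8.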
+  private String performPostRequestByBody(String requestUrlStr, String bodyContent) throws ApplicationException {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestUrlStr;
+      PostMethod method = new PostMethod(urlStr);
+      method.setFollowRedirects(false); 
+      method.setRequestHeader("Cookie", "escidocCookie=" + cookieId);
+      if (bodyContent != null) {
+        method.setRequestBody(bodyContent);
+      }
+      httpClient.executeMethod(method); 
+      byte[] responseBody = method.getResponseBody();
+      resultStr = new String(responseBody, "utf-8");
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);      
+    } catch (IOException e) {
+      throw new ApplicationException(e);      
+    }
+    return resultStr; 
+  } 
+
+  // TODO: only used by the validation service, which is not finished yet
+  private String performPostRequest(String requestUrlStr, String bodyContent, HttpMethodParams parameter) throws ApplicationException {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestUrlStr;
+      PostMethod method = new PostMethod(urlStr);
+      method.setFollowRedirects(false); 
+      method.setRequestHeader("Cookie", "escidocCookie=" + cookieId);
+      if (bodyContent != null) {
+        method.setRequestBody(bodyContent);
+      }
+      if (parameter != null) {
+        method.setParams(parameter);
+      }
+      httpClient.executeMethod(method); 
+      byte[] responseBody = method.getResponseBody();
+      resultStr = new String(responseBody, "utf-8");
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);      
+    } catch (IOException e) {
+      throw new ApplicationException(e);      
+    }
+    return resultStr; 
+  } 
+
+  private String performPutRequestByBody(String requestName, String bodyContent) {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestName;
+      PutMethod method = new PutMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + cookieId);
+      if (bodyContent != null) {
+        method.setRequestBody(bodyContent);
+      }
+      httpClient.executeMethod(method); 
+      byte[] responseBody = method.getResponseBody();
+      resultStr = new String(responseBody, "utf-8");
+      method.releaseConnection();
+    } catch (HttpException e) {
+      e.printStackTrace();      
+    } catch (IOException e) {
+      e.printStackTrace();      
+    }
+    return resultStr; 
+  } 
+
+  private void performDeleteRequest(String requestName) {
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestName;
+      DeleteMethod method = new DeleteMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + cookieId);
+      httpClient.executeMethod(method); 
+      method.releaseConnection();
+    } catch (HttpException e) {
+      e.printStackTrace();      
+    } catch (IOException e) {
+      e.printStackTrace();      
+    }
+  } 
+
+  private String performGetRequest(String requestName) {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestName;
+      GetMethod method = new GetMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + cookieId);
+      httpClient.executeMethod(method); 
+      byte[] responseBody = method.getResponseBody();
+      resultStr = new String(responseBody, "utf-8");
+      method.releaseConnection();
+    } catch (HttpException e) {
+      e.printStackTrace();      
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return resultStr;
+  } 
+
+  private void performGetRequestToLocalFile(String requestName, String localFileName) throws ApplicationException {
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestName;
+      GetMethod method = new GetMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + cookieId);
+      httpClient.executeMethod(method); 
+      InputStream responseBodyInputStream = method.getResponseBodyAsStream();
+      FileUtil.getInstance().saveInputStreamToLocalFile(responseBodyInputStream, localFileName);
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);      
+    } catch (IOException e) {
+      throw new ApplicationException(e);      
+    }
+  } 
+
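+  // Namespace context for the XPath evaluations above: maps the eSciDoc, Dublin Core and MPIWG
+  // prefixes to their namespace URIs (and back).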
+  public static NamespaceContext getNsContext() {
+    NamespaceContext nsContext = new NamespaceContext() {
+      public String getNamespaceURI(String prefix) {
+        String uri;
+        if (prefix.equals("xlink"))
+          uri = "http://www.w3.org/1999/xlink";
+        else if (prefix.equals("escidocItem"))
+          uri = "http://www.escidoc.de/schemas/item/0.9";
+        else if (prefix.equals("user-account"))
+          uri = "http://www.escidoc.de/schemas/useraccount/0.7";
+        else if (prefix.equals("grants"))
+          uri = "http://www.escidoc.de/schemas/grants/0.5";
+        else if (prefix.equals("context"))
+          uri = "http://www.escidoc.de/schemas/context/0.7";
+        else if (prefix.equals("container"))
+          uri = "http://www.escidoc.de/schemas/container/0.8";
+        else if (prefix.equals("escidocMetadataRecords"))
+          uri = "http://www.escidoc.de/schemas/metadatarecords/0.5";
+        else if (prefix.equals("escidocComponents"))
+          uri = "http://www.escidoc.de/schemas/components/0.9";
+        else if (prefix.equals("prop"))
+          uri = "http://escidoc.de/core/01/properties";
+        else if (prefix.equals("struct-map"))
+          uri = "http://www.escidoc.de/schemas/structmap/0.4";
+        else if (prefix.equals("version"))
+          uri = "http://escidoc.de/core/01/properties/version/";
+        else if (prefix.equals("srel"))
+          uri = "http://escidoc.de/core/01/structural-relations/";  
+        else if (prefix.equals("xml"))
+          uri = "http://www.w3.org/XML/1998/namespace";
+        else if (prefix.equals("dc"))
+          uri = "http://purl.org/dc/elements/1.1/";
+        else if (prefix.equals("mpiwg"))
+          uri = "http://www.mpiwg-berlin.mpg.de/ns/mpiwg";
+        else
+          uri = null;
+        return uri;
+      }
+      
+      public String getPrefix(String uri) {
+        if (uri.equals("http://www.w3.org/1999/xlink"))
+          return "xlink";
+        else if (uri.equals("http://www.escidoc.de/schemas/item/0.9"))
+          return "escidocItem";
+        else if (uri.equals("http://www.escidoc.de/schemas/useraccount/0.7"))
+          return "user-account";
+        else if (uri.equals("http://www.escidoc.de/schemas/grants/0.5"))
+          return "grants";
+        else if (uri.equals("http://www.escidoc.de/schemas/context/0.7"))
+          return "context";
+        else if (uri.equals("http://www.escidoc.de/schemas/container/0.8"))
+          return "container";
+        else if (uri.equals("http://www.escidoc.de/schemas/metadatarecords/0.5"))
+          return "escidocMetadataRecords";
+        else if (uri.equals("http://www.escidoc.de/schemas/components/0.9"))
+          return "escidocComponents";
+        else if (uri.equals("http://escidoc.de/core/01/properties"))
+          return "prop";
+        else if (uri.equals("http://www.escidoc.de/schemas/structmap/0.4"))
+          return "struct-map";
+        else if (uri.equals("http://escidoc.de/core/01/properties/version/"))
+            return "version";
+        else if (uri.equals("http://escidoc.de/core/01/structural-relations/"))
+          return "srel";
+        else if (uri.equals("http://www.w3.org/XML/1998/namespace"))
+          return "xml";
+        else if (uri.equals("http://purl.org/dc/elements/1.1/"))
+          return "dc";
+        else if (uri.equals("http://www.mpiwg-berlin.mpg.de/ns/mpiwg"))
+          return "mpiwg";
+        else
+          return null;
+      }
+
+      public Iterator getPrefixes(String namespace) {
+        return null;
+      }
+    };
+    return nsContext;    
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Grant.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,29 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+public class Grant {
+  private String userName = "jwillenborg";
+  private String userId = "/aa/user-account/escidoc:22650";
+  private String roleName = "System-Administrator";
+  private String roleId = "/aa/role/escidoc:role-system-administrator";
+  
+  public Grant(String userName, String userId, String roleName, String roleId) {
+    this.userName = userName;
+    this.userId = userId;
+    this.roleName = roleName;
+    this.roleId = roleId;
+  }
+  
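+  // Serializes the grant to the eSciDoc grants schema (grants:grant with granted-to, created-by
+  // and role structural relations).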
+  public String toXmlString() {
+    StringBuilder str = new StringBuilder();
+    str.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+    str.append("<grants:grant  xmlns:grants=\"http://www.escidoc.de/schemas/grants/0.5\"  xmlns:prop=\"http://escidoc.de/core/01/properties/\" " +
+    		"xmlns:srel=\"http://escidoc.de/core/01/structural-relations/\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" >");
+    str.append("<grants:properties>");
+    str.append("<srel:granted-to resource=\"user-account\"  xlink:type=\"simple\" xlink:title=\"" + userName + "\" xlink:href=\"" + userId + "\"/>");
+    str.append("<srel:created-by  xlink:type=\"simple\" xlink:title=\"System Administrator User\" xlink:href=\"/aa/user-account/escidoc:exuser1\" />");
+    str.append("<srel:role  xlink:type=\"simple\" xlink:title=\"" + roleName + "\" xlink:href=\"" + roleId + "\" />");
+    str.append("</grants:properties>");
+    str.append("</grants:grant>");
+    return str.toString();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/Item.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,103 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.util.ArrayList;
+import java.util.Date;
+
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+
+public class Item {
+  private String id;
+  private String contextId;  // e.g. "/ir/context/escidoc:23002" 
+  private String contentModelId;  
+  private String pid;
+  private MetadataRecord mdRecord;
+  private ArrayList<Component> components;
+  private Date lastModificationDate;
+  
+  public Item(String id, Date lastModificationDate) {
+    this.id = id;
+    this.lastModificationDate = lastModificationDate;
+  }
+
+  public Item(String contextId, String pid, MetadataRecord mdRecord, String contentModelId, ArrayList<Component> components) {
+    this.contextId = contextId;
+    this.pid = pid;
+    this.mdRecord = mdRecord;
+    this.contentModelId = contentModelId;
+    this.components = components;
+  }
+
+  public void addComponent(Component component) {
+    if (components == null)
+      components = new ArrayList<Component>();
+    components.add(component);
+  }
+  
+  public String getId() {
+    return id;
+  }
+  
+  public Date getLastModificationDate() {
+    return lastModificationDate;  
+  }
+  
+  public void setLastModificationDate(Date lastModificationDate) {
+    this.lastModificationDate = lastModificationDate;
+  }
+  
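+  // Serializes the item to the eSciDoc item schema: properties (context, content model, optional pid),
+  // one "escidoc" md-record carrying the metadata, and the optional components.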
+  public String toXmlString() {
+    StringBuilder str = new StringBuilder();
+    str.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+    str.append("<escidocItem:item\n");
+    str.append("  xmlns:escidocItem=\"http://www.escidoc.de/schemas/item/0.9\"\n");
+    str.append("  xmlns:escidocMetadataRecords=\"http://www.escidoc.de/schemas/metadatarecords/0.5\"\n");
+    str.append("  xmlns:escidocComponents=\"http://www.escidoc.de/schemas/components/0.9\"\n");
+    str.append("  xmlns:prop=\"http://escidoc.de/core/01/properties/\"\n");
+    str.append("  xmlns:srel=\"http://escidoc.de/core/01/structural-relations/\"\n");
+    str.append("  xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n");
+    str.append("  xmlns:mpiwg=\"http://www.mpiwg-berlin.mpg.de/ns/mpiwg\"\n");
+    str.append("  xmlns:xml=\"http://www.w3.org/XML/1998/namespace\"");
+    if (lastModificationDate != null) {
+      XmlUtil xmlUtil = XmlUtil.getInstance();
+      String xsDateStr = xmlUtil.toXsDate(lastModificationDate);
+      str.append("  last-modification-date=\"" + xsDateStr + "\"");
+    }
+    str.append(">\n");
+    str.append("  <escidocItem:properties>\n");
+    str.append("    <srel:context xlink:href=\"" + contextId + "\"/>\n");
+    str.append("    <srel:content-model xlink:href=\"" + contentModelId + "\"/>\n");
+    if (pid != null) {
+      str.append("    <prop:pid>" + pid + "</prop:pid>\n");
+    }
+    str.append("    <prop:content-model-specific>\n");
+    str.append("      <my-content-model-value-structure />\n");
+    str.append("    </prop:content-model-specific>\n");
+    str.append("  </escidocItem:properties>\n");
+
+    str.append("  <!-- A Container could have multiple meta data records. -->\n");
+    str.append("  <escidocMetadataRecords:md-records>\n");
+    str.append("    <escidocMetadataRecords:md-record name=\"escidoc\">\n");
+    if (mdRecord != null) {
+      str.append(mdRecord.toXmlString());
+    } else {
+      str.append("      <metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n");
+      str.append("      </metadata>\n");
+    }
+    str.append("    </escidocMetadataRecords:md-record>\n");
+        
+    str.append("  </escidocMetadataRecords:md-records>\n");
+
+    if (components != null) {
+      str.append("  <escidocComponents:components>\n");
+      for (int i=0; i<components.size(); i++) {
+        Component component = components.get(i);
+        str.append(component.toXmlString());
+      }
+      str.append("  </escidocComponents:components>\n");
+    }
+    
+    str.append("</escidocItem:item>\n");
+    return str.toString();
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/MetadataRecord.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,233 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.util.Calendar;
+import java.util.Date;
+
+public class MetadataRecord {
+  private String docBase;  // e.g. echo or archimedes
+  private String identifier;  // identification filename, id number, etc.
+  private String language;
+  private String creator;  // author
+  private String title;
+  private String description;
+  private String publisher; // publisher with place: e.g. Springer, New York
+  private String type; // mime type: e.g. text/xml  // TODO: this is actually the "format" field --> define an additional instance variable "format"
+  private String rights; // e.g. open access
+  private Date date; // creation date, modification date, etc.
+  private String license;  // e.g. http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration
+  private String accessRights;  // e.g. free
+  private String mediaType;  // e.g. image or fulltext
+  private String eXistIdentifier; // e.g. /echo/la/Benedetti_1585.xml
+  private String echoLink; // e.g. echo.mpiwg-berlin.mpg.de?titleID=163127KK
+  private String echoDir; // e.g. /permanent/echo/china/songy_tiang_zh_1637
+ 
+  public MetadataRecord() {
+    
+  }
+  
+  public MetadataRecord(String identifier, String language, String creator, String title, String description, String publisher, String type, String rights, Date date) {
+    this.identifier = identifier;
+    this.language = language;
+    this.creator = creator;
+    this.title = title;
+    this.description = description;
+    this.publisher = publisher;
+    this.type = type;
+    this.rights = rights;
+    this.date = date;
+  }
+  
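+  // Serializes the record as a <metadata> element with Dublin Core fields plus the MPIWG-specific
+  // extensions (license, accessRights, mediaType, exist-identifier, echolink, echodir).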
+  public String toXmlString() {
+    String xmlString = "<metadata xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n";
+    if (identifier != null)
+      xmlString += "<dc:identifier>" + identifier + "</dc:identifier>\n";
+    if (language != null)
+      xmlString += "<dc:language>" + language + "</dc:language>\n";
+    if (creator != null)
+      xmlString += "<dc:creator>" + creator + "</dc:creator>\n";
+    if (title != null)
+      xmlString += "<dc:title>" + title + "</dc:title>\n";
+    if (description != null)
+      xmlString += "<dc:description>" + description + "</dc:description>\n";
+    if (publisher != null)
+      xmlString += "<dc:publisher>" + publisher + "</dc:publisher>\n";
+    if (type != null)
+      xmlString += "<dc:type>" + type + "</dc:type>\n";
+    if (rights != null)
+      xmlString += "<dc:rights>" + rights + "</dc:rights>\n";
+    if (date != null)
+      xmlString += "<dc:date>" + date + "</dc:date>\n";
+    if (license != null)
+      xmlString += "<mpiwg:license>" + license + "</mpiwg:license>\n";
+    if (accessRights != null)
+      xmlString += "<mpiwg:accessRights>" + accessRights + "</mpiwg:accessRights>\n";
+    if (mediaType != null)
+      xmlString += "<mpiwg:mediaType>" + mediaType + "</mpiwg:mediaType>\n";
+    if (eXistIdentifier != null)
+      xmlString += "<mpiwg:exist-identifier>" + eXistIdentifier + "</mpiwg:exist-identifier>\n";
+    if (echoLink != null)
+      xmlString += "<mpiwg:echolink>" + echoLink + "</mpiwg:echolink>\n";
+    if (echoDir != null)
+      xmlString += "<mpiwg:echodir>" + echoDir + "</mpiwg:echodir>\n";
+    xmlString += "</metadata>\n";
+    return xmlString;
+  }
+
+  public boolean hasEchoDocBase() {
+    boolean retValue = false;
+    if (docBase != null && docBase.equals("echo"))
+      return true;
+    return retValue;
+  }
+  
+  public boolean hasArchimedesDocBase() {
+    boolean retValue = false;
+    if (docBase != null && docBase.equals("archimedes"))
+      return true;
+    return retValue;
+  }
+  
+  public String getDocBase() {
+    return docBase;
+  }
+
+  public void setDocBase(String docBase) {
+    this.docBase = docBase;
+  }
+
+  public String getMediaType() {
+    return mediaType;
+  }
+
+  public void setMediaType(String mediaType) {
+    this.mediaType = mediaType;
+  }
+
+  public String getRights() {
+    return rights;
+  }
+
+  public void setRights(String rights) {
+    this.rights = rights;
+  }
+
+  public String getLicense() {
+    return license;
+  }
+
+  public void setLicense(String license) {
+    this.license = license;
+  }
+
+  public String getAccessRights() {
+    return accessRights;
+  }
+
+  public void setAccessRights(String accessRights) {
+    this.accessRights = accessRights;
+  }
+
+  public String getEchoLink() {
+    return echoLink;
+  }
+
+  public void setEchoLink(String echoLink) {
+    this.echoLink = echoLink;
+  }
+
+  public String getEchoDir() {
+    return echoDir;
+  }
+
+  public void setEchoDir(String echoDir) {
+    this.echoDir = echoDir;
+  }
+
+  public String toString() {
+    return toXmlString();
+  }
+
+  public String getCreator() {
+    return creator;
+  }
+
+  public void setCreator(String creator) {
+    this.creator = creator;
+  }
+
+  public String getTitle() {
+    return title;
+  }
+
+  public void setTitle(String title) {
+    this.title = title;
+  }
+
+  public Date getDate() {
+    return date;
+  }
+
+  public void setDate(Date date) {
+    this.date = date;
+  }
+
+  public String getYear() {
+    String year = null;
+    if (date != null) {
+      Calendar cal = Calendar.getInstance();
+      cal.setTime(date);
+      int iYear = cal.get(Calendar.YEAR);
+      year = "" + iYear;
+    }
+    return year;
+  }
+  
+  public String getDescription() {
+    return description;
+  }
+
+  public String getEXistIdentifier() {
+    return eXistIdentifier;
+  }
+
+  public void setEXistIdentifier(String eXistIdentifier) {
+    this.eXistIdentifier = eXistIdentifier;
+  }
+
+  public void setDescription(String description) {
+    this.description = description;
+  }
+
+  public String getIdentifier() {
+    return identifier;
+  }
+
+  public void setIdentifier(String identifier) {
+    this.identifier = identifier;
+  }
+
+  public String getLanguage() {
+    return language;
+  }
+
+  public void setLanguage(String language) {
+    this.language = language;
+  }
+
+  public String getPublisher() {
+    return publisher;
+  }
+
+  public void setPublisher(String publisher) {
+    this.publisher = publisher;
+  }
+
+  public String getType() {
+    return type;
+  }
+
+  public void setType(String type) {
+    this.type = type;
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/TestESciDoc.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,367 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Scanner;
+
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.Source;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.sax.SAXSource;
+import javax.xml.validation.Schema;
+import javax.xml.validation.SchemaFactory;
+import javax.xml.validation.Validator;
+
+import org.quartz.impl.StdSchedulerFactory;
+import org.w3c.dom.Document;
+import org.w3c.dom.Node;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlChainScheduler;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+
+
+public class TestESciDoc {
+  private String cookieId;
+  private ESciDocRestSession eSciDocRestSession;
+  private String organizationalUnit = MpdlConstants.MPDL_ESCIDOC_OUM_ID;
+  private String fullContextId = MpdlConstants.MPDL_ESCIDOC_CONTEXT_ID;
+  private String archimedesContainerId = MpdlConstants.MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID;
+  private String echoContainerId = MpdlConstants.MPDL_ESCIDOC_ECHO_CONTAINER_ID;
+
+  public static void main(String[] args) {
+    try {
+      /*
+      byte[] ligatureBytes = new byte[5];
+      ligatureBytes[0] = (byte) Integer.parseInt("61", 16);
+      ligatureBytes[1] = (byte) Integer.parseInt("74", 16);
+      ligatureBytes[2] = (byte) Integer.parseInt("EE", 16);
+      ligatureBytes[3] = (byte) Integer.parseInt("A2", 16);
+      ligatureBytes[4] = (byte) Integer.parseInt("BF", 16);
+      
+      String ligature = new String(ligatureBytes, "utf-8");
+      char[] chars = new char[3];
+      chars[0] = ligature.charAt(0);
+      chars[1] = ligature.charAt(1);
+      chars[2] = ligature.charAt(2);
+      int codepoint = Character.codePointAt(chars, 2);
+      int num = Character.getNumericValue(ligature.charAt(2));
+      int type = Character.getType(ligature.charAt(2));
+      */
+      
+      TestESciDoc test = new TestESciDoc();
+      test.init("jwillenborg");  // initialize the eSciDoc session as user jwillenborg (login delivers the session cookie)
+      
+      // test.grant("aeisemann", "admin");
+      String uid = test.getUserId("aeisemann");
+      String users = test.getAllUsers();
+      String grantAdmin = test.getGrantHrefByUserNameAndRoleName("aeisemann", "escidoc:role-system-administrator");
+      String grants = test.getGrantsByUserName("aeisemann");
+      String bla = "";
+
+      // test.testSchemaValidation();
+
+      // test.deleteItem("/ir/item/escidoc:48488");
+      // test.deleteContainer("/ir/container/escidoc:48486");
+      /*
+      String containerId = test.createContainer("testJoey1");
+      System.out.println("Begin: " + (new Date()).getTime());
+      Date successDate = test.addMembersToContainer("/ir/container/escidoc:41646");
+      System.out.println("End: " + (new Date()).getTime());
+      */
+
+      // String contextId = test.createContext(test.organizationalUnit, "MPDL-XML-Test", "MPDL-XML-Test", "MpdlType");
+      // test.openContext("escidoc:38600");
+
+      // String containerId = test.createContainer("eXistArchimedesContainer");
+      // System.out.println(containerId);
+      // String containerId = test.createContainer("eXistEchoContainer");
+
+      /*
+      for (int i=0; i< 443; i++) {
+        MetadataRecord mdRecordImage = new MetadataRecord();
+        mdRecordImage.setIdentifier("file_" + i);
+        mdRecordImage.setTitle("ECHO scanned page: " + i);
+        String fileName = "000" + i;
+        String srcUrl = "http://echo.mpiwg-berlin.mpg.de/zogilib?fn=/permanent/library/" + "163127KK" + "/pageimg/" + fileName;  // TODO
+        String itemId = test.createItem("mpiwg:47114711", mdRecordImage, fileName, "image/jpeg", "JPEG_DEFAULT", "external-url", srcUrl);
+        System.out.println(i + ". " + itemId + " created");
+      }
+      */
+      /*
+      String srcUrlAlvarus = "http://mpdl-proto.mpiwg-berlin.mpg.de/exist/rest/db/mpdl/documents/standard/echo/la/alvarus_1509_lat_V40_10.xml";
+      Date pubYearAlvarus = XmlUtil.getInstance().toDate("1509-01-01T00:00:00.000Z");
+      MetadataRecord mdAlvarus = new MetadataRecord("/echo/la/alvarus_1509_lat_V40_10.xml", "la", "Alvarus, Thomas", "Liber de triplici motu proportionibus annexis magiſtri Aluari Thome Ulixboneñ philoſophicas Suiſeth calculationes ex parte declarans", null, null, "text/xml", pubYearAlvarus);
+      String result = test.createItemInContainer(test.echoContainerId, mdAlvarus.getIdentifier(), mdAlvarus, srcUrlAlvarus);
+      
+      String srcUrlBenedetti = "http://mpdl-proto.mpiwg-berlin.mpg.de/exist/rest/db/mpdl/documents/standard/echo/la/Benedetti_1585.xml";
+      Date pubYearBenedetti = XmlUtil.getInstance().toDate("1585-01-01T00:00:00.000Z");
+      MetadataRecord mdBenedetti = new MetadataRecord("/echo/la/Benedetti_1585.xml", "la", "Benedetti, Giovanni Battista de", "Diversarum Speculationum mathematicum, & physicarum liber", null, null, "text/xml", pubYearBenedetti);
+      String result = test.createItemInContainer(test.echoContainerId, mdBenedetti.getIdentifier(), mdBenedetti, srcUrlBenedetti);
+      
+      String srcUrlEuclid = "http://mpdl-proto.mpiwg-berlin.mpg.de/exist/rest/db/mpdl/documents/standard/echo/el/Euclid-300.xml";
+      Date pubYearEuclid = XmlUtil.getInstance().toDate("0300-01-01T00:00:00.000Z");
+      MetadataRecord mdEuclid = new MetadataRecord("/echo/el/Euclid-300.xml", "el", "Euclid", "Στοιχεῖα", null, null, "text/xml", pubYearEuclid);
+      String result = test.createItemInContainer(test.echoContainerId, mdEuclid.getIdentifier(), mdEuclid, srcUrlEuclid);
+      
+      String srcUrlEuclid2 = "http://mpdl-proto.mpiwg-berlin.mpg.de/exist/rest/db/mpdl/documents/standard/echo/zh/Euclid_1966_V8.xml";
+      Date pubYearEuclid2 = XmlUtil.getInstance().toDate("0300-01-01T00:00:00.000Z");
+      MetadataRecord mdEuclid2 = new MetadataRecord("/echo/zh/Euclid_1966_V8.xml", "zh", "Euclid", "Jihe yuanben, 幾何原本", null, null, "text/xml", pubYearEuclid2);
+      String result = test.createItemInContainer(test.echoContainerId, mdEuclid2.getIdentifier(), mdEuclid2, "/echo/zh/Euclid_1966_V8.xml", "text/xml", "any fulltext", "internal-managed", srcUrlEuclid2);
+      
+      String items = test.getItemsByContainerIdAndFilter(test.echoContainerId, null);
+      */
+      
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+  private void init(String userName) throws ApplicationException {
+    Scanner in = new Scanner(System.in);
+    System.out.print("Username: " + userName + ", Password: ");
+    String password = in.nextLine();
+    in.close();
+    cookieId = ESciDocRestSession.login(userName, password);
+    eSciDocRestSession = ESciDocRestSession.getInstance(cookieId);
+    fullContextId = MpdlConstants.MPDL_ESCIDOC_CONTEXT_ID;
+  }
+
+  private void testSchemaValidation() throws ApplicationException {
+    String[] rncSchemaFiles = {
+        "echo/echo.rnc",
+        "echo/modules/echo-datatype.rnc", "echo/modules/echo-handwritten.rnc", "echo/modules/echo-start.rnc", 
+        "echo/modules/echo-attribute.rnc", "echo/modules/echo-de.rnc", "echo/modules/echo-import-mathml.rnc", "echo/modules/echo-text.rnc", 
+        "echo/modules/echo-block-scholarly.rnc", "echo/modules/echo-div.rnc", "echo/modules/echo-import-xhtml.rnc", "echo/modules/echo-textflows.rnc", 
+        "echo/modules/echo-block.rnc", "echo/modules/echo-figure.rnc", "echo/modules/echo-mathematics.rnc",  
+        "echo/modules/echo-chinese-text.rnc", "echo/modules/echo-float.rnc", "echo/modules/echo-metadata.rnc", 
+        "echo/modules/echo-content-scholarly.rnc", "echo/modules/echo-gap.rnc", "echo/modules/echo-milestone.rnc", 
+        "echo/modules/echo-content.rnc", "echo/modules/echo-gis.rnc", "echo/modules/echo-note.rnc", 
+        "xhtml/xhtml-datatypes.rnc", "xhtml/xhtml-list.rnc", "xhtml/xhtml-attribs.rnc", "xhtml/xhtml-basic-table.rnc"
+        };
+    String[] schemas = {
+        "echo-schema/dcterms.xsd", "echo-schema/echo-datatype.xsd", "echo-schema/echo-handwritten.xsd", "echo-schema/echo-start.xsd", "echo-schema/xhtml-datatypes.xsd",
+        "echo-schema/echo-attribute.xsd", "echo-schema/echo-de.xsd", "echo-schema/ echo-import-mathml.xsd", "echo-schema/echo-text.xsd", "echo-schema/xhtml-list.xsd",
+        "echo-schema/echo-block-scholarly.xsd", "echo-schema/echo-div.xsd", "echo-schema/echo-import-xhtml.xsd", "echo-schema/echo-textflows.xsd", "echo-schema/xlink.xsd",
+        "echo-schema/echo-block.xsd", "echo-schema/echo-figure.xsd", "echo-schema/echo-mathematics.xsd", "echo-schema/echo.xsd", "echo-schema/xml.xsd",
+        "echo-schema/echo-chinese-text.xsd", "echo-schema/echo-float.xsd", "echo-schema/  echo-metadata.xsd", "echo-schema/local.xsd", "echo-schema/xsi.xsd",
+        "echo-schema/echo-content-scholarly.xsd", "echo-schema/echo-gap.xsd", "echo-schema/echo-milestone.xsd", "echo-schema/xhtml-attribs.xsd",
+        "echo-schema/echo-content.xsd", "echo-schema/echo-gis.xsd", "echo-schema/echo-note.xsd", "echo-schema/xhtml-basic-table.xsd"
+        };
+    File xmlFile = new File("/Users/jwillenborg/texts/echo/SongYingxing_1637.xml");
+    // validate(xmlFile, schemas);
+    validateByRelaxNG(xmlFile, rncSchemaFiles);
+  }
+  
+  private void validateDocumentBuilder(File xmlFile, String[] schemaFileNames) throws ApplicationException {
+    String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
+    String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; 
+    String W3C_XML_SCHEMA = XMLConstants.W3C_XML_SCHEMA_NS_URI; 
+    Node root = null;
+    try {
+      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      dbf.setNamespaceAware(true);
+      dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); 
+      dbf.setAttribute(JAXP_SCHEMA_SOURCE, schemaFileNames);
+      DocumentBuilder db = dbf.newDocumentBuilder();
+      Document doc = db.parse(xmlFile);   
+      root = doc.getFirstChild();
+      String bla = "";
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
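+  // Validates against the RELAX NG compact syntax: registers the thaiopensource (Jing)
+  // CompactSyntaxSchemaFactory for the RELAX NG namespace, compiles echo.rnc from the MPDL test
+  // server and runs a JAXP Validator over the file.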
+  public void validateByRelaxNG(File xmlFile, String[] schemaFileNames) throws ApplicationException {
+    // System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.XMLSyntaxSchemaFactory");
+    System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory");
+    // RELAX NG factory
+    SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
+    // Compile the schema.
+    Schema schema = null;
+    try {
+      URL schemaUrl = new URL("http://mpdl-test.mpiwg-berlin.mpg.de:30030/exist/rest/db/mpdl/schema/echo/echo.rnc");
+      schema = factory.newSchema(schemaUrl);
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    }
+    // Get a validator from the schema.
+    Validator validator = schema.newValidator();
+    // Check the document 
+    InputSource inputSource = new InputSource(xmlFile.getPath());
+    Source source = new SAXSource(inputSource); 
+    try {
+      validator.validate(source);
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }    
+  }
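+  // Sketch added for illustration (not in the original changeset): the same RELAX NG
+  // validation against a local copy of the compact-syntax schema instead of the REST URL.
+  // The rncFile argument is supplied by the caller; the factory lookup relies on the
+  // CompactSyntaxSchemaFactory system property as set in validateByRelaxNG above.
+  private void validateByRelaxNGLocal(File xmlFile, File rncFile) throws ApplicationException {
+    System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory");
+    SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
+    try {
+      // compile the compact-syntax schema from the local file and validate the document
+      Schema schema = factory.newSchema(rncFile);
+      Validator validator = schema.newValidator();
+      validator.validate(new SAXSource(new InputSource(xmlFile.getPath())));
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }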
+  
+  private void validate(File xmlFile, String[] schemaFileNames) throws ApplicationException {
+    SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
+    // Compile the schema.
+    Schema schema = null;
+    Source[] schemaInputSources = new SAXSource[schemaFileNames.length];
+    for (int i=0; i<schemaFileNames.length; i++) {
+      String schemaFileName = "mpdl/extensions/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/" + schemaFileNames[i];
+      InputSource inputSource = new InputSource(schemaFileName);
+      Source source = new SAXSource(inputSource); 
+      schemaInputSources[i] = source;
+    }
+    try {
+      schema = factory.newSchema(schemaInputSources);
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    }
+    // Get a validator from the schema.
+    Validator validator = schema.newValidator();
+    // Check the document 
+    InputSource inputSource = new InputSource(xmlFile.getPath());
+    Source source = new SAXSource(inputSource); 
+    // DOMSource source = new DOMSource(docNode);  // TODO possibly call with a Node only
+    try {
+      validator.validate(source);
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }    
+  }
+  
+  private String getUserId(String userName) throws ApplicationException {
+    String userId = eSciDocRestSession.getUserId(userName);
+    return userId;
+  }
+  
+  private String getAllUsers() throws ApplicationException {
+    String users = eSciDocRestSession.getAllUsers();
+    return users;
+  }
+  
+  private String getGrantsByUserName(String userName) throws ApplicationException {
+    String grant = eSciDocRestSession.getGrantsByUserName(userName);
+    return grant;
+  }
+  
+  private String getGrantHrefByUserNameAndRoleName(String userName, String roleName) throws ApplicationException {
+    String grant = eSciDocRestSession.getGrantHrefByUserNameAndRoleName(userName, roleName);
+    return grant;
+  }
+  
+  private void grant(String userId, String role) throws ApplicationException {
+    eSciDocRestSession.grant(userId, role);
+  }
+  
+  private String createContext(String organizationalUnit, String name, String description, String type) throws ApplicationException {
+    fullContextId = eSciDocRestSession.createContext(organizationalUnit, name, description, type);
+    return fullContextId;
+  }
+  
+  private void openContext(String contextId) throws ApplicationException {
+    eSciDocRestSession.openContext(contextId);
+  }
+
+  private String createContainer(String containerName) throws ApplicationException {
+    MetadataRecord mdContainerEcho = new MetadataRecord(containerName, null, null, null, null, null, null, null, null);
+    String containerId = eSciDocRestSession.createContainer(containerName, mdContainerEcho);
+    // String nextContainerId = eSciDocRestSession.createContainerInContainer(containerName, mdContainerEcho, "/ir/container/escidoc:25163");
+    // String bla = eSciDocRestSession.createItemInContainer("bla", mdContainerEcho, "bla.xml", "text/xml", "any fulltext", "internal-managed", "/ir/container/escidoc:25164", "http://test.com");
+    String pageImageUrl = "http://nausikaa2.rz-berlin.mpg.de/digitallibrary/servlet/Scaler?fn=/permanent/archimedes/monte_mecha_037_it_1581/037-01-pageimg&amp;pn=1&amp;dh=600";  // TODO
+    // String pageItemId = eSciDocRestSession.createItemInContainer(containerId, "bla", mdContainerEcho, "/permanent/archimedes/monte_mecha_037_it_1581", "image/jpeg", "JPEG_DEFAULT", "external-url", pageImageUrl);  // TODO
+    // pageItemId = eSciDocRestSession.createItemInContainer("/ir/container/escidoc:25164", "bla", mdContainerEcho, "/permanent/archimedes/monte_mecha_037_it_1581", "image/jpeg", "JPEG_DEFAULT", "external-url", pageImageUrl);  // TODO
+    // pageItemId = eSciDocRestSession.createItemInContainer("/ir/container/escidoc:25164", "bla", mdContainerEcho, "/permanent/archimedes/monte_mecha_037_it_1581", "image/jpeg", "JPEG_DEFAULT", "external-url", pageImageUrl);  // TODO
+    return containerId;
+  }
+
+  private Item createItemInContainer(String containerId, String pid, MetadataRecord mdRecord, ArrayList<Component> components) throws ApplicationException {
+    Item result = eSciDocRestSession.createItemInContainer(containerId, pid, mdRecord, components);
+    return result;
+  }
+  
+  private Item createItem(String pid, MetadataRecord mdRecord, ArrayList<Component> components) throws ApplicationException {
+    Item result = eSciDocRestSession.createItem(pid, mdRecord, components);
+    return result;
+  }
+  
+  private Date addMembersToContainer(String containerId) throws ApplicationException {
+    String modDateStr = "2010-04-16T15:00:53.409Z";
+    Date modDate = XmlUtil.getInstance().toDate(modDateStr);
+    ArrayList<String> memberIds = new ArrayList<String>();
+    for (int i=40761; i<= 41645; i = i + 2) {
+      String memberId = "" + i;
+      memberIds.add(memberId);
+    }
+    Date result = eSciDocRestSession.addMembers(containerId, modDate, memberIds);
+    return result;
+  }
+  
+  private String getItemsByContainerIdAndFilter(String containerId, String filter) throws ApplicationException {
+    String members = eSciDocRestSession.getMembersByContainerIdAndFilter(containerId, filter);
+    return members;
+  }
+
+  private void deleteContainer(String containerId) throws ApplicationException {
+    eSciDocRestSession.deleteContainer(containerId);
+  }
+
+  private void deleteItem(String itemId) throws ApplicationException {
+    eSciDocRestSession.deleteItem(itemId);
+  }
+
+  private void test() {
+    /*
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    ClientSession eSciDocClientSession = new ClientSession("http", "xserve07.mpiwg-berlin.mpg.de", 8080, "jwillenborg", "pucki123");
+    String contextId = "/ir/context/escidoc:23002";
+    String filterDetail = "<filter></filter><order-by sorting=\"ascending\">/id</order-by>";
+    String filter = "<param>" + filterDetail + "</param>";
+    String members = eSciDocClientSession.getItemsByContainerIdAndFilter("escidoc:23003", filter);
+    System.out.println(members);
+
+    String item = eSciDocClientSession.getItem("escidoc:13003");
+    String itemId = xmlUtil.getFirstElementAttributeValue(item, "prop:latest-version", "xlink:href");
+    System.out.println(itemId);
+    /*
+    Date pubYearAlvarus = xmlUtil.toDate("1509-01-01T00:00:00.000Z");
+    MetadataRecord mdAlvarus = new MetadataRecord("/archimedes/la/alvarus_1509_lat_V40_10.xml", "la", "Alvarus, Thomas", "Liber de triplici motu proportionibus annexis magiſtri Aluari Thome Ulixboneñ philoſophicas Suiſeth calculationes ex parte declarans", null, null, "text/xml", pubYearAlvarus);
+    String srcUrlAlvarus = "http://mpdl-proto.mpiwg-berlin.mpg.de/exist/rest/db/mpdl/documents/standard/echo/la/alvarus_1509_lat_V40_10.xml";
+    String result = eSciDocClientSession.createItemInContainer(contextId, "MPDL-XML-Test", "abcdefg", mdAlvarus, "escidoc:23003", srcUrlAlvarus);
+    */
+
+    /*
+    Date pubYearAgricola = xmlUtil.toDate("1912-01-01T00:00:00.000Z");
+    MetadataRecord mdAgricola = new MetadataRecord("/archimedes/en/agric_remet_002_en.xml", "la", "Agricola, Georgius", "De re metallica", null, "London", "text/xml", pubYearAgricola);
+    String srcUrlAgricola = "http://mpdl-proto.mpiwg-berlin.mpg.de/exist/rest/db/mpdl/documents/standard/archimedes/en/agric_remet_002_en.xml";
+    String item = eSciDocClientSession.getItemById("escidoc:23012");
+    String lastVersionDateStr = xmlUtil.getFirstElementValue(item, "version:date");
+    Date lastModificationDate = xmlUtil.toDate(lastVersionDateStr);
+    String lastModificationDateStr = xmlUtil.toXsDate(lastModificationDate);
+    String itemXmlResult = eSciDocClientSession.updateItem(contextId, "MPDL-XML-Test", "abcdefg", mdAgricola, srcUrlAgricola, "escidoc:23012", lastModificationDate);
+    */
+    /*
+    String itemIdAgricola = xmlUtil.getFirstElementAttributeValue(itemXmlResult, "prop:latest-version", "xlink:href");
+    eSciDocClientSession.deleteItem(contextId, "escidoc:23010");
+    */
+    /*
+    String containerName = "testEXistEcho";
+    MetadataRecord mdRecord = new MetadataRecord("testEXistArchimedes", "Echo document container");
+    String containerXmlResult = eSciDocClientSession.addContainer("/ir/context/escidoc:23002", "MPDL-XML-Test", containerPid, mdRecord);
+    String containerId = xmlUtil.getFirstElementAttributeValue(containerXmlResult, "prop:latest-version", "xlink:href");
+    System.out.println(containerId);
+    */
+    // eSciDocClientSession.deleteItem(contextId, "escidoc:23012");
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/escidoc/TestESciDocEXist.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,305 @@
+package de.mpg.mpiwg.berlin.mpdl.escidoc;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Scanner;
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpException;
+import org.apache.commons.httpclient.methods.DeleteMethod;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.httpclient.methods.InputStreamRequestEntity;
+import org.apache.commons.httpclient.methods.PostMethod;
+import org.apache.commons.httpclient.methods.PutMethod;
+import org.apache.commons.httpclient.params.HttpMethodParams;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+
+public class TestESciDocEXist {
+  private String protocol = "http";
+  private String host = "mpdl-test.mpiwg-berlin.mpg.de";
+  private int port = 30030;
+  private String userName = "jwillenborg";
+  private HttpClient httpClient;
+  private String eSciDocCookieId;
+
+  public static void main(String[] args) throws ApplicationException {
+    try {
+      TestESciDocEXist test = new TestESciDocEXist();
+      test.init();
+      test.testCalls();
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+  private void init() throws ApplicationException {
+    httpClient = new HttpClient();
+    Scanner in = new Scanner(System.in);
+    System.out.print("Username: " + userName + ", Type your password: ");
+    String password = in.nextLine();
+    in.close();
+    eSciDocCookieId = ESciDocRestSession.login(userName, password);
+  }
+  
+  private void testCalls() throws ApplicationException {
+    String result = "";
+    xQuery();
+    // xQueryPath();
+    // xQueryByEscidocItemId();
+    // result = createItem();
+    // result = deleteItem();
+  }
+  
+  private void xQuery() throws ApplicationException {
+    String result = "";
+    try {
+      String xquery = 
+        "xquery version \"1.0\"; \n" + 
+        "declare namespace echo=\"http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/\"; \n" + 
+        "let $doc := doc(\"/db/mpdl/documents/standard/echo/zh/SongYingxing_1637.xml\") \n" +
+        "let $sentences := $doc//echo:s \n" +
+        "return $sentences \n";
+      String xQueryEncoded = URLEncoder.encode(xquery, "utf-8");
+      String request = "/mpdl/escidoc/exist:xquery/execute?query=" + xQueryEncoded + "&startRecord=1&maximumRecords=50";
+      result = performGetRequest(request);
+      System.out.println(result);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);      
+    }
+  }
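+  // For orientation (added comment; host and port taken from the fields above): the call
+  // resolves to a GET against
+  //   http://mpdl-test.mpiwg-berlin.mpg.de:30030/mpdl/escidoc/exist:xquery/execute?query=<URL-encoded XQuery>&startRecord=1&maximumRecords=50
+  // with the login cookie sent as "Cookie: escidocCookie=<id>" (see performGetRequest).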
+  
+  
+  private void xQueryPath() throws ApplicationException {
+    String result = "";
+    try {
+      String xqueryPath = "/mpdl/interface/doc-query.xql";
+      String xQueryPathEncoded = URLEncoder.encode(xqueryPath, "utf-8");
+      String xqueryPathParams = 
+        "<params>" +
+        "<param name=\"queryType\">fulltextMorph</param>" +
+        "<param name=\"document\">/echo/la/Benedetti_1585.xml</param>" +
+        "<param name=\"mode\">text</param>" +
+        "<param name=\"query\">multiplicare</param>" +
+        "<param name=\"queryResultPN\">1</param>" +
+        "<param name=\"queryResultPageSize\">10</param>" +
+        "</params>";
+      String xqueryPathParamsEncoded = URLEncoder.encode(xqueryPathParams, "utf-8");
+      String request = "/mpdl/escidoc/exist:xquery/execute?queryPath=" + xQueryPathEncoded + "&parameters=" + xqueryPathParamsEncoded;
+      result = performGetRequest(request);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);      
+    }
+    System.out.println(result);
+  }
+  
+  private void xQueryByEscidocItemId() throws ApplicationException {
+    // /ir/item/escidoc:47344 is the fulltext item of "/echo/la/Benedetti_1585"
+    String result = "";
+    try {
+      String xquery = "//echo:s";
+      String xqueryEncoded = URLEncoder.encode(xquery, "utf-8");
+      String request = "/mpdl/escidoc/ir/item/escidoc:47344/exist:xquery/execute?query=" + xqueryEncoded + "&startRecord=1&maximumRecords=50";
+      result = performGetRequest(request);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);      
+    }
+    System.out.println(result);
+  }
+  
+  private String createItem() throws ApplicationException {
+    String xmlDocumentFileName = "/Users/jwillenborg/tmp/echo/la/Archimedes_1565.xml";
+    ESciDocRestSession escidocRestSession = ESciDocRestSession.getInstance(eSciDocCookieId);
+    String eSciDocStageAreaUrl = escidocRestSession.uploadFileToESciDocStageArea(xmlDocumentFileName);  
+    String contentModelId = "/cmm/content-model/escidoc:persistent4";  // TODO  take final contentModelId: "/cmm/content-model/escidoc:exist-xml"
+    String contextId = "/ir/context/escidoc:38600";  // TODO take final contextId
+    String pid = escidocRestSession.getPid();
+    MetadataRecord mdRecord = new MetadataRecord();
+    String docBase = "echo";
+    mdRecord.setCreator("Archimedes");
+    mdRecord.setTitle("Archimedis De iis quae ve huntur in aqua libri duo");
+    Date py = XmlUtil.getInstance().toDate("1565" + "-01-01T00:00:00.000Z");
+    mdRecord.setDate(py);
+    mdRecord.setMediaType("fulltext");
+    mdRecord.setDocBase(docBase);
+    mdRecord.setLanguage("la");
+    mdRecord.setEXistIdentifier("/echo/la/Archimedes_1565.xml");
+    ArrayList<Component> components = new ArrayList<Component>();
+    String contentCategory = "fulltext XML - ECHO";
+    if (docBase != null && docBase.equals("archimedes"))
+      contentCategory = "fulltext XML - Archimedes";
+    Component componentXmlFulltext = new Component("valid", "public", contentCategory, "text/xml", eSciDocStageAreaUrl, "internal-managed");
+    components.add(componentXmlFulltext);
+    Item xmlFulltextItem = new Item(contextId, pid, mdRecord, contentModelId, components);
+    String xmlFulltextItemStr = xmlFulltextItem.toXmlString();
+    String containerIdOfFulltextItem = "/ir/container/escidoc:51122";
+    String newItemXmlStr = performPostRequest("/mpdl/escidoc" + containerIdOfFulltextItem + "/create-item", xmlFulltextItemStr, null);
+    return newItemXmlStr; 
+  }
+  
+  private String deleteItem() {
+    String itemId = "/ir/item/escidoc:73012";
+    String retStr = performDeleteRequest("/mpdl/escidoc" + itemId);
+    return retStr; 
+  }
+
+  private String createContainer() throws ApplicationException {
+    String containerIdOfEchoDocBase = "/ir/container/escidoc:51122";  // TODO
+    String contentModelId = "/cmm/content-model/escidoc:persistent4";  // TODO  take final contentModelId: "/cmm/content-model/escidoc:exist-xml"
+    String contextId = "/ir/context/escidoc:38600";  // TODO take final contextId
+    ESciDocRestSession escidocRestSession = ESciDocRestSession.getInstance(eSciDocCookieId);
+    String pid = escidocRestSession.getPid();
+    MetadataRecord mdRecord = new MetadataRecord();
+    mdRecord.setEXistIdentifier("/echo");
+    Container container = new Container(contentModelId, contextId, pid, mdRecord);
+    String xmlContainerStr = container.toXmlString();
+    String newContainerXmlStr = performPostRequest("/mpdl/escidoc" + containerIdOfEchoDocBase + "/create-container", xmlContainerStr, null);
+    return newContainerXmlStr; 
+  }
+  
+  private String deleteContainer() {
+    String containerId = "/ir/container/escidoc:71010";  // TODO
+    String retStr = performDeleteRequest("/mpdl/escidoc" + containerId);
+    return retStr; 
+  }
+
+  private String performPostRequest(String requestUrlStr, String requestInputStreamStr, HttpMethodParams parameter) throws ApplicationException {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestUrlStr;
+      PostMethod method = new PostMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + eSciDocCookieId);
+      method.setFollowRedirects(false); 
+      if (requestInputStreamStr != null) {
+        InputStream requestInputStream = new ByteArrayInputStream(requestInputStreamStr.getBytes("UTF-8"));
+        InputStreamRequestEntity inputStreamRequestEntity = new InputStreamRequestEntity(requestInputStream);
+        method.setRequestEntity(inputStreamRequestEntity); 
+      }
+      if (parameter != null) {
+        method.setParams(parameter);
+      }
+      httpClient.executeMethod(method); 
+      resultStr = method.getResponseBodyAsString();
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);      
+    } catch (IOException e) {
+      throw new ApplicationException(e);      
+    }
+    return resultStr; 
+  } 
+
+  private String performPostRequest(String requestUrlStr, File requestFile, HttpMethodParams parameter) throws ApplicationException {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestUrlStr;
+      PostMethod method = new PostMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + eSciDocCookieId);
+      method.setFollowRedirects(false); 
+      if (requestFile != null) {
+        /**
+        FilePart requestFilePart = new FilePart(requestFile.getName(), requestFile);
+        Part[] parts = { requestFilePart };
+        MultipartRequestEntity multipartRequestEntity = new MultipartRequestEntity(parts, method.getParams());
+        method.setRequestEntity(multipartRequestEntity);
+        **/
+        FileInputStream requestFileInputStream = new FileInputStream(requestFile);
+        InputStreamRequestEntity inputStreamRequestEntity = new InputStreamRequestEntity(requestFileInputStream);
+        method.setRequestEntity(inputStreamRequestEntity); 
+      }
+      if (parameter != null) {
+        method.setParams(parameter);
+      }
+      httpClient.executeMethod(method); 
+      resultStr = method.getResponseBodyAsString();
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);      
+    } catch (IOException e) {
+      throw new ApplicationException(e);      
+    }
+    return resultStr; 
+  } 
+
+  private String performPutRequestByBody(String requestName, String bodyContent) {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestName;
+      PutMethod method = new PutMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + eSciDocCookieId);
+      if (bodyContent != null) {
+        method.setRequestBody(bodyContent);
+      }
+      httpClient.executeMethod(method); 
+      resultStr = method.getResponseBodyAsString();
+      method.releaseConnection();
+    } catch (HttpException e) {
+      e.printStackTrace();      
+    } catch (IOException e) {
+      e.printStackTrace();      
+    }
+    return resultStr; 
+  } 
+
+  private String performDeleteRequest(String requestName) {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestName;
+      DeleteMethod method = new DeleteMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + eSciDocCookieId);
+      httpClient.executeMethod(method); 
+      resultStr = method.getResponseBodyAsString();
+      method.releaseConnection();
+    } catch (HttpException e) {
+      e.printStackTrace();      
+    } catch (IOException e) {
+      e.printStackTrace();      
+    }
+    return resultStr;
+  } 
+
+  private String performGetRequest(String requestName) {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestName;
+      GetMethod method = new GetMethod(urlStr);
+      method.setRequestHeader("Cookie", "escidocCookie=" + eSciDocCookieId);
+      httpClient.executeMethod(method); 
+      resultStr = method.getResponseBodyAsString();
+      method.releaseConnection();
+    } catch (HttpException e) {
+      e.printStackTrace();      
+    } catch (IOException e) {
+      e.printStackTrace();      
+    }
+    return resultStr;
+  } 
+
+  private String performPostRequestByBody(String requestUrlStr, String bodyContent) throws ApplicationException {
+    String resultStr = null;
+    try {
+      String urlStr = protocol + "://" + host + ":" + port + requestUrlStr;
+      PostMethod method = new PostMethod(urlStr);
+      method.setFollowRedirects(false); 
+      if (bodyContent != null) {
+        method.setRequestBody(bodyContent);
+      }
+      httpClient.executeMethod(method); 
+      resultStr = method.getResponseBodyAsString();
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);      
+    } catch (IOException e) {
+      throw new ApplicationException(e);      
+    }
+    return resultStr; 
+  } 
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/exception/ApplicationException.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,14 @@
+package de.mpg.mpiwg.berlin.mpdl.exception;
+
+public class ApplicationException extends Exception {
+
+  public ApplicationException(Exception e) {
+    super(e);
+  }
+
+  public ApplicationException(String str) {
+    super(str);
+  }
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/general/MpdlConstants.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,47 @@
+package de.mpg.mpiwg.berlin.mpdl.general;
+
+import java.util.Properties;
+
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+
+public class MpdlConstants {
+  public static String EXIST_HOME = System.getProperty("exist.home");
+  public static String MPDL_SYSTEM_PROPERTIES_FILENAME = EXIST_HOME + "/mpdl/extensions/mpdl-modules/mpdl-system.properties";   
+  public static Properties MPDL_SYSTEM_PROPERTIES = new Util().getProperties(MPDL_SYSTEM_PROPERTIES_FILENAME);
+  
+  // Mpdl: general settings
+  public static String MPDL_PROJECT_NAME = "mpdl";
+  public static String TYPE_STATIC = "static";
+  public static String TYPE_DYNAMIC = "dynamic";
+  
+  // eXist settings: data
+  public static String MPDL_EXIST_DATA_DIR = EXIST_HOME + "/webapp/WEB-INF/dataMpdl";  // other call would be: ConfigurationHelper.getExistHome()
+
+  // eXist settings: XML-RPC-Interface, doc-interface
+  public static String MPDL_FULL_EXIST_HOST_NAME = MPDL_SYSTEM_PROPERTIES.getProperty("exist.fullHostname");  // official eXist server name; used for user displays etc.
+  public static String MPDL_EXIST_HOST_NAME = MPDL_SYSTEM_PROPERTIES.getProperty("exist.hostname");  // used in XML-RPC-Interface etc.
+  public static int MPDL_EXIST_PORT = new Integer(MPDL_SYSTEM_PROPERTIES.getProperty("exist.port"));  // an alternative, System.getProperty("jetty.port"), does not work here
+  public static String MPDL_EXIST_ADMIN_USER_NAME = MPDL_SYSTEM_PROPERTIES.getProperty("exist.adminUserName");
+  public static String MPDL_EXIST_ADMIN_USER_PW = MPDL_SYSTEM_PROPERTIES.getProperty("exist.adminUserPW");
+  public static String MPDL_ECHO_RELAXNG_PATH = MPDL_SYSTEM_PROPERTIES.getProperty("exist.echoRelaxNGPath");
+  
+  // eSciDoc settings
+  public static String MPDL_ESCIDOC_HOST_NAME = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.hostname");
+  public static int MPDL_ESCIDOC_PORT = new Integer(MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.port"));
+  public static String MPDL_ESCIDOC_OUM_ID = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.oumId"); // organizational unit
+  public static String MPDL_ESCIDOC_CMM_ID = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.cmmId"); // content model
+  public static String MPDL_ESCIDOC_CONTEXT_ID = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.contextId"); // context id
+  public static String MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.archimedesContainerId");  // archimedes container id 
+  public static String MPDL_ESCIDOC_ECHO_CONTAINER_ID = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.echoContainerId");  // echo container id 
+  public static String MPDL_ESCIDOC_ADMIN_USER_NAME = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.adminUserName");
+  public static String MPDL_ESCIDOC_ADMIN_USER_PW = MPDL_SYSTEM_PROPERTIES.getProperty("escidoc.adminUserPW");
+
+  // Mpdl: language technology
+  public static String DEFAULT_LANGUAGE = "en";
+  public static int MORPHOLOGY_CACHE_SIZE = 1000000;
+
+  // Mpdl: language technology: static data management (BerkeleyDB etc.)
+  public static String MPDL_DATA_DIR = "/Users/jwillenborg/java/exist1.4/webapp/WEB-INF/dataMpdl";  // TODO
+  public static String MPDL_DOC_DIR = "/Users/jwillenborg/texts/mpdl";   // TODO
+  public static String MPDL_TEST_DATA_DIR = "/Users/jwillenborg/data/mpdl";  // TODO
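+  // For orientation (added, not in the original changeset): mpdl-system.properties is
+  // expected to provide the keys read above; the values shown here are placeholders only.
+  //   exist.fullHostname=mpdl-test.mpiwg-berlin.mpg.de
+  //   exist.hostname=localhost
+  //   exist.port=30030
+  //   exist.adminUserName=admin
+  //   exist.adminUserPW=<password>
+  //   exist.echoRelaxNGPath=/db/mpdl/schema/echo/echo.rnc
+  //   escidoc.hostname=<eSciDoc host>
+  //   escidoc.port=8080
+  //   escidoc.oumId=<organizational unit id>
+  //   escidoc.cmmId=<content model id>
+  //   escidoc.contextId=<context id>
+  //   escidoc.archimedesContainerId=<archimedes container id>
+  //   escidoc.echoContainerId=<echo container id>
+  //   escidoc.adminUserName=<user>
+  //   escidoc.adminUserPW=<password>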
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlFilter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,48 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+public class MpdlFilter extends TokenFilter {
+
+  public MpdlFilter(TokenStream in) {
+    super(in);
+  }
+
+  public Token next() throws IOException {
+    return getNext(null);
+  }
+  
+  public Token next(Token reusableToken) throws IOException {
+    return getNext(reusableToken);
+  }
+
+  private Token getNext(Token reusableToken) throws IOException {
+    Token nextToken = null;
+    if (reusableToken == null)
+      nextToken = input.next();
+    else
+      nextToken = input.next(reusableToken);
+    if (nextToken == null)
+      return null;
+    char[] termBuffer = nextToken.termBuffer();
+    int termBufferLength = nextToken.termLength();
+    int newTermBufferLength = 0;
+    // remove hyphens, newline and tab characters from the token
+    for(int i=0;i<termBufferLength;i++) {
+      char c = termBuffer[i];
+      if (c != '-' && c != '\n' && c != '\t')
+        termBuffer[newTermBufferLength++] = c;
+    }
+    // remove the apostrophe with "s" but not always  (e.g. not l'ogni but author's)
+    String tokenText = new String(termBuffer, 0, termBufferLength);
+    if (tokenText.endsWith("'s") || tokenText.endsWith("'S")) {
+      newTermBufferLength = newTermBufferLength - 2;
+    }
+    nextToken.setTermLength(newTermBufferLength);
+    return nextToken;
+  }
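+  // Illustration (added, not in the original changeset) of the filter's effect on single
+  // tokens, assuming the surrounding tokenizer has already split the text:
+  //   "multi-plicare"  -> "multiplicare"   (hyphen removed)
+  //   "author's"       -> "author"         (trailing 's stripped)
+  //   "l'ogni"         -> "l'ogni"         (internal apostrophe kept)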
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlMorphAnalyzer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,179 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.br.BrazilianAnalyzer;
+import org.apache.lucene.analysis.cz.CzechAnalyzer;
+import org.apache.lucene.analysis.de.GermanAnalyzer;
+import org.apache.lucene.analysis.fr.FrenchAnalyzer;
+import org.apache.lucene.analysis.nl.DutchAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+
+/**
+ * Analyzer for specific languages. Supports an external list of stopwords (words that
+ * will not be indexed at all) and an external list of exclusions (words that will
+ * not be stemmed, but indexed).
+ * A default set of stopwords is used unless an alternative list is specified; the
+ * exclusion list is empty by default.
+ */
+public class MpdlMorphAnalyzer extends Analyzer {
+  protected String language = MpdlConstants.DEFAULT_LANGUAGE;
+  
+  /**
+   * Contains the stopwords used with the StopFilter.
+   */
+  protected Set stopSet = new HashSet();
+
+  /**
+   * Contains words that should be indexed but not stemmed.
+   */
+  protected Set exclusionSet = new HashSet();
+
+  /**
+   * Builds an analyzer with the stop words for the default language
+   * (see <code>getStopWords</code>).
+   */
+  public MpdlMorphAnalyzer() {
+    String[] stopWords = getStopWords(language);  // stopwords for the language
+    stopSet = StopFilter.makeStopSet(stopWords);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlMorphAnalyzer(String[] stopwords) {
+    stopSet = StopFilter.makeStopSet(stopwords);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlMorphAnalyzer(Hashtable stopwords) {
+    stopSet = new HashSet(stopwords.keySet());
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlMorphAnalyzer(File stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+  }
+
+  public String getLanguage() {
+    return language;  
+  }
+  
+  protected void setLanguage(String lang) {
+    this.language = lang;
+  }
+  
+  /**
+   * Get stopwords for the language: fetch them from the open language analyzers for some languages.
+   * Taken from: http://www.perseus.tufts.edu/hopper/stopwords
+# English: a, a's, able, about, above, according, accordingly, across, actually, after, afterwards, again, against, ain't, all, allow, allows, almost, alone, along, already, also, although, always, am, among, amongst, an, and, another, any, anybody, anyhow, anyone, anything, anyway, anyways, anywhere, apart, appear, appreciate, appropriate, are, aren't, around, as, aside, ask, asking, associated, at, available, away, awfully, b, be, became, because, become, becomes, becoming, been, before, beforehand, behind, being, believe, below, beside, besides, best, better, between, beyond, both, brief, but, by, c, c'mon, c's, came, can, can't, cannot, cant, cause, causes, certain, certainly, changes, clearly, co, com, come, comes, concerning, consequently, consider, considering, contain, containing, contains, corresponding, could, couldn't, course, currently, d, definitely, described, despite, did, didn't, different, do, does, doesn't, doing, don't, done, down, downwards, during, e, each, edu, eg, eight, either, else, elsewhere, enough, entirely, especially, et, etc, even, ever, every, everybody, everyone, everything, everywhere, ex, exactly, example, except, f, far, few, fifth, first, five, followed, following, follows, for, former, formerly, forth, four, from, further, furthermore, g, get, gets, getting, given, gives, go, goes, going, gone, got, gotten, greetings, h, had, hadn't, happens, hardly, has, hasn't, have, haven't, having, he, he's, hello, help, hence, her, here, here's, hereafter, hereby, herein, hereupon, hers, herself, hi, him, himself, his, hither, hopefully, how, howbeit, however, i, i'd, i'll, i'm, i've, ie, if, ignored, immediate, in, inasmuch, inc, indeed, indicate, indicated, indicates, inner, insofar, instead, into, inward, is, isn't, it, it'd, it'll, it's, its, itself, j, just, k, keep, keeps, kept, know, known, knows, l, last, lately, later, latter, latterly, least, less, lest, let, let's, like, liked, likely, little, look, looking, looks, ltd, m, mainly, many, may, maybe, me, mean, meanwhile, merely, might, more, moreover, most, mostly, much, must, my, myself, n, name, namely, nd, near, nearly, necessary, need, needs, neither, never, nevertheless, new, next, nine, no, nobody, non, none, noone, nor, normally, not, nothing, novel, now, nowhere, o, obviously, of, off, often, oh, ok, okay, old, on, once, one, ones, only, onto, or, other, others, otherwise, ought, our, ours, ourselves, out, outside, over, overall, own, p, particular, particularly, per, perhaps, placed, please, plus, possible, presumably, probably, provides, q, que, quite, qv, r, rather, rd, re, really, reasonably, regarding, regardless, regards, relatively, respectively, right, s, said, same, saw, say, saying, says, second, secondly, see, seeing, seem, seemed, seeming, seems, seen, self, selves, sensible, sent, serious, seriously, seven, several, shall, she, should, shouldn't, since, six, so, some, somebody, somehow, someone, something, sometime, sometimes, somewhat, somewhere, soon, sorry, specified, specify, specifying, still, sub, such, sup, sure, t, t's, take, taken, tell, tends, th, than, thank, thanks, thanx, that, that's, thats, the, their, theirs, them, themselves, then, thence, there, there's, thereafter, thereby, therefore, therein, theres, thereupon, these, they, they'd, they'll, they're, they've, think, third, this, thorough, thoroughly, those, though, three, through, throughout, thru, thus, to, together, too, took, toward, towards, tried, tries, truly, try, trying, twice, two, u, un, under, 
unfortunately, unless, unlikely, until, unto, up, upon, us, use, used, useful, uses, using, usually, uucp, v, value, various, very, via, viz, vs, w, want, wants, was, wasn't, way, we, we'd, we'll, we're, we've, welcome, well, went, were, weren't, what, what's, whatever, when, whence, whenever, where, where's, whereafter, whereas, whereby, wherein, whereupon, wherever, whether, which, while, whilst, whither, who, who's, whoever, whole, whom, whose, why, will, willing, wish, with, within, without, won't, wonder, would, wouldn't, x, y, yes, yet, you, you'd, you'll, you're, you've, your, yours, yourself, yourselves, z, zero
+
+# Greek: a)/llos, a)/n, a)/ra, a)ll', a)lla/, a)po/, au)to/s, d', dai/, dai/s, de/, dh/, dia/, e(autou=, e)/ti, e)a/n, e)gw/, e)k, e)mo/s, e)n, e)pi/, ei), ei)/mi, ei)mi/, ei)s, ga/r, ga^, ge, h(, h)/, kai/, kata/, me/n, meta/, mh/, o(, o(/de, o(/s, o(/stis, o(/ti, oi(, ou(/tws, ou(=tos, ou), ou)/te, ou)=n, ou)de/, ou)dei/s, ou)k, para/, peri/, pro/s, so/s, su/, su/n, ta/, te, th/n, th=s, th=|, ti, ti/, ti/s, tis, to/, to/n, toi/, toiou=tos, tou/s, tou=, tw=n, tw=|, u(mo/s, u(pe/r, u(po/, w(/ste, w(s, w)=
+
+# Latin: ab, ac, ad, adhic, aliqui, aliquis, an, ante, apud, at, atque, aut, autem, cum, cur, de, deinde, dum, ego, enim, ergo, es, est, et, etiam, etsi, ex, fio, haud, hic, iam, idem, igitur, ille, in, infra, inter, interim, ipse, is, ita, magis, modo, mox, nam, ne, nec, necque, neque, nisi, non, nos, o, ob, per, possum, post, pro, quae, quam, quare, qui, quia, quicumque, quidem, quilibet, quis, quisnam, quisquam, quisque, quisquis, quo, quoniam, sed, si, sic, sive, sub, sui, sum, super, suus, tam, tamen, trans, tu, tum, ubi, uel, uero, unus, ut
+
+# Italian: a, ad, agli, al, alcun, alcuno, all', alla, alle, allo, altra, altre, altri, altro, assai, avere, bene, c', ch', che, chi, ci, cio, co', col, come, con, cosi, cosi\, d', da, dal, dall', dalla, dalle, de, de', degli, dei, del, dell', della, delle, dello, di, duo, e, ed, egli, essere, et, gia, gia\, gli, gran, grande, i, il, in, io, l', la, le, li, lo, ma, maggior, maggiore, mai, mio, molto, ne, ne', nel, nell', nella, nelle, non, o, ogn', ogni, oue, ove, per, perche, piu, piu\, poco, poi, puo, qual, qualche, qualcun, qualcuno, quale, quanta, quante, quanti, quanto, quasi, quella, quelle, quelli, quello, questa, queste, questi, questo, qui, s', se, sempre, senza, si, sotto, su, sua, sue, sui, suo, tal, tanta, tante, tanti, tanto, tra, tre, tutta, tutte, tutti, tutto, un, una, uno, vn, vna, vno
+
+# German: aber, alle, als, also, am, an, andern, auch, auf, aus, bei, bey, bis, da, daher, das, dass, de, dem, den, der, des, die, diese, dieser, dieses, doch, durch, eben, ein, eine, einem, einen, einer, eines, er, es, fur, gegen, haben, hat, ihre, im, in, ist, kan, man, mehr, mit, nach, nicht, noch, nur, oder, ohne, sehr, sei, selbst, sey, sich, sie, sind, so, uber, um, und, unter, vgl, vom, von, weil, welche, wenn, werden, wie, wird, zu, zur
+
+# French: a, amp, au, auec, aussi, autre, autres, aux, bien, car, ce, ces, cette, ceux, chose, choses, comme, d', dans, de, des, deux, dire, dont, du, elle, elles, en, encore, est, estre, et, faire, fait, faut, force, grande, ie, il, ils, l', la, le, les, leur, leurs, lors, luy, mais, mesme, n', ne, nous, on, ont, or, ou, par, parce, pas, peut, plus, plusieurs, point, pour, pourquoy, puis, qu', quand, que, qui, quoy, sa, sans, se, ses, si, soit, son, sont, sur, tous, tout, toutes, vn, vne, y
+   * @param language
+   * @return stopwords
+   */
+  public String[] getStopWords(String language) {
+    String[] stopwords = new String[0];
+    if (language != null) {
+      if (language.equals("en"))
+        stopwords = StandardAnalyzer.STOP_WORDS;
+      else if(language.equals("br"))
+        stopwords = BrazilianAnalyzer.BRAZILIAN_STOP_WORDS;
+      else if(language.equals("cz"))
+        stopwords = CzechAnalyzer.CZECH_STOP_WORDS;
+      else if(language.equals("de"))
+        stopwords = GermanAnalyzer.GERMAN_STOP_WORDS;
+      else if(language.equals("fr"))
+        stopwords = FrenchAnalyzer.FRENCH_STOP_WORDS;
+      else if(language.equals("nl"))
+        stopwords = DutchAnalyzer.DUTCH_STOP_WORDS;
+   }
+    return stopwords;
+  }
+  
+  /**
+   * Builds an exclusionlist from an array of Strings.
+   */
+  public void setStemExclusionTable(String[] exclusionlist) {
+    exclusionSet = StopFilter.makeStopSet(exclusionlist);
+  }
+
+  /**
+   * Builds an exclusionlist from a Hashtable.
+   */
+  public void setStemExclusionTable(Hashtable exclusionlist) {
+    exclusionSet = new HashSet(exclusionlist.keySet());
+  }
+
+  /**
+   * Builds an exclusionlist from the words contained in the given file.
+   */
+  public void setStemExclusionTable(File exclusionlist) throws IOException {
+    exclusionSet = WordlistLoader.getWordSet(exclusionlist);
+  }
+
+  /**
+   * Creates a TokenStream which tokenizes all the text in the provided Reader.
+   *
+   * @return A TokenStream built from an MpdlTokenizer filtered with
+   *         MpdlFilter, LowerCaseFilter, StopFilter and MpdlStemFilter
+   */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    MpdlNormalizer mpdlNormalizer = new MpdlNormalizer(language);
+    TokenStream result = new MpdlTokenizer(reader, language, mpdlNormalizer);
+    result = new MpdlFilter(result);  // filter to remove the hyphen in a token etc.
+    result = new LowerCaseFilter(result);
+    result = new StopFilter(result, stopSet);
+    result = new MpdlStemFilter(this, result, exclusionSet);
+    return result;
+  }
+  
+  public ArrayList<String> getToken(String inputString) {
+    ArrayList<String> token = new ArrayList<String>();
+    try {
+      Reader reader = new StringReader(inputString);
+      MpdlNormalizer mpdlNormalizer = new MpdlNormalizer(language);
+      TokenStream result = new MpdlTokenizer(reader, language, mpdlNormalizer);
+      result = new MpdlFilter(result);  // filter to remove the hyphen in a token etc.
+      result = new LowerCaseFilter(result);
+      result = new StopFilter(result, stopSet);
+      Token t = result.next();
+      while (t != null) {
+        String currentToken = new String(t.termBuffer(), 0, t.termLength());
+        token.add(currentToken);
+        t = result.next();
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return token;
+  }
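+  // Small demo added for illustration (not part of the original changeset, and only a
+  // sketch: it assumes the tokenizer and normalizer work standalone for the default
+  // language "en"); the sample sentence is arbitrary. getToken() lower-cases tokens and
+  // removes stopwords, but does not apply the stem filter.
+  public static void main(String[] args) {
+    MpdlMorphAnalyzer analyzer = new MpdlMorphAnalyzer();
+    ArrayList<String> tokens = analyzer.getToken("The impetus of the body is conserved");
+    System.out.println(tokens);
+  }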
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlNormalizer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,1078 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.Regularization;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
+
+public class MpdlNormalizer {
+  static final private String IT_VOWELS = "AEIOUaeiou" +
+                                          "\u00c6\u00e6" + // AE ligatures
+                                          "\u0152\u0153";  // OE ligatures
+  static final private String IT_CONS = "BCDFGHKLMNPQRSTVWXZ" +
+                                        "bcdfghklmnpqrstvwxz" +
+                                        "\u017f\u00df";  // long/sharp S
+  private String[] normFunctionsToUse = {"reg", "norm"};  // default is to use all of these normalization functions
+  private String language;
+  private int[] offsets;
+  
+  public MpdlNormalizer(String[] normFunctionsToUse, String lang) {
+    this.normFunctionsToUse = normFunctionsToUse;
+    String language = Language.getInstance().getLanguageId(lang); 
+    this.language = language;
+  }
+
+  public MpdlNormalizer(String language) {
+    this.language = language;
+  }
+
+  /**
+   * Applies the normalization rules in <code>language</code> to
+   * <code>s</code>, without offset tracking.
+   *
+   * @param s            source string
+   * @return             normalized string
+   */
+  public String normalize(String s) throws ApplicationException {
+    String normStr = s;
+    if (useRegFunction()) {
+      // try to regularize the string to the norm form over predefined regularizations
+      RegularizationManager regManager = RegularizationManager.getInstance();
+      ArrayList<Regularization> regs = regManager.findRegsByOrig(language, s);
+      if (regs != null && regs.size() > 0) {
+        Regularization reg = regs.get(0);  // only one: the first one
+        String regNormStr = reg.getNorm();
+        normStr = regNormStr;
+      }
+    }
+    if (useNormFunction()) {
+      // normalize the string by string replace
+      normStr = normalize(normStr, null);
+    }
+    return normStr;
+  }
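+  // Worked examples (added, not in the original source) for language "la": a matching
+  // regularization from the RegularizationManager is applied first, then the character
+  // replacements of normalize(s, offsets) below, e.g.
+  //   "atq;"   -> "atque"   ('q' + ';' -> "qu" + "e")
+  //   "volvi"  -> "uolui"   ('v' -> 'u')
+  //   "cauſa"  -> "causa"   (long s -> 's')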
+
+  private boolean useRegFunction() {
+    boolean useReg = false;
+    for (int i=0; i< normFunctionsToUse.length; i++) {
+      String function = normFunctionsToUse[i];
+      if (function.equals("reg"))
+        return true;
+    }
+    return useReg;
+  }
+  
+  private boolean useNormFunction() {
+    boolean useNorm = false;
+    for (int i=0; i< normFunctionsToUse.length; i++) {
+      String function = normFunctionsToUse[i];
+      if (function.equals("norm"))
+        return true;
+    }
+    return useNorm;
+  }
+
+  /**
+   * Applies the normalization rules in <code>language</code> to
+   * <code>s</code>, with offset tracking.<p>
+   *
+   * <strong>WARNING:</strong>
+   * Arboreal will not work properly if a normalization substitution
+   * replaces a source character with more than two target characters!
+   * This is simply a BUG, and should be fixed. Fortunately, however,
+   * one does not often need such a replacement.<p>
+   *
+   * @param s            source string
+   * @param offsets      character offset table
+   * @return             normalized string
+   */
+  public String normalize(String s, int[] offsets) {
+    this.offsets = offsets;
+    if (language.equals("la") || language.equals("lat")) {
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case 'j': replace = "i"; break;
+          case 'v': replace = "u"; break;
+          /*
+           * Linguistic note: /u/ and /v/ are rarely phonemic
+           * in Latin, as in alui 's/he nourished' vs.
+           * alvi 'of a belly', volui 's/he wished' or 'it rolled'
+           * vs. volvi 'to be rolled', (in)seruit 's/he joined
+           * together' vs. (in)servit 's/he serves'.
+           */
+          case 'q':
+            if ((i < s.length() - 1) && (s.charAt(i + 1) == ';'))
+              replace = "qu";
+            else
+              replace = "q";
+            break;
+          case ';':
+            if ((i > 0) && (s.charAt(i - 1) == 'q'))
+              replace = "e";
+            else if ((i == 0) || ! Character.isLetter(s.charAt(i - 1)))
+              replace = ";";
+            else
+              replace = "";
+            break;
+          case '\u0300': replace = ""; break; // COMBINING GRAVE ACCENT
+          case '\u0301': replace = ""; break; // COMBINING ACUTE ACCENT
+          case '\u0302': replace = ""; break; // COMBINING CIRCUMFLEX ACCENT
+    
+          case '\u00c0': replace = "A"; break; // LATIN CAPITAL LETTER A GRAVE
+          case '\u00c1': replace = "A"; break; // LATIN CAPITAL LETTER A ACUTE
+          case '\u00c2': replace = "A"; break; // LATIN CAPITAL LETTER A CIRCUMFLEX
+          case '\u00c4': replace = "A"; break; // LATIN CAPITAL LETTER A DIAERESIS
+          case '\u00c6': replace = "Ae"; break; // LATIN CAPITAL LETTER A E
+          case '\u00c7': replace = "C"; break; // LATIN CAPITAL LETTER C CEDILLA
+          case '\u00c8': replace = "E"; break; // LATIN CAPITAL LETTER E GRAVE
+          case '\u00c9': replace = "E"; break; // LATIN CAPITAL LETTER E ACUTE
+          case '\u00ca': replace = "E"; break; // LATIN CAPITAL LETTER E CIRCUMFLEX
+          case '\u00cb': replace = "E"; break; // LATIN CAPITAL LETTER E DIAERESIS   
+          case '\u00cc': replace = "I"; break; // LATIN CAPITAL LETTER I GRAVE;
+          case '\u00cd': replace = "I"; break; // LATIN CAPITAL LETTER I ACUTE
+          case '\u00ce': replace = "I"; break; // LATIN CAPITAL LETTER I CIRCUMFLEX
+          case '\u00cf': replace = "I"; break; // LATIN CAPITAL LETTER I DIAERESIS
+          case '\u00d2': replace = "O"; break; // LATIN CAPITAL LETTER O GRAVE
+          case '\u00d3': replace = "O"; break; // LATIN CAPITAL LETTER O ACUTE
+          case '\u00d4': replace = "O"; break; // LATIN CAPITAL LETTER O CIRCUMFLEX
+          case '\u00d6': replace = "O"; break; // LATIN CAPITAL LETTER O DIAERESIS
+          case '\u00d9': replace = "U"; break; // LATIN CAPITAL LETTER U GRAVE
+          case '\u00da': replace = "U"; break; // LATIN CAPITAL LETTER U ACUTE
+          case '\u00db': replace = "U"; break; // LATIN CAPITAL LETTER U CIRCUMFLEX
+          case '\u00dc': replace = "U"; break; // LATIN CAPITAL LETTER U DIAERESIS 
+          case '\u00e0': replace = "a"; break; // LATIN SMALL LETTER A GRAVE
+          case '\u00e1': replace = "a"; break; // LATIN SMALL LETTER A ACUTE
+          case '\u00e2': replace = "a"; break; // LATIN SMALL LETTER A CIRCUMFLEX
+          case '\u00e4': replace = "a"; break; // LATIN SMALL LETTER A DIAERESIS
+          case '\u00e6': replace = "ae"; break; // LATIN SMALL LETTER A E
+          case '\u00e7': replace = "c"; break; // LATIN SMALL LETTER C CEDILLA
+          case '\u00e8': replace = "e"; break; // LATIN SMALL LETTER E GRAVE
+          case '\u00e9': replace = "e"; break; // LATIN SMALL LETTER E ACUTE
+          case '\u00ea': replace = "e"; break; // LATIN SMALL LETTER E CIRCUMFLEX
+          case '\u00eb': replace = "e"; break; // LATIN SMALL LETTER E DIAERESIS
+          case '\u00ec': replace = "i"; break; // LATIN SMALL LETTER I GRAVE
+          case '\u00ed': replace = "i"; break; // LATIN SMALL LETTER I ACUTE
+          case '\u00ee': replace = "i"; break; // LATIN SMALL LETTER I CIRCUMFLEX
+          case '\u00ef': replace = "i"; break; // LATIN SMALL LETTER I DIAERESIS
+          case '\u00f2': replace = "o"; break; // LATIN SMALL LETTER O GRAVE
+          case '\u00f3': replace = "o"; break; // LATIN SMALL LETTER O ACUTE
+          case '\u00f4': replace = "o"; break; // LATIN SMALL LETTER O CIRCUMFLEX
+          case '\u00f6': replace = "o"; break; // LATIN SMALL LETTER O DIAERESIS
+          case '\u00f9': replace = "u"; break; // LATIN SMALL LETTER U GRAVE
+          case '\u00fa': replace = "u"; break; // LATIN SMALL LETTER U ACUTE
+          case '\u00fb': replace = "u"; break; // LATIN SMALL LETTER U CIRCUMFLEX
+          case '\u00fc': replace = "u"; break; // LATIN SMALL LETTER U DIAERESIS
+          case '\u0100': replace = "A"; break; // LATIN CAPITAL LETTER A MACRON
+          case '\u0101': replace = "a"; break; // LATIN SMALL LETTER A MACRON
+          case '\u0102': replace = "A"; break; // LATIN CAPITAL LETTER A BREVE
+          case '\u0103': replace = "a"; break; // LATIN SMALL LETTER A BREVE
+          case '\u0112': replace = "E"; break; // LATIN CAPITAL LETTER E MACRON
+          case '\u0113': replace = "e"; break; // LATIN SMALL LETTER E MACRON
+          case '\u0114': replace = "E"; break; // LATIN CAPITAL LETTER E BREVE
+          case '\u0115': replace = "e"; break; // LATIN SMALL LETTER E BREVE
+          case '\u0118': replace = "Ae"; break; // LATIN CAPITAL LETTER E OGONEK
+          case '\u0119': replace = "ae"; break; // LATIN SMALL LETTER E OGONEK
+          case '\u012a': replace = "I"; break; // LATIN CAPITAL LETTER I MACRON
+          case '\u012b': replace = "i"; break; // LATIN SMALL LETTER I MACRON
+          case '\u012c': replace = "I"; break; // LATIN CAPITAL LETTER I BREVE
+          case '\u012d': replace = "i"; break; // LATIN SMALL LETTER I BREVE
+          case '\u014c': replace = "O"; break; // LATIN CAPITAL LETTER O MACRON
+          case '\u014d': replace = "o"; break; // LATIN SMALL LETTER O MACRON
+          case '\u014e': replace = "O"; break; // LATIN CAPITAL LETTER O BREVE
+          case '\u014f': replace = "o"; break; // LATIN SMALL LETTER O BREVE
+          case '\u0152': replace = "Oe"; break; // LATIN CAPITAL LETTER O E
+          case '\u0153': replace = "oe"; break; // LATIN SMALL LETTER O E
+          case '\u016a': replace = "U"; break; // LATIN CAPITAL LETTER U MACRON
+          case '\u016b': replace = "u"; break; // LATIN SMALL LETTER U MACRON
+          case '\u016c': replace = "U"; break; // LATIN CAPITAL LETTER U BREVE
+          case '\u016d': replace = "u"; break; // LATIN SMALL LETTER U BREVE
+          case '\u017f': replace = "s"; break; // LATIN SMALL LETTER LONG S
+          case '\u00df': replace = "ss"; break; // LATIN SMALL LETTER SHARP S
+          case '\u00ad': break; // soft hyphen
+          // new in MPDL project by J. Willenborg
+          case '\u1e14': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e15': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e16': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e17': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e18': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e19': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e1a': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e1b': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e1c': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1e1d': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1eb8': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1eb9': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1eba': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ebb': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ebc': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ebd': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ebe': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ebf': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec0': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec1': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec2': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec3': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec4': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec5': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec6': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          case '\u1ec7': replace = "e"; break; // LATIN ... LETTER E WITH ...
+          // by Malcolm
+          case '\u2329': break; // BRA
+          case '\u232a': break; // KET
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2) 
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else if (language.equals("it")) {
+      // new MPDL code added by J. Willenborg: parts of Malcolm's code did not work without errors and were removed; the Latin replacements are reused here
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case '\u00c0': replace = "A"; break; // LATIN CAPITAL LETTER A GRAVE
+          case '\u00c1': replace = "A"; break; // LATIN CAPITAL LETTER A ACUTE
+          case '\u00c2': replace = "A"; break; // LATIN CAPITAL LETTER A CIRCUMFLEX
+          case '\u00c4': replace = "A"; break; // LATIN CAPITAL LETTER A DIAERESIS
+          case '\u00c6': replace = "Ae"; break; // LATIN CAPITAL LETTER A E
+          case '\u00c7': replace = "C"; break; // LATIN CAPITAL LETTER C CEDILLA
+          case '\u00c8': replace = "E"; break; // LATIN CAPITAL LETTER E GRAVE
+          case '\u00c9': replace = "E"; break; // LATIN CAPITAL LETTER E ACUTE
+          case '\u00ca': replace = "E"; break; // LATIN CAPITAL LETTER E CIRCUMFLEX
+          case '\u00cb': replace = "E"; break; // LATIN CAPITAL LETTER E DIAERESIS
+          case '\u00cc': replace = "I"; break; // LATIN CAPITAL LETTER I GRAVE;
+          case '\u00cd': replace = "I"; break; // LATIN CAPITAL LETTER I ACUTE
+          case '\u00ce': replace = "I"; break; // LATIN CAPITAL LETTER I CIRCUMFLEX
+          case '\u00cf': replace = "I"; break; // LATIN CAPITAL LETTER I DIAERESIS
+          case '\u00d2': replace = "O"; break; // LATIN CAPITAL LETTER O GRAVE
+          case '\u00d3': replace = "O"; break; // LATIN CAPITAL LETTER O ACUTE
+          case '\u00d4': replace = "O"; break; // LATIN CAPITAL LETTER O CIRCUMFLEX
+          case '\u00d6': replace = "O"; break; // LATIN CAPITAL LETTER O DIAERESIS
+          case '\u00d9': replace = "U"; break; // LATIN CAPITAL LETTER U GRAVE
+          case '\u00da': replace = "U"; break; // LATIN CAPITAL LETTER U ACUTE
+          case '\u00db': replace = "U"; break; // LATIN CAPITAL LETTER U CIRCUMFLEX
+          case '\u00dc': replace = "U"; break; // LATIN CAPITAL LETTER U DIAERESIS
+          case '\u00e0': replace = "a"; break; // LATIN SMALL LETTER A GRAVE
+          case '\u00e1': replace = "a"; break; // LATIN SMALL LETTER A ACUTE
+          case '\u00e2': replace = "a"; break; // LATIN SMALL LETTER A CIRCUMFLEX
+          case '\u00e4': replace = "a"; break; // LATIN SMALL LETTER A DIAERESIS
+          case '\u00e6': replace = "ae"; break; // LATIN SMALL LETTER A E
+          case '\u00e7': replace = "c"; break; // LATIN SMALL LETTER C CEDILLA
+          case '\u00e8': replace = "e"; break; // LATIN SMALL LETTER E GRAVE
+          case '\u00e9': replace = "e"; break; // LATIN SMALL LETTER E ACUTE
+          case '\u00ea': replace = "e"; break; // LATIN SMALL LETTER E CIRCUMFLEX
+          case '\u00eb': replace = "e"; break; // LATIN SMALL LETTER E DIAERESIS
+          case '\u00ec': replace = "i"; break; // LATIN SMALL LETTER I GRAVE
+          case '\u00ed': replace = "i"; break; // LATIN SMALL LETTER I ACUTE
+          case '\u00ee': replace = "i"; break; // LATIN SMALL LETTER I CIRCUMFLEX
+          case '\u00ef': replace = "i"; break; // LATIN SMALL LETTER I DIAERESIS
+          case '\u00f2': replace = "o"; break; // LATIN SMALL LETTER O GRAVE
+          case '\u00f3': replace = "o"; break; // LATIN SMALL LETTER O ACUTE
+          case '\u00f4': replace = "o"; break; // LATIN SMALL LETTER O CIRCUMFLEX
+          case '\u00f6': replace = "o"; break; // LATIN SMALL LETTER O DIAERESIS
+          case '\u00f9': replace = "u"; break; // LATIN SMALL LETTER U GRAVE
+          case '\u00fa': replace = "u"; break; // LATIN SMALL LETTER U ACUTE
+          case '\u00fb': replace = "u"; break; // LATIN SMALL LETTER U CIRCUMFLEX
+          case '\u00fc': replace = "u"; break; // LATIN SMALL LETTER U DIAERESIS
+          case '\u0100': replace = "A"; break; // LATIN CAPITAL LETTER A MACRON
+          case '\u0101': replace = "a"; break; // LATIN SMALL LETTER A MACRON
+          case '\u0102': replace = "A"; break; // LATIN CAPITAL LETTER A BREVE
+          case '\u0103': replace = "a"; break; // LATIN SMALL LETTER A BREVE
+          case '\u0112': replace = "E"; break; // LATIN CAPITAL LETTER E MACRON
+          case '\u0113': replace = "e"; break; // LATIN SMALL LETTER E MACRON
+          case '\u0114': replace = "E"; break; // LATIN CAPITAL LETTER E BREVE
+          case '\u0115': replace = "e"; break; // LATIN SMALL LETTER E BREVE
+          case '\u0118': replace = "Ae"; break; // LATIN CAPITAL LETTER E OGONEK
+          case '\u0119': replace = "ae"; break; // LATIN SMALL LETTER E OGONEK
+          case '\u012a': replace = "I"; break; // LATIN CAPITAL LETTER I MACRON
+          case '\u012b': replace = "i"; break; // LATIN SMALL LETTER I MACRON
+          case '\u012c': replace = "I"; break; // LATIN CAPITAL LETTER I BREVE
+          case '\u012d': replace = "i"; break; // LATIN SMALL LETTER I BREVE
+          case '\u014c': replace = "O"; break; // LATIN CAPITAL LETTER O MACRON
+          case '\u014d': replace = "o"; break; // LATIN SMALL LETTER O MACRON
+          case '\u014e': replace = "O"; break; // LATIN CAPITAL LETTER O BREVE
+          case '\u014f': replace = "o"; break; // LATIN SMALL LETTER O BREVE
+          case '\u0152': replace = "Oe"; break; // LATIN CAPITAL LETTER O E
+          case '\u0153': replace = "oe"; break; // LATIN SMALL LETTER O E
+          case '\u016a': replace = "U"; break; // LATIN CAPITAL LETTER U MACRON
+          case '\u016b': replace = "u"; break; // LATIN SMALL LETTER U MACRON
+          case '\u016c': replace = "U"; break; // LATIN CAPITAL LETTER U BREVE
+          case '\u016d': replace = "u"; break; // LATIN SMALL LETTER U BREVE
+          case '\u017f': replace = "s"; break; // LATIN SMALL LETTER LONG S
+          case '\u00df': replace = "ss"; break; // LATIN SMALL LETTER SHARP S
+          // new in MPDL project by J. Willenborg
+          case '\u1e8d': replace = "e"; break; // LATIN SMALL LETTER E WITH TILDE
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    // new MPDL code added by J. Willenborg: most of the Latin replacements are also applied for English
+    } else if (language.equals("en")) {
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case '\u0300': replace = ""; break; // COMBINING GRAVE ACCENT
+          case '\u0301': replace = ""; break; // COMBINING ACUTE ACCENT
+          case '\u0302': replace = ""; break; // COMBINING CIRCUMFLEX ACCENT
+    
+          case '\u00c0': replace = "A"; break; // LATIN CAPITAL LETTER A GRAVE
+          case '\u00c1': replace = "A"; break; // LATIN CAPITAL LETTER A ACUTE
+          case '\u00c2': replace = "A"; break; // LATIN CAPITAL LETTER A CIRCUMFLEX
+          case '\u00c4': replace = "A"; break; // LATIN CAPITAL LETTER A DIAERESIS
+          case '\u00c6': replace = "Ae"; break; // LATIN CAPITAL LETTER A E
+          case '\u00c7': replace = "C"; break; // LATIN CAPITAL LETTER C CEDILLA
+          case '\u00c8': replace = "E"; break; // LATIN CAPITAL LETTER E GRAVE
+          case '\u00c9': replace = "E"; break; // LATIN CAPITAL LETTER E ACUTE
+          case '\u00ca': replace = "E"; break; // LATIN CAPITAL LETTER E CIRCUMFLEX
+          case '\u00cb': replace = "E"; break; // LATIN CAPITAL LETTER E DIAERESIS
+          case '\u00cc': replace = "I"; break; // LATIN CAPITAL LETTER I GRAVE
+          case '\u00cd': replace = "I"; break; // LATIN CAPITAL LETTER I ACUTE
+          case '\u00ce': replace = "I"; break; // LATIN CAPITAL LETTER I CIRCUMFLEX
+          case '\u00cf': replace = "I"; break; // LATIN CAPITAL LETTER I DIAERESIS
+          case '\u00d2': replace = "O"; break; // LATIN CAPITAL LETTER O GRAVE
+          case '\u00d3': replace = "O"; break; // LATIN CAPITAL LETTER O ACUTE
+          case '\u00d4': replace = "O"; break; // LATIN CAPITAL LETTER O CIRCUMFLEX
+          case '\u00d6': replace = "O"; break; // LATIN CAPITAL LETTER O DIAERESIS
+          case '\u00d9': replace = "U"; break; // LATIN CAPITAL LETTER U GRAVE
+          case '\u00da': replace = "U"; break; // LATIN CAPITAL LETTER U ACUTE
+          case '\u00db': replace = "U"; break; // LATIN CAPITAL LETTER U CIRCUMFLEX
+          case '\u00dc': replace = "U"; break; // LATIN CAPITAL LETTER U DIAERESIS
+          case '\u00e0': replace = "a"; break; // LATIN SMALL LETTER A GRAVE
+          case '\u00e1': replace = "a"; break; // LATIN SMALL LETTER A ACUTE
+          case '\u00e2': replace = "a"; break; // LATIN SMALL LETTER A CIRCUMFLEX
+          case '\u00e4': replace = "a"; break; // LATIN SMALL LETTER A DIAERESIS
+          case '\u00e6': replace = "ae"; break; // LATIN SMALL LETTER A E
+          case '\u00e7': replace = "c"; break; // LATIN SMALL LETTER C CEDILLA
+          case '\u00e8': replace = "e"; break; // LATIN SMALL LETTER E GRAVE
+          case '\u00e9': replace = "e"; break; // LATIN SMALL LETTER E ACUTE
+          case '\u00ea': replace = "e"; break; // LATIN SMALL LETTER E CIRCUMFLEX
+          case '\u00eb': replace = "e"; break; // LATIN SMALL LETTER E DIAERESIS
+          case '\u00ec': replace = "i"; break; // LATIN SMALL LETTER I GRAVE
+          case '\u00ed': replace = "i"; break; // LATIN SMALL LETTER I ACUTE
+          case '\u00ee': replace = "i"; break; // LATIN SMALL LETTER I CIRCUMFLEX
+          case '\u00ef': replace = "i"; break; // LATIN SMALL LETTER I DIAERESIS
+          case '\u00f2': replace = "o"; break; // LATIN SMALL LETTER O GRAVE
+          case '\u00f3': replace = "o"; break; // LATIN SMALL LETTER O ACUTE
+          case '\u00f4': replace = "o"; break; // LATIN SMALL LETTER O CIRCUMFLEX
+          case '\u00f6': replace = "o"; break; // LATIN SMALL LETTER O DIAERESIS
+          case '\u00f9': replace = "u"; break; // LATIN SMALL LETTER U GRAVE
+          case '\u00fa': replace = "u"; break; // LATIN SMALL LETTER U ACUTE
+          case '\u00fb': replace = "u"; break; // LATIN SMALL LETTER U CIRCUMFLEX
+          case '\u00fc': replace = "u"; break; // LATIN SMALL LETTER U DIAERESIS
+          case '\u0100': replace = "A"; break; // LATIN CAPITAL LETTER A MACRON
+          case '\u0101': replace = "a"; break; // LATIN SMALL LETTER A MACRON
+          case '\u0102': replace = "A"; break; // LATIN CAPITAL LETTER A BREVE
+          case '\u0103': replace = "a"; break; // LATIN SMALL LETTER A BREVE
+          case '\u0112': replace = "E"; break; // LATIN CAPITAL LETTER E MACRON
+          case '\u0113': replace = "e"; break; // LATIN SMALL LETTER E MACRON
+          case '\u0114': replace = "E"; break; // LATIN CAPITAL LETTER E BREVE
+          case '\u0115': replace = "e"; break; // LATIN SMALL LETTER E BREVE
+          case '\u0118': replace = "Ae"; break; // LATIN CAPITAL LETTER E OGONEK
+          case '\u0119': replace = "ae"; break; // LATIN SMALL LETTER E OGONEK
+          case '\u012a': replace = "I"; break; // LATIN CAPITAL LETTER I MACRON
+          case '\u012b': replace = "i"; break; // LATIN SMALL LETTER I MACRON
+          case '\u012c': replace = "I"; break; // LATIN CAPITAL LETTER I BREVE
+          case '\u012d': replace = "i"; break; // LATIN SMALL LETTER I BREVE
+          case '\u014c': replace = "O"; break; // LATIN CAPITAL LETTER O MACRON
+          case '\u014d': replace = "o"; break; // LATIN SMALL LETTER O MACRON
+          case '\u014e': replace = "O"; break; // LATIN CAPITAL LETTER O BREVE
+          case '\u014f': replace = "o"; break; // LATIN SMALL LETTER O BREVE
+          case '\u0152': replace = "Oe"; break; // LATIN CAPITAL LETTER O E
+          case '\u0153': replace = "oe"; break; // LATIN SMALL LETTER O E
+          case '\u016a': replace = "U"; break; // LATIN CAPITAL LETTER U MACRON
+          case '\u016b': replace = "u"; break; // LATIN SMALL LETTER U MACRON
+          case '\u016c': replace = "U"; break; // LATIN CAPITAL LETTER U BREVE
+          case '\u016d': replace = "u"; break; // LATIN SMALL LETTER U BREVE
+          case '\u017f': replace = "s"; break; // LATIN SMALL LETTER LONG S
+          case '\u00df': replace = "ss"; break; // LATIN SMALL LETTER SHARP S
+          // new in MPDL project by J. Willenborg
+          case '\u1e8d': replace = "e"; break; // LATIN SMALL LETTER E WITH TILDE
+          // by Malcolm
+          case '\u00ad': break; // soft hyphen
+          case '\u2329': break; // BRA
+          case '\u232a': break; // KET
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else if (language.equals("fr")) {
+      // new MPDL code added by J. Willenborg: parts of Malcolm's code did not work without errors and were removed
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case '\u00e6': replace = "ae"; break; // LATIN SMALL LETTER A E
+          case '\u017f': replace = "s"; break; // LATIN SMALL LETTER LONG S
+          case '\u00df': replace = "ss"; break; // LATIN SMALL LETTER SHARP S
+          case '\u00ad': break; // soft hyphen
+          case '-': break;
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else if (language.equals("de")) {
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case '\u00c4': replace = "Ae"; break;
+          case '\u00d6': replace = "Oe"; break;
+          case '\u00dc': replace = "Ue"; break;
+          case '\u00df': replace = "ss"; break;
+          case '\u00e4': replace = "ae"; break;
+          case '\u00f6': replace = "oe"; break;
+          case '\u00fc': replace = "ue"; break;
+          case '\u00e9': replace = "e"; break;
+          case '\u00ad': break; // soft hyphen
+          case '-': break;
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else if (language.equals("zh")) {
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case '\u00b9': replace = "1"; break;
+          case '\u00b2': replace = "2"; break;
+          case '\u00b3': replace = "3"; break;
+          case '\u2074': replace = "4"; break;
+          case '\u2075': replace = "5"; break;
+          // Malcolm Hyman's original code used the following replacements // TODO: uncomment these 3 lines
+          // case '\u3000': replace = " "; break;
+          // case '\u3001': replace = ","; break;
+          // case '\u3002': replace = "."; break;
+          // case '\u200b': break; // BREAKS EVERYTHING!
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else if (language.equals("akk") ||
+       language.equals("qam") ||
+       language.equals("qpc") ||
+       language.equals("elx") ||
+       language.equals("sux") ||
+       language.equals("hit") ||
+       language.equals("qhu") ||
+       language.equals("peo") ||
+       language.equals("uga") ||
+       language.equals("ura") ||
+       language.equals("qcu")) {
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      char last = '\u0000';
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        c = Character.toLowerCase(c);
+        String replace = new String();
+        switch (c) {
+          case '{': replace += "-"; break;
+          case '}': replace += "-"; break;
+          // These are from PSD::ATF::Unicode by Steve Tinney
+          case '\u0161': replace += "sz"; break;
+          case '\u1e63': replace += "s,"; break;
+          case '\u1e6d': replace += "t,"; break;
+          case '\u014b': replace += "j"; break;
+          case '\u015b': replace += "s'"; break;
+          case '\u2080': replace += "0"; break;
+          case '\u2081': replace += "1"; break;
+          case '\u2082': replace += "2"; break;
+          case '\u2083': replace += "3"; break;
+          case '\u2084': replace += "4"; break;
+          case '\u2085': replace += "5"; break;
+          case '\u2086': replace += "6"; break;
+          case '\u2087': replace += "7"; break;
+          case '\u2088': replace += "8"; break;
+          case '\u2089': replace += "9"; break;
+    
+          case 'c': // shin (except where used as modifier)
+            if ((i > 0) && ((last == '~') || (last == '@')))
+              replace += "c";
+            else
+              replace += "sz";
+            break;
+          default: replace += c; break;
+        }
+        // suppress grapheme boundary before or after word boundary
+        if (replace.equals("-")) {
+          if ((i + 1 == s.length()) || (s.charAt(i + 1) == ' ') || (i == 0) || (s.charAt(i - 1) == ' ')) 
+            replace = "";
+        }
+        last = c;
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else if (language.equals("el") || language.equals("grc")) {
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case '\u03c2': replace = "\u03c3"; break; // GREEK SMALL LETTER FINAL SIGMA
+          case '<': break;
+          case '>': break;
+          case '[': break;
+          case ']': break;
+          case '1': break;
+          case '2': break;
+          case '\u03ac': replace = "\u1f71"; break;
+          case '\u03ad': replace = "\u1f73"; break;
+          case '\u03ae': replace = "\u1f75"; break;
+          case '\u03af': replace = "\u1f77"; break;
+          case '\u03cc': replace = "\u1f79"; break;
+          case '\u03cd': replace = "\u1f7b"; break;
+          case '\u03ce': replace = "\u1f7d"; break;
+          case '-': break; // same treatment as soft hyphen
+          case '\u00ad': break; // soft hyphen
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else if (language.equals("el_atonic")) {
+      StringBuffer buf = new StringBuffer();
+      int n = 0;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        String replace = new String();
+        switch (c) {
+          case '\u03c2': replace = "\u03c3"; break; // GREEK SMALL LETTER FINAL SIGMA
+          // map characters with diacritics to their plain equivalent
+          // cf. <code>BetaCode.java</code>
+          case '\u03aa': replace = "\u0399"; break;
+          case '\u03ab': replace = "\u03a5"; break;
+          case '\u03ac': replace = "\u03b1"; break; // alpha with tonos -> plain alpha
+          case '\u03ad': replace = "\u03b5"; break; // epsilon with tonos -> plain epsilon
+          case '\u03ae': replace = "\u03b7"; break; // eta with tonos -> plain eta
+          case '\u03af': replace = "\u03b9"; break; // iota with tonos -> plain iota
+          case '\u03ca': replace = "\u03b9"; break;
+          case '\u03cb': replace = "\u03c5"; break;
+          case '\u03cc': replace = "\u03bf"; break;
+          case '\u03cd': replace = "\u03c5"; break;
+          case '\u03ce': replace = "\u03c9"; break;
+          case '\u1f00': replace = "\u03b1"; break;
+          case '\u1f01': replace = "\u03b1"; break;
+          case '\u1f02': replace = "\u03b1"; break;
+          case '\u1f03': replace = "\u03b1"; break;
+          case '\u1f04': replace = "\u03b1"; break;
+          case '\u1f05': replace = "\u03b1"; break;
+          case '\u1f06': replace = "\u03b1"; break;
+          case '\u1f07': replace = "\u03b1"; break;
+          case '\u1f08': replace = "\u0391"; break;
+          case '\u1f09': replace = "\u0391"; break;
+          case '\u1f0a': replace = "\u0391"; break;
+          case '\u1f0b': replace = "\u0391"; break;
+          case '\u1f0c': replace = "\u0391"; break;
+          case '\u1f0d': replace = "\u0391"; break;
+          case '\u1f0e': replace = "\u0391"; break;
+          case '\u1f0f': replace = "\u0391"; break;
+          case '\u1f10': replace = "\u03b5"; break;
+          case '\u1f11': replace = "\u03b5"; break;
+          case '\u1f12': replace = "\u03b5"; break;
+          case '\u1f13': replace = "\u03b5"; break;
+          case '\u1f14': replace = "\u03b5"; break;
+          case '\u1f15': replace = "\u03b5"; break;
+          case '\u1f18': replace = "\u0395"; break;
+          case '\u1f19': replace = "\u0395"; break;
+          case '\u1f1a': replace = "\u0395"; break;
+          case '\u1f1b': replace = "\u0395"; break;
+          case '\u1f1c': replace = "\u0395"; break;
+          case '\u1f1d': replace = "\u0395"; break;
+          case '\u1f20': replace = "\u03b7"; break;
+          case '\u1f21': replace = "\u03b7"; break;
+          case '\u1f22': replace = "\u03b7"; break;
+          case '\u1f23': replace = "\u03b7"; break;
+          case '\u1f24': replace = "\u03b7"; break;
+          case '\u1f25': replace = "\u03b7"; break;
+          case '\u1f26': replace = "\u03b7"; break;
+          case '\u1f27': replace = "\u03b7"; break;
+          case '\u1f28': replace = "\u0397"; break;
+          case '\u1f29': replace = "\u0397"; break;
+          case '\u1f2a': replace = "\u0397"; break;
+          case '\u1f2b': replace = "\u0397"; break;
+          case '\u1f2c': replace = "\u0397"; break;
+          case '\u1f2d': replace = "\u0397"; break;
+          case '\u1f2e': replace = "\u0397"; break;
+          case '\u1f2f': replace = "\u0397"; break;
+          case '\u1f30': replace = "\u03b9"; break;
+          case '\u1f31': replace = "\u03b9"; break;
+          case '\u1f32': replace = "\u03b9"; break;
+          case '\u1f33': replace = "\u03b9"; break;
+          case '\u1f34': replace = "\u03b9"; break;
+          case '\u1f35': replace = "\u03b9"; break;
+          case '\u1f36': replace = "\u03b9"; break;
+          case '\u1f37': replace = "\u03b9"; break;
+          case '\u1f38': replace = "\u0399"; break;
+          case '\u1f39': replace = "\u0399"; break;
+          case '\u1f3a': replace = "\u0399"; break;
+          case '\u1f3b': replace = "\u0399"; break;
+          case '\u1f3c': replace = "\u0399"; break;
+          case '\u1f3d': replace = "\u0399"; break;
+          case '\u1f3e': replace = "\u0399"; break;
+          case '\u1f3f': replace = "\u0399"; break;
+          case '\u1f40': replace = "\u03bf"; break;
+          case '\u1f41': replace = "\u03bf"; break;
+          case '\u1f42': replace = "\u03bf"; break;
+          case '\u1f43': replace = "\u03bf"; break;
+          case '\u1f44': replace = "\u03bf"; break;
+          case '\u1f45': replace = "\u03bf"; break;
+          case '\u1f48': replace = "\u039f"; break;
+          case '\u1f49': replace = "\u039f"; break;
+          case '\u1f4a': replace = "\u039f"; break;
+          case '\u1f4b': replace = "\u039f"; break;
+          case '\u1f4c': replace = "\u039f"; break;
+          case '\u1f4d': replace = "\u039f"; break;
+          case '\u1f50': replace = "\u03c5"; break;
+          case '\u1f51': replace = "\u03c5"; break;
+          case '\u1f52': replace = "\u03c5"; break;
+          case '\u1f53': replace = "\u03c5"; break;
+          case '\u1f54': replace = "\u03c5"; break;
+          case '\u1f55': replace = "\u03c5"; break;
+          case '\u1f56': replace = "\u03c5"; break;
+          case '\u1f57': replace = "\u03c5"; break;
+          case '\u1f58': replace = "\u03a5"; break;
+          case '\u1f59': replace = "\u03a5"; break;
+          case '\u1f5a': replace = "\u03a5"; break;
+          case '\u1f5b': replace = "\u03a5"; break;
+          case '\u1f5c': replace = "\u03a5"; break;
+          case '\u1f5d': replace = "\u03a5"; break;
+          case '\u1f5e': replace = "\u03a5"; break;
+          case '\u1f5f': replace = "\u03a5"; break;
+          case '\u1f60': replace = "\u03c9"; break;
+          case '\u1f61': replace = "\u03c9"; break;
+          case '\u1f62': replace = "\u03c9"; break;
+          case '\u1f63': replace = "\u03c9"; break;
+          case '\u1f64': replace = "\u03c9"; break;
+          case '\u1f65': replace = "\u03c9"; break;
+          case '\u1f66': replace = "\u03c9"; break;
+          case '\u1f67': replace = "\u03c9"; break;
+          case '\u1f68': replace = "\u03a9"; break;
+          case '\u1f69': replace = "\u03a9"; break;
+          case '\u1f6a': replace = "\u03a9"; break;
+          case '\u1f6b': replace = "\u03a9"; break;
+          case '\u1f6c': replace = "\u03a9"; break;
+          case '\u1f6d': replace = "\u03a9"; break;
+          case '\u1f6e': replace = "\u03a9"; break;
+          case '\u1f6f': replace = "\u03a9"; break;
+          case '\u1f70': replace = "\u03b1"; break;
+          case '\u1f71': replace = "\u03b1"; break;
+          case '\u1f72': replace = "\u03b5"; break;
+          case '\u1f73': replace = "\u03b5"; break;
+          case '\u1f74': replace = "\u03b7"; break;
+          case '\u1f75': replace = "\u03b7"; break;
+          case '\u1f76': replace = "\u03b9"; break;
+          case '\u1f77': replace = "\u03b9"; break;
+          case '\u1f78': replace = "\u03bf"; break;
+          case '\u1f79': replace = "\u03bf"; break;
+          case '\u1f7a': replace = "\u03c5"; break;
+          case '\u1f7b': replace = "\u03c5"; break;
+          case '\u1f7c': replace = "\u03c9"; break;
+          case '\u1f7d': replace = "\u03c9"; break;
+          case '\u1f80': replace = "\u03b1"; break;
+          case '\u1f81': replace = "\u03b1"; break;
+          case '\u1f82': replace = "\u03b1"; break;
+          case '\u1f83': replace = "\u03b1"; break;
+          case '\u1f84': replace = "\u03b1"; break;
+          case '\u1f85': replace = "\u03b1"; break;
+          case '\u1f86': replace = "\u03b1"; break;
+          case '\u1f87': replace = "\u03b1"; break;
+          case '\u1f88': replace = "\u0391"; break;
+          case '\u1f89': replace = "\u0391"; break;
+          case '\u1f8a': replace = "\u0391"; break;
+          case '\u1f8b': replace = "\u0391"; break;
+          case '\u1f8c': replace = "\u0391"; break;
+          case '\u1f8d': replace = "\u0391"; break;
+          case '\u1f8e': replace = "\u0391"; break;
+          case '\u1f8f': replace = "\u0391"; break;
+          case '\u1f90': replace = "\u03b7"; break;
+          case '\u1f91': replace = "\u03b7"; break;
+          case '\u1f92': replace = "\u03b7"; break;
+          case '\u1f93': replace = "\u03b7"; break;
+          case '\u1f94': replace = "\u03b7"; break;
+          case '\u1f95': replace = "\u03b7"; break;
+          case '\u1f96': replace = "\u03b7"; break;
+          case '\u1f97': replace = "\u03b7"; break;
+          case '\u1f98': replace = "\u0397"; break;
+          case '\u1f99': replace = "\u0397"; break;
+          case '\u1f9a': replace = "\u0397"; break;
+          case '\u1f9b': replace = "\u0397"; break;
+          case '\u1f9c': replace = "\u0397"; break;
+          case '\u1f9d': replace = "\u0397"; break;
+          case '\u1f9e': replace = "\u0397"; break;
+          case '\u1f9f': replace = "\u0397"; break;
+          case '\u1fa0': replace = "\u03c9"; break;
+          case '\u1fa1': replace = "\u03c9"; break;
+          case '\u1fa2': replace = "\u03c9"; break;
+          case '\u1fa3': replace = "\u03c9"; break;
+          case '\u1fa4': replace = "\u03c9"; break;
+          case '\u1fa5': replace = "\u03c9"; break;
+          case '\u1fa6': replace = "\u03c9"; break;
+          case '\u1fa7': replace = "\u03c9"; break;
+          case '\u1fa8': replace = "\u03a9"; break;
+          case '\u1fa9': replace = "\u03a9"; break;
+          case '\u1faa': replace = "\u03a9"; break;
+          case '\u1fab': replace = "\u03a9"; break;
+          case '\u1fac': replace = "\u03a9"; break;
+          case '\u1fad': replace = "\u03a9"; break;
+          case '\u1fae': replace = "\u03a9"; break;
+          case '\u1faf': replace = "\u03a9"; break;
+          case '\u1fb2': replace = "\u03b1"; break;
+          case '\u1fb3': replace = "\u03b1"; break;
+          case '\u1fb4': replace = "\u03b1"; break;
+          case '\u1fb6': replace = "\u03b1"; break;
+          case '\u1fb7': replace = "\u03b1"; break;
+          case '\u1fba': replace = "\u0391"; break;
+          case '\u1fbb': replace = "\u0391"; break;
+          case '\u1fbc': replace = "\u0391"; break;
+          case '\u1fc2': replace = "\u03b7"; break;
+          case '\u1fc3': replace = "\u03b7"; break;
+          case '\u1fc4': replace = "\u03b7"; break;
+          case '\u1fc6': replace = "\u03b7"; break;
+          case '\u1fc7': replace = "\u03b7"; break;
+          case '\u1fca': replace = "\u0397"; break;
+          case '\u1fcb': replace = "\u0397"; break;
+          case '\u1fcc': replace = "\u0397"; break;
+          case '\u1fd2': replace = "\u03b9"; break;
+          case '\u1fd3': replace = "\u03b9"; break;
+          case '\u1fd6': replace = "\u03b9"; break;
+          case '\u1fd7': replace = "\u03b9"; break;
+          case '\u1fda': replace = "\u0399"; break;
+          case '\u1fdb': replace = "\u0399"; break; // capital iota with oxia -> plain capital iota
+          case '\u1fe2': replace = "\u03c5"; break;
+          case '\u1fe3': replace = "\u03c5"; break;
+          case '\u1fe4': replace = "\u03c1"; break;
+          case '\u1fe5': replace = "\u03c1"; break;
+          case '\u1fe6': replace = "\u03c5"; break;
+          case '\u1fe7': replace = "\u03c5"; break;
+          case '\u1fea': replace = "\u03a5"; break;
+          case '\u1feb': replace = "\u03a5"; break;
+          case '\u1fec': replace = "\u03a1"; break;
+          case '\u1ff2': replace = "\u03c9"; break;
+          case '\u1ff3': replace = "\u03c9"; break;
+          case '\u1ff4': replace = "\u03c9"; break;
+          case '\u1ff6': replace = "\u03c9"; break;
+          case '\u1ff7': replace = "\u03c9"; break;
+          case '\u1ff8': replace = "\u039f"; break;
+          case '\u1ff9': replace = "\u039f"; break;
+          case '\u1ffa': replace = "\u03a9"; break;
+          case '\u1ffb': replace = "\u03a9"; break;
+          case '\u1ffc': replace = "\u03a9"; break;
+    
+          case '<': break;
+          case '>': break;
+          case '-': break; // same treatment as soft hyphen
+          case '\u00ad': break; // soft hyphen
+          default: replace += c; break;
+        }
+        buf.append(replace);
+        // update offsets if replacement is a different length
+        if (offsets != null) {
+          int r = replace.length();
+          if (r == 0) 
+            this.offsets = arrayKill(this.offsets, i - n);
+          else if (r == 2)
+            this.offsets = arrayInsert(this.offsets, i - n + 1, this.offsets[i - n], r - 1);
+          n += 1 - r;
+        }
+      }
+      return buf.toString();
+    } else {      // unknown or no language
+      return s;
+    }
+  }
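+  /*
+   * Illustrative sketch of the "de" branch above, assuming the public
+   * normalize(String) method dispatches to these language branches (the sample
+   * word is a placeholder, not output from a test run):
+   *
+   *   MpdlNormalizer n = new MpdlNormalizer("de");
+   *   String norm = n.normalize("Gr\u00fc\u00dfe");  // "ue" for U+00FC, "ss" for U+00DF
+   *   // expected: "Gruesse"; a soft hyphen or '-' would simply be dropped
+   */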
+  
+  public String deNormalizeToRegExpr(String s) {
+    // TODO all characters in all languages
+    if (language.equals("la") || language.equals("lat")) {
+      StringBuffer buf = new StringBuffer();
+      if (s.indexOf("ae") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("ae", "\u0119");
+        String str2 = s;
+        str2 = str2.replaceAll("ae", "\u00c6");
+        String str3 = s;
+        str3 = str3.replaceAll("ae", "\u00e6");
+        buf.append(str1 + "|" + str2 + "|" + str3 + "|");
+      }
+      if (s.indexOf("oe") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("oe", "\u0152");
+        String str2 = s;
+        str2 = str2.replaceAll("oe", "\u0153");
+        buf.append(str1 + "|" + str2 + "|");
+      }
+      if (s.indexOf("ss") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("ss", "\u00df");
+        buf.append(str1 + "|");
+      }
+      boolean beginWord = true;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        if (! beginWord) 
+          c = Character.toLowerCase(c);
+        beginWord = Character.isWhitespace(c);
+        String replace = new String();
+        switch (c) {
+          case 'a': replace = "[a\u00c0\u00c1\u00c2\u00c4\u00e0\u00e1\u00e2\u00e4]"; break; 
+          case 'c': replace = "[c\u00c7\u00e7]"; break;
+          case 'e': replace = "[e\u00c8\u00c9\u00ca\u00cb\u00e8\u00e9\u00ea\u00eb\u0113\u0115\u1ebd]"; break; 
+          case 'i': replace = "[ij\u00cc\u00cd\u00ce\u00cf\u00ec\u00ed\u00ee\u00ef\u012a\u012b\u012c\u012d]"; break;
+          case 'o': replace = "[o\u00d2\u00d3\u00d4\u00d6\u00f2\u00f3\u00f4\u00f6\u014c\u014d\u014e\u014f]"; break; 
+          case 'u': replace = "[uv\u00d9\u00da\u00db\u00dc\u00f9\u00fa\u00fb\u00fc\u016a\u016b\u016c\u016d]"; break; 
+          case 's': replace = "[s\u017f]"; break; 
+          default: replace += c; break;
+        }
+        buf.append(replace);
+      }
+      return buf.toString();
+    } else if (language.equals("en")) {
+      StringBuffer buf = new StringBuffer();
+      if (s.indexOf("ae") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("ae", "\u0119");
+        String str2 = s;
+        str2 = str2.replaceAll("ae", "\u00c6");
+        String str3 = s;
+        str3 = str3.replaceAll("ae", "\u00e6");
+        buf.append(str1 + "|" + str2 + "|" + str3 + "|");
+      }
+      if (s.indexOf("oe") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("oe", "\u0152");
+        String str2 = s;
+        str2 = str2.replaceAll("oe", "\u0153");
+        buf.append(str1 + "|" + str2 + "|");
+      }
+      if (s.indexOf("ss") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("ss", "\u00df");
+        buf.append(str1 + "|");
+      }
+      boolean beginWord = true;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        if (! beginWord) 
+          c = Character.toLowerCase(c);
+        beginWord = Character.isWhitespace(c);
+        String replace = new String();
+        switch (c) {
+          case 'a': replace = "[a\u00c0\u00c1\u00c2\u00c4\u00e0\u00e1\u00e2\u00e4]"; break; 
+          case 'c': replace = "[c\u00c7\u00e7]"; break;
+          case 'e': replace = "[e\u00c8\u00c9\u00ca\u00cb\u00e8\u00e9\u00ea\u00eb\u0113\u0115\u1e8d]"; break; 
+          case 'i': replace = "[i\u00cc\u00cd\u00ce\u00cf\u00ec\u00ed\u00ee\u00ef\u012a\u012b\u012c\u012d]"; break;
+          case 'o': replace = "[o\u00d2\u00d3\u00d4\u00d6\u00f2\u00f3\u00f4\u00f6\u014c\u014d\u014e\u014f]"; break; 
+          case 'u': replace = "[u\u00d9\u00da\u00db\u00dc\u00f9\u00fa\u00fb\u00fc\u016a\u016b\u016c\u016d]"; break; 
+          case 's': replace = "[s\u017f]"; break; 
+          default: replace += c; break;
+        }
+        buf.append(replace);
+      }
+      return buf.toString();
+    } else if (language.equals("de")) {
+      StringBuffer buf = new StringBuffer();
+      if (s.indexOf("ss") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("ss", "\u00df");
+        buf.append(str1 + "|");
+      }
+      if (s.indexOf("ae") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("ae", "\u00e4");
+        buf.append(str1 + "|");
+      }
+      if (s.indexOf("oe") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("oe", "\u00f6");
+        buf.append(str1 + "|");
+      }
+      if (s.indexOf("ue") != -1) {
+        String str1 = s;
+        str1 = str1.replaceAll("ue", "\u00fc");
+        buf.append(str1 + "|");
+      }
+      boolean beginWord = true;
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        if (! beginWord) 
+          c = Character.toLowerCase(c);
+        beginWord = Character.isWhitespace(c);
+        String replace = new String();
+        switch (c) {
+          case 'e': replace = "[e\u00e9]"; break; 
+          default: replace += c; break;
+        }
+        buf.append(replace);
+      }
+      return buf.toString();
+    } else {      // unknown or no language
+      return s;
+    }
+  }
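+  /*
+   * Illustrative sketch of the "de" branch of deNormalizeToRegExpr (the sample
+   * word and the expected pattern are assumptions read off the code above, not
+   * verified output): the "ss" alternative comes first, then the per-character
+   * expansion of the original string:
+   *
+   *   MpdlNormalizer n = new MpdlNormalizer("de");
+   *   String re = n.deNormalizeToRegExpr("fusse");
+   *   // expected: "fu\u00dfe|fuss[e\u00e9]"
+   */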
+  
+  /**
+   * Returns the offset table.
+   *
+   * @return             offset table
+   */
+  public int[] getOffsetTable() {
+    return offsets;
+  }
+
+  /**
+   * Returns a copy of an integer array with the element at
+   * <code>index</code> removed ("killed").
+   *
+   * @param array        integer array
+   * @param index        index of element to remove
+   */
+  static private int[] arrayKill(int[] array, int index) {
+    int[] newArray = new int[array.length - 1];
+    System.arraycopy(array, 0, newArray, 0, index);
+    System.arraycopy(array, index + 1, newArray, index, array.length - index - 1);
+    return newArray;
+  }
+
+  /**
+   * Returns a copy of an integer array with <code>count</code> elements
+   * inserted at <code>index</code>.
+   *
+   * @param array        integer array
+   * @param index        index to insert new elements
+   * @param value        value to insert into new slots
+   * @param count        number of new slots to insert
+   */
+  static private int[] arrayInsert(int[] array, int index, int value, int count) {
+    int[] newArray = new int[array.length + count];
+    System.arraycopy(array, 0, newArray, 0, index);
+    for (int i = 0; i < count; i++) newArray[index + i] = value;
+    System.arraycopy(array, index, newArray, index + count, array.length - index);
+    return newArray;
+  }
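+  /*
+   * Small sketch of how the two helpers above keep the offset table aligned
+   * with the normalized text (values are illustrative only): a 1->0
+   * replacement drops an entry, a 1->2 replacement duplicates one:
+   *
+   *   int[] a = {0, 1, 2, 3};
+   *   a = arrayKill(a, 1);            // {0, 2, 3}
+   *   a = arrayInsert(a, 2, a[1], 1); // {0, 2, 2, 3}
+   */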
+
+  /**
+   * We provide <code>main()</code> so that our services will be available
+   * outside Java (i.e., so we can run as a Un*x-style filter).
+   */
+  static public void main(String[] argv) throws ApplicationException {
+    if (argv.length != 1) {
+      System.err.println("You must specify a language.");
+      System.exit(1);
+    }
+    String rec;
+    StringBuffer buf = new StringBuffer();
+    BufferedReader bin = null;
+    try {
+      bin = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
+      while ((rec = bin.readLine()) != null) 
+        buf.append(rec + "\n");
+    }
+    catch (UnsupportedEncodingException e) {
+      System.err.println(e);
+      System.exit(1);
+    } catch (IOException e) {
+      System.err.println(e);
+      System.exit(1);
+    }
+    MpdlNormalizer orth = new MpdlNormalizer(argv[0]);
+    System.out.print(orth.normalize(buf.toString()));
+  }
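+  /*
+   * Example invocation as a filter (the package is assumed to be the same as
+   * the analyzer classes, de.mpg.mpiwg.berlin.mpdl.lt.analyzer; the input text
+   * is a placeholder):
+   *
+   *   echo "Grüße" | java de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlNormalizer de
+   */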
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStandardAnalyzer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,103 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Set;
+
+/**
+ * A StandardAnalyzer variant that preserves token case (no LowerCaseFilter in
+ * tokenStream and reusableTokenStream).
+ *
+ */
+public class MpdlStandardAnalyzer extends Analyzer {
+  private Set stopSet;
+  /** An array containing some common English words that are usually not
+  useful for searching. */
+  public static final String[] STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS;
+
+  /** Builds an analyzer with the default stop words ({@link #STOP_WORDS}). */
+  public MpdlStandardAnalyzer() {
+    this(STOP_WORDS);
+  }
+
+  /** Builds an analyzer with the given stop words. */
+  public MpdlStandardAnalyzer(Set stopWords) {
+    stopSet = stopWords;
+  }
+
+  /** Builds an analyzer with the given stop words. */
+  public MpdlStandardAnalyzer(String[] stopWords) {
+    stopSet = StopFilter.makeStopSet(stopWords);
+  }
+
+  /** Builds an analyzer with the stop words from the given file.
+   * @see WordlistLoader#getWordSet(File)
+   */
+  public MpdlStandardAnalyzer(File stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+  }
+
+  /** Builds an analyzer with the stop words from the given reader.
+   * @see WordlistLoader#getWordSet(Reader)
+   */
+  public MpdlStandardAnalyzer(Reader stopwords) throws IOException {
+    stopSet = WordlistLoader.getWordSet(stopwords);
+  }
+
+  /** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}
+  and a {@link StopFilter}, but not by a {@link LowerCaseFilter}. */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    StandardTokenizer tokenStream = new StandardTokenizer(reader);
+    tokenStream.setMaxTokenLength(maxTokenLength);
+    TokenStream result = new StandardFilter(tokenStream);
+    result = new StopFilter(result, stopSet);
+    return result;
+  }
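+  /*
+   * Minimal usage sketch (field name and sample text are placeholders; Token,
+   * next() and termText() follow the Lucene API already used in this module):
+   *
+   *   MpdlStandardAnalyzer analyzer = new MpdlStandardAnalyzer();
+   *   TokenStream ts = analyzer.tokenStream("content", new StringReader("The QUICK brown fox"));
+   *   Token t;
+   *   while ((t = ts.next()) != null)
+   *     System.out.println(t.termText());  // case is preserved, stop words are removed
+   */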
+
+  private static final class SavedStreams {
+    StandardTokenizer tokenStream;
+    TokenStream filteredTokenStream;
+  }
+
+  /** Default maximum allowed token length */
+  public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+  private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+  /**
+   * Set maximum allowed token length.  If a token is seen
+   * that exceeds this length then it is discarded.  This
+   * setting only takes effect the next time tokenStream or
+   * reusableTokenStream is called.
+   */
+  public void setMaxTokenLength(int length) {
+    maxTokenLength = length;
+  }
+    
+  /**
+   * @see #setMaxTokenLength
+   */
+  public int getMaxTokenLength() {
+    return maxTokenLength;
+  }
+  
+  public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
+    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
+    if (streams == null) {
+      streams = new SavedStreams();
+      setPreviousTokenStream(streams);
+      streams.tokenStream = new StandardTokenizer(reader);
+      streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
+      streams.filteredTokenStream = new StopFilter(streams.filteredTokenStream, stopSet);
+    } else {
+      streams.tokenStream.reset(reader);
+    }
+    streams.tokenStream.setMaxTokenLength(maxTokenLength);
+    return streams.filteredTokenStream;
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStemFilter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,52 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+import java.io.IOException;
+import java.util.Set;
+
+public final class MpdlStemFilter extends TokenFilter {
+  private MpdlMorphAnalyzer analyzer;
+  private Token token = null;
+  private MpdlStemmer stemmer = null;
+  private Set exclusionSet = null;
+
+  public MpdlStemFilter(TokenStream in) {
+    super(in);
+    stemmer = new MpdlStemmer();
+  }
+
+  public MpdlStemFilter(MpdlMorphAnalyzer analyzer, TokenStream in, Set exclusionSet) {
+    this(in);
+    this.analyzer = analyzer;
+    this.exclusionSet = exclusionSet;
+    this.stemmer.setLanguage(analyzer.getLanguage());
+  }
+
+  public final Token next() throws IOException {
+    if (( token = input.next()) == null) {
+      return null;
+    } else if ( exclusionSet != null && exclusionSet.contains( token.termText() ) ) {
+      return token;
+    } else {
+      String s = stemmer.stem(token.termText());
+      // If not stemmed, don't waste time creating a new token
+      if ( !s.equals( token.termText() ) ) {
+        return new Token( s, token.startOffset(), token.endOffset(), token.type() );
+      }
+      return token;
+    }
+  }
+
+  public void setStemmer(MpdlStemmer stemmer) {
+    if ( stemmer != null ) {
+      this.stemmer = stemmer;
+    }
+  }
+
+  public void setExclusionSet(Set exclusionSet) {
+    this.exclusionSet = exclusionSet;
+  }
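+  /*
+   * Minimal chaining sketch (reader and language are placeholders): the filter
+   * sits behind a tokenizer and swaps each token text for its stem whenever
+   * the stemmer changes it:
+   *
+   *   TokenStream ts = new MpdlTokenizer(reader, "de");
+   *   ts = new MpdlStemFilter(ts);  // uses a default MpdlStemmer
+   *   Token t;
+   *   while ((t = ts.next()) != null) { ... }
+   */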
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,159 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import java.util.ArrayList;
+
+import org.apache.log4j.Logger;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+public class MpdlStemmer {
+  private String language = MpdlConstants.DEFAULT_LANGUAGE;
+
+  protected void setLanguage(String language) {
+    this.language = language;
+  }
+  
+  /**
+   * Used for indexing documents and for querying
+   * @param term
+   * @return
+   */
+  protected String stem(String term) {
+    String stem = null;
+    term = term.toLowerCase();
+    // special case: term is already lemma: begins with "lemmalemma"
+    if (term.startsWith("lemmalemma"))
+      return term;
+    // try to find the stem by the MorphologyCache
+    ArrayList<Lemma> lemmas = null;
+    try {
+      MorphologyCache morphologyCache = MorphologyCache.getInstance();
+      lemmas = morphologyCache.getLemmasByFormName(language, term, false);  // do not normalize again, already done
+    } catch (ApplicationException e) {
+      // nothing, do not disturb
+    }
+    if (lemmas != null && ! lemmas.isEmpty()) {
+      if (lemmas.size() == 1) {
+        stem = lemmas.get(0).getLemmaName();
+      } else {
+        stem = "";
+        for (int i=0; i<lemmas.size(); i++) {
+          Lemma lemma = lemmas.get(i);
+          String lemmaName = lemma.getLemmaName();
+          stem = stem + "+++" + lemmaName;  // e.g. "+++edo+++sum"
+        }
+      }
+    }
+    // if not found in MorphologyCache use Snowball
+    if (stem == null) {
+      stem = stemBySnowball(term, language); 
+      // if the stem differs from the term and is not too short (> 2 characters), add this Snowball form to the dynamic morphology cache
+      if ((! stem.equals(term)) && stem.length() > 2) {
+        try {
+          MorphologyCache morphologyCache = MorphologyCache.getInstance();
+          if (morphologyCache.getMode() == MorphologyCache.DOCUMENT_MODE) {
+            Form newForm = new Form("snowball", language, term);
+            newForm.setLemmaName(stem);
+            morphologyCache.insertFormDynamic(newForm);
+          }
+        } catch (ApplicationException e) {
+          Logger.getLogger(MpdlStemmer.class).warn("MorphologyCache: an exception was caught while indexing a document: " + e.getMessage(), e);
+        }
+      }
+    }
+    return stem;
+  }
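+  /*
+   * Within-package usage sketch (stem() and setLanguage() are protected; the
+   * word is a placeholder and the result is not verified output):
+   *
+   *   MpdlStemmer stemmer = new MpdlStemmer();
+   *   stemmer.setLanguage("de");
+   *   String stem = stemmer.stem("h\u00e4user");  // MorphologyCache lemma if known, otherwise Snowball stem
+   */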
+
+  private String stemBySnowball(String term, String language) {
+    String stem = null; 
+    if (language.equals("de")) {
+      net.sf.snowball.ext.GermanStemmer stemmer = new net.sf.snowball.ext.GermanStemmer();
+      stemmer.setCurrent(term); 
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("en")) {
+      net.sf.snowball.ext.EnglishStemmer stemmer = new net.sf.snowball.ext.EnglishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("nl")) {
+      net.sf.snowball.ext.DutchStemmer stemmer = new net.sf.snowball.ext.DutchStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("fi")) {
+      net.sf.snowball.ext.FinnishStemmer stemmer = new net.sf.snowball.ext.FinnishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("fr")) {
+      net.sf.snowball.ext.FrenchStemmer stemmer = new net.sf.snowball.ext.FrenchStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("it")) {
+      net.sf.snowball.ext.ItalianStemmer stemmer = new net.sf.snowball.ext.ItalianStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("no")) {
+      net.sf.snowball.ext.NorwegianStemmer stemmer = new net.sf.snowball.ext.NorwegianStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("pt")) {
+      net.sf.snowball.ext.PortugueseStemmer stemmer = new net.sf.snowball.ext.PortugueseStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("ru")) {
+      net.sf.snowball.ext.RussianStemmer stemmer = new net.sf.snowball.ext.RussianStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("es")) {
+      net.sf.snowball.ext.SpanishStemmer stemmer = new net.sf.snowball.ext.SpanishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else if (language.equals("sv")) {
+      net.sf.snowball.ext.SwedishStemmer stemmer = new net.sf.snowball.ext.SwedishStemmer();
+      stemmer.setCurrent(term);
+      stemmer.stem();
+      stem = stemmer.getCurrent();
+    } else {
+      stem = term; // if no language matches, return the term itself as the stem
+    }
+    return stem;
+  }
+
+  /*
+  private String stemByLanguageStemmers(String term, String language) {
+    // TODO provide other languages
+    String stem = null;
+    if (language.equals("br")) {
+      BrazilianStemmer stemmer = new BrazilianStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("de")) {
+      GermanStemmer stemmer = new GermanStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("fr")) {
+      FrenchStemmer stemmer = new FrenchStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("nl")) {
+      DutchStemmer stemmer = new DutchStemmer();
+      stem = stemmer.stem(term);
+    } else if (language.equals("ru")) {
+      RussianStemmer stemmer = new RussianStemmer();
+      stem = stemmer.stem(term);
+    } else {
+      stem = term; // if no language matches, return the term itself as the stem
+    }
+    return stem;
+  }
+  */
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,113 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import java.io.IOException;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.Tokenizer;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class MpdlTokenizer extends Tokenizer {
+  private static final int MAX_WORD_LEN = 255;
+  private static final int IO_BUFFER_SIZE = 1024;
+  private String language;  // TODO make the tokenizer language dependent
+  private int offset = 0, bufferIndex = 0, dataLen = 0;
+  private char[] buffer = new char[MAX_WORD_LEN];
+  private char[] ioBuffer = new char[IO_BUFFER_SIZE];
+  private MpdlNormalizer normalizer;
+
+  public MpdlTokenizer(Reader input, String language) {
+    super(input);
+    this.language = language;
+  }
+
+  public MpdlTokenizer(Reader input, String language, MpdlNormalizer normalizer) {
+    super(input);
+    this.language = language;
+    this.normalizer = normalizer;
+  }
+
+  /** Returns true iff a character should be included in a token.  This
+   * tokenizer generates as tokens adjacent sequences of characters which
+   * satisfy this predicate.  Characters for which this is false are used to
+   * define token boundaries and are not included in tokens. */
+  protected boolean isTokenChar(char c) {
+    boolean isTokenChar = true;
+    switch (c) {
+      case ' ': isTokenChar = false; break;
+      case '.': isTokenChar = false; break;
+      case ',': isTokenChar = false; break;
+      case '!': isTokenChar = false; break;
+      case '?': isTokenChar = false; break;
+      case ';': isTokenChar = false; break;
+      case ':': isTokenChar = false; break;
+      case '(': isTokenChar = false; break;
+      case ')': isTokenChar = false; break;
+      case '[': isTokenChar = false; break;
+      case ']': isTokenChar = false; break;
+      case '<': isTokenChar = false; break;
+      case '>': isTokenChar = false; break;
+      case '&': isTokenChar = false; break;
+      case '+': isTokenChar = false; break;
+      case '"': isTokenChar = false; break;
+      case '\'': isTokenChar = false; break;
+      // case '\t': isTokenChar = false; break; 
+      // case '\n': isTokenChar = false; break;  // do not break words which are on another line
+    }
+    return isTokenChar;
+  }
+
+  /** Called on each token character to normalize it before it is added to the
+   * token.  The default implementation does nothing.  Subclasses may use this
+   * to, e.g., lowercase tokens. */
+  protected char normalize(char c) {
+    return c;
+  }
+
+  /** Returns the next token in the stream, or null at EOS. */
+  public final Token next() throws IOException {
+    int length = 0;
+    int start = offset;
+    while (true) {
+      final char c;
+      offset++;
+      if (bufferIndex >= dataLen) {
+        dataLen = input.read(ioBuffer);
+        bufferIndex = 0;
+      }
+      if (dataLen == -1) {
+        if (length > 0)
+          break;
+        else
+          return null;
+      } else {
+        c = ioBuffer[bufferIndex++];
+      }
+      if (isTokenChar(c)) {              // if it's a token char
+        if (length == 0)                 // start of token
+          start = offset - 1;
+        buffer[length++] = normalize(c); // buffer it, normalized
+        if (length == MAX_WORD_LEN)      // buffer overflow!
+          break;
+      } else if (length > 0)             // at non-Letter w/ chars
+        break;                           // return 'em
+    }
+    Token newToken = new Token(start, start + length);
+    newToken.setTermBuffer(buffer, 0, length);
+    if (normalizer != null) {
+      char[] termBuffer = newToken.termBuffer();
+      int termBufferLength = newToken.termLength();
+      String tokenText = new String(termBuffer, 0, termBufferLength);
+      try {
+        String normalizedTokenText = normalizer.normalize(tokenText);
+        int normalizedTokenTextLength = normalizedTokenText.length();
+        char[] normalizedTokenTextBuffer = normalizedTokenText.toCharArray();
+        newToken.setTermBuffer(normalizedTokenTextBuffer, 0, normalizedTokenTextLength);
+      } catch (ApplicationException e) {
+        throw new IOException(e);        
+      }
+    }
+    return newToken;
+  }
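+  /*
+   * Usage sketch (the input text is a placeholder; MpdlNormalizer is the class
+   * from this package, as wired up in MpdlTokenizerAnalyzer): punctuation from
+   * isTokenChar() splits tokens, hyphens and line breaks do not:
+   *
+   *   Reader r = new StringReader("corpora, motus");
+   *   MpdlTokenizer tok = new MpdlTokenizer(r, "la", new MpdlNormalizer("la"));
+   *   Token t;
+   *   while ((t = tok.next()) != null) { ... }  // yields "corpora" and "motus"
+   */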
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/MpdlTokenizerAnalyzer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,55 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+
+public class MpdlTokenizerAnalyzer extends Analyzer {
+  protected String language = MpdlConstants.DEFAULT_LANGUAGE;
+  protected MpdlNormalizer normalizer = null;  
+  
+  public MpdlTokenizerAnalyzer(String language) {
+    this.language = language;
+    this.normalizer = new MpdlNormalizer(language);  // default normalizer
+  }
+
+  public MpdlTokenizerAnalyzer(MpdlNormalizer normalizer, String language) {
+    this.language = language;
+    this.normalizer = normalizer;
+  }
+
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    TokenStream result = new MpdlTokenizer(reader, language, normalizer);
+    result = new MpdlFilter(result);  // filter to remove the hyphen in a token etc.
+    result = new LowerCaseFilter(result);
+    return result;
+  }
+  
+  public ArrayList<Token> getToken(String inputString) throws ApplicationException {
+    ArrayList<Token> token = new ArrayList<Token>();
+    try {
+      Reader reader = new StringReader(inputString);
+      TokenStream result = new MpdlTokenizer(reader, language, normalizer);
+      result = new MpdlFilter(result);  // filter to remove the hyphen in a token etc.
+      result = new LowerCaseFilter(result);
+      Token t = result.next();
+      while (t != null) {
+        token.add(t);
+        t = result.next();
+      }
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return token;
+  }
+  
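+  // Illustrative sketch, not part of the original changeset: driving the analyzer
+  // from a main method. "la" is only a sample language code here; which codes the
+  // MpdlNormalizer actually supports is not shown in this changeset.
+  public static void main(String[] args) throws ApplicationException {
+    MpdlTokenizerAnalyzer analyzer = new MpdlTokenizerAnalyzer("la");
+    ArrayList<Token> tokens = analyzer.getToken("In principio erat verbum");
+    for (Token t : tokens) {
+      String text = new String(t.termBuffer(), 0, t.termLength());
+      System.out.println(t.startOffset() + "-" + t.endOffset() + ": " + text);
+    }
+  }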
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/BrazilianStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,1021 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for Brazilian words.
+ */
+public class BrazilianStemmer {
+
+	/**
+	 * Changed term
+	 */
+	private   String TERM ;
+	private   String CT ;
+	private   String R1 ;
+	private   String R2 ;
+	private   String RV ;
+
+
+	public BrazilianStemmer() {
+	}
+
+	/**
+	 * Stems the given term to a unique <tt>discriminator</tt>.
+	 *
+	 * @param term  The term that should be stemmed.
+	 * @return      Discriminator for <tt>term</tt>
+	 */
+	public String stem( String term ) {
+    boolean altered = false ; // altered the term
+
+    // creates CT
+    createCT(term) ;
+
+		if ( !isIndexable( CT ) ) {
+			return null;
+		}
+		if ( !isStemmable( CT ) ) {
+			return CT ;
+		}
+
+    R1 = getR1(CT) ;
+    R2 = getR1(R1) ;
+    RV = getRV(CT) ;
+    TERM = term + ";" +CT ;
+
+    altered = step1() ;
+    if (!altered) {
+      altered = step2() ;
+    }
+
+    if (altered) {
+      step3();
+    } else {
+      step4();
+    }
+
+    step5() ;
+
+    return CT ;
+	}
+
+	/**
+	 * Checks whether a term can be processed correctly.
+	 *
+	 * @return  true if, and only if, the given term consists only of letters.
+	 */
+	private boolean isStemmable( String term ) {
+		for ( int c = 0; c < term.length(); c++ ) {
+			// Discard terms that contain non-letter characters.
+			if ( !Character.isLetter(term.charAt(c))) {
+				return false;
+			}
+		}
+		return true;
+	}
+
+	/**
+	 * Checks whether a term can be indexed.
+	 *
+	 * @return  true if it can be indexed
+	 */
+	private boolean isIndexable( String term ) {
+		return (term.length() < 30) && (term.length() > 2) ;
+	}
+
+	/**
+	 * See if the given character is one of 'a','e','i','o','u'
+   *
+   * @return true if is vowel
+	 */
+	private boolean isVowel( char value ) {
+    return (value == 'a') ||
+           (value == 'e') ||
+           (value == 'i') ||
+           (value == 'o') ||
+           (value == 'u') ;
+  }
+
+	/**
+	 * Gets R1
+   *
+   * R1 - is the region after the first non-vowel following a vowel,
+   *      or is the null region at the end of the word if there is
+   *      no such non-vowel.
+   *
+   * @return null or a string representing R1
+	 */
+	private String getR1( String value ) {
+    int     i;
+    int     j;
+
+    // be-safe !!!
+    if (value == null) {
+      return null ;
+    }
+
+    // find 1st vowel
+    i = value.length()-1 ;
+    for (j=0 ; j < i ; j++) {
+      if (isVowel(value.charAt(j))) {
+        break ;
+      }
+    }
+
+    if (!(j < i)) {
+      return null ;
+    }
+
+    // find 1st non-vowel
+    for ( ; j < i ; j++) {
+      if (!(isVowel(value.charAt(j)))) {
+        break ;
+      }
+    }
+
+    if (!(j < i)) {
+      return null ;
+    }
+
+    return value.substring(j+1) ;
+  }
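+
+  /* Illustrative note (not part of the original changeset): for the word
+   * "beleza" this method yields R1 = "eza" (the region after the first
+   * non-vowel following a vowel), and applying it again to R1 gives
+   * R2 = "a", which matches the worked example in the Snowball documentation. */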
+
+	/**
+	 * Gets RV
+   *
+   * RV - IF the second letter is a consonant, RV is the region after
+   *      the next following vowel,
+   *
+   *      OR if the first two letters are vowels, RV is the region
+   *      after the next consonant,
+   *
+   *      AND otherwise (consonant-vowel case) RV is the region after
+   *      the third letter.
+   *
+   *      BUT RV is the end of the word if these positions cannot be
+   *      found.
+   *
+   * @return null or a string representing RV
+	 */
+	private String getRV( String value ) {
+    int     i;
+    int     j;
+
+    // be-safe !!!
+    if (value == null) {
+      return null ;
+    }
+
+    i = value.length()-1 ;
+
+    // RV - IF the second letter is a consonant, RV is the region after
+    //      the next following vowel,
+    if ((i > 0) && !isVowel(value.charAt(1))) {
+      // find 1st vowel
+      for (j=2 ; j < i ; j++) {
+        if (isVowel(value.charAt(j))) {
+          break ;
+        }
+      }
+
+      if (j < i) {
+        return value.substring(j+1) ;
+      }
+    }
+
+
+    // RV - OR if the first two letters are vowels, RV is the region
+    //      after the next consonant,
+    if ((i > 1) &&
+        isVowel(value.charAt(0)) &&
+        isVowel(value.charAt(1))) {
+      // find 1st consonant
+      for (j=2 ; j < i ; j++) {
+        if (!isVowel(value.charAt(j))) {
+          break ;
+        }
+      }
+
+      if (j < i) {
+        return value.substring(j+1) ;
+      }
+    }
+
+    // RV - AND otherwise (consonant-vowel case) RV is the region after
+    //      the third letter.
+    if (i > 2) {
+      return value.substring(3) ;
+    }
+
+    return null ;
+  }
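+
+  /* Illustrative note (not part of the original changeset): for "beleza" the
+   * first letter is a consonant and the second a vowel, so the consonant-vowel
+   * rule applies and RV = "eza", the region after the third letter. */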
+
+	/**
+   * 1) Turn to lowercase
+   * 2) Remove accents
+   * 3) ã -> a ; õ -> o
+   * 4) ç -> c
+   *
+   * @return null or the transformed string
+	 */
+	private String changeTerm( String value ) {
+    int     j;
+    String  r = "" ;
+
+    // be-safe !!!
+    if (value == null) {
+      return null ;
+    }
+
+    value = value.toLowerCase() ;
+    for (j=0 ; j < value.length() ; j++) {
+      if ((value.charAt(j) == 'á') ||
+          (value.charAt(j) == 'â') ||
+          (value.charAt(j) == 'ã')) {
+        r= r + "a" ; continue ;
+      }
+      if ((value.charAt(j) == 'é') ||
+          (value.charAt(j) == 'ê')) {
+        r= r + "e" ; continue ;
+      }
+      if (value.charAt(j) == 'í') {
+        r= r + "i" ; continue ;
+      }
+      if ((value.charAt(j) == 'ó') ||
+          (value.charAt(j) == 'ô') ||
+          (value.charAt(j) == 'õ')) {
+        r= r + "o" ; continue ;
+      }
+      if ((value.charAt(j) == 'ú') ||
+          (value.charAt(j) == 'ü')) {
+        r= r + "u" ; continue ;
+      }
+      if (value.charAt(j) == 'ç') {
+        r= r + "c" ; continue ;
+      }
+      if (value.charAt(j) == 'ñ') {
+        r= r + "n" ; continue ;
+      }
+
+      r= r+ value.charAt(j) ;
+    }
+
+    return r ;
+  }
+
+	/**
+   * Check if a string ends with a suffix
+   *
+   * @return true if the string ends with the specified suffix
+	 */
+	private boolean suffix( String value, String suffix ) {
+
+    // be-safe !!!
+    if ((value == null) || (suffix == null)) {
+      return false ;
+    }
+
+    if (suffix.length() > value.length()) {
+      return false ;
+    }
+
+    return value.substring(value.length()-suffix.length()).equals(suffix);
+  }
+
+	/**
+   * Replace a string suffix by another
+   *
+   * @return the replaced String
+	 */
+	private String replaceSuffix( String value, String toReplace, String changeTo ) {
+    String vvalue ;
+
+    // be-safe !!!
+    if ((value == null) ||
+        (toReplace == null) ||
+        (changeTo == null) ) {
+      return value ;
+    }
+
+    vvalue = removeSuffix(value,toReplace) ;
+
+    if (value.equals(vvalue)) {
+      return value ;
+    } else {
+      return vvalue + changeTo ;
+    }
+  }
+
+	/**
+   * Remove a string suffix
+   *
+   * @return the String without the suffix
+	 */
+	private String removeSuffix( String value, String toRemove ) {
+    // be-safe !!!
+    if ((value == null) ||
+        (toRemove == null) ||
+        !suffix(value,toRemove) ) {
+      return value ;
+    }
+
+    return value.substring(0,value.length()-toRemove.length()) ;
+  }
+
+	/**
+   * See if a suffix is preceded by a String
+   *
+   * @return true if the suffix is preceded by the given string
+	 */
+	private boolean suffixPreceded( String value, String suffix, String preceded ) {
+    // be-safe !!!
+    if ((value == null) ||
+        (suffix == null) ||
+        (preceded == null) ||
+        !suffix(value,suffix) ) {
+      return false ;
+    }
+
+    return suffix(removeSuffix(value,suffix),preceded) ;
+  }
+
+	/**
+	 * Creates CT (changed term): lowercases the term, strips accents and cedillas, and removes leading/trailing punctuation.
+	 */
+	private void createCT( String term ) {
+    CT = changeTerm(term) ;
+
+    if (CT.length() < 2) return ;
+
+    // if the first character is ... , remove it
+    if ((CT.charAt(0) == '"')  ||
+        (CT.charAt(0) == '\'') ||
+        (CT.charAt(0) == '-')  ||
+        (CT.charAt(0) == ',')  ||
+        (CT.charAt(0) == ';')  ||
+        (CT.charAt(0) == '.')  ||
+        (CT.charAt(0) == '?')  ||
+        (CT.charAt(0) == '!')
+        ) {
+        CT = CT.substring(1);
+    }
+
+    if (CT.length() < 2) return ;
+
+    // if the last character is ... , remove it
+    if ((CT.charAt(CT.length()-1) == '-') ||
+        (CT.charAt(CT.length()-1) == ',') ||
+        (CT.charAt(CT.length()-1) == ';') ||
+        (CT.charAt(CT.length()-1) == '.') ||
+        (CT.charAt(CT.length()-1) == '?') ||
+        (CT.charAt(CT.length()-1) == '!') ||
+        (CT.charAt(CT.length()-1) == '\'') ||
+        (CT.charAt(CT.length()-1) == '"')
+        ) {
+        CT = CT.substring(0,CT.length()-1);
+    }
+  }
+
+
+	/**
+	 * Standard suffix removal.
+   * Search for the longest among the following suffixes, and perform
+   * the following actions:
+   *
+   * @return false if no ending was removed
+	 */
+	private boolean step1() {
+    if (CT == null) return false ;
+
+    // suffix length = 7
+    if (suffix(CT,"uciones") && suffix(R2,"uciones")) {
+        CT = replaceSuffix(CT,"uciones","u") ; return true;
+    }
+
+    // suffix length = 6
+    if (CT.length() >= 6) {
+      if (suffix(CT,"imentos") && suffix(R2,"imentos")) {
+          CT = removeSuffix(CT,"imentos") ; return true;
+      }
+      if (suffix(CT,"amentos") && suffix(R2,"amentos")) {
+          CT = removeSuffix(CT,"amentos") ; return true;
+      }
+      if (suffix(CT,"adores") && suffix(R2,"adores")) {
+          CT = removeSuffix(CT,"adores") ; return true;
+      }
+      if (suffix(CT,"adoras") && suffix(R2,"adoras")) {
+          CT = removeSuffix(CT,"adoras") ; return true;
+      }
+      if (suffix(CT,"logias") && suffix(R2,"logias")) {
+          CT = replaceSuffix(CT,"logias","log") ; return true;
+      }
+      if (suffix(CT,"encias") && suffix(R2,"encias")) {
+          CT = replaceSuffix(CT,"encias","ente") ; return true;
+      }
+      if (suffix(CT,"amente") && suffix(R1,"amente")) {
+          CT = removeSuffix(CT,"amente") ; return true;
+      }
+      if (suffix(CT,"idades") && suffix(R2,"idades")) {
+          CT = removeSuffix(CT,"idades") ; return true;
+      }
+    }
+
+    // suffix length = 5
+    if (CT.length() >= 5) {
+      if (suffix(CT,"acoes") && suffix(R2,"acoes")) {
+          CT = removeSuffix(CT,"acoes") ; return true;
+      }
+      if (suffix(CT,"imento") && suffix(R2,"imento")) {
+          CT = removeSuffix(CT,"imento") ; return true;
+      }
+      if (suffix(CT,"amento") && suffix(R2,"amento")) {
+          CT = removeSuffix(CT,"amento") ; return true;
+      }
+      if (suffix(CT,"adora") && suffix(R2,"adora")) {
+          CT = removeSuffix(CT,"adora") ; return true;
+      }
+      if (suffix(CT,"ismos") && suffix(R2,"ismos")) {
+          CT = removeSuffix(CT,"ismos") ; return true;
+      }
+      if (suffix(CT,"istas") && suffix(R2,"istas")) {
+          CT = removeSuffix(CT,"istas") ; return true;
+      }
+      if (suffix(CT,"logia") && suffix(R2,"logia")) {
+          CT = replaceSuffix(CT,"logia","log") ; return true;
+      }
+      if (suffix(CT,"ucion") && suffix(R2,"ucion")) {
+          CT = replaceSuffix(CT,"ucion","u") ; return true;
+      }
+      if (suffix(CT,"encia") && suffix(R2,"encia")) {
+          CT = replaceSuffix(CT,"encia","ente") ; return true;
+      }
+      if (suffix(CT,"mente") && suffix(R2,"mente")) {
+          CT = removeSuffix(CT,"mente") ; return true;
+      }
+      if (suffix(CT,"idade") && suffix(R2,"idade")) {
+          CT = removeSuffix(CT,"idade") ; return true;
+      }
+    }
+
+    // suffix length = 4
+    if (CT.length() >= 4) {
+      if (suffix(CT,"acao") && suffix(R2,"acao")) {
+          CT = removeSuffix(CT,"acao") ; return true;
+      }
+      if (suffix(CT,"ezas") && suffix(R2,"ezas")) {
+          CT = removeSuffix(CT,"ezas") ; return true;
+      }
+      if (suffix(CT,"icos") && suffix(R2,"icos")) {
+          CT = removeSuffix(CT,"icos") ; return true ;
+      }
+      if (suffix(CT,"icas") && suffix(R2,"icas")) {
+          CT = removeSuffix(CT,"icas") ; return true ;
+      }
+      if (suffix(CT,"ismo") && suffix(R2,"ismo")) {
+          CT = removeSuffix(CT,"ismo") ; return true ;
+      }
+      if (suffix(CT,"avel") && suffix(R2,"avel")) {
+          CT = removeSuffix(CT,"avel") ; return true ;
+      }
+      if (suffix(CT,"ivel") && suffix(R2,"ivel")) {
+          CT = removeSuffix(CT,"ivel") ; return true ;
+      }
+      if (suffix(CT,"ista") && suffix(R2,"ista")) {
+          CT = removeSuffix(CT,"ista") ; return true ;
+      }
+      if (suffix(CT,"osos") && suffix(R2,"osos")) {
+          CT = removeSuffix(CT,"osos") ; return true ;
+      }
+      if (suffix(CT,"osas") && suffix(R2,"osas")) {
+          CT = removeSuffix(CT,"osas") ; return true ;
+      }
+      if (suffix(CT,"ador") && suffix(R2,"ador")) {
+          CT = removeSuffix(CT,"ador") ; return true ;
+      }
+      if (suffix(CT,"ivas") && suffix(R2,"ivas")) {
+          CT = removeSuffix(CT,"ivas") ; return true ;
+      }
+      if (suffix(CT,"ivos") && suffix(R2,"ivos")) {
+          CT = removeSuffix(CT,"ivos") ; return true ;
+      }
+      if (suffix(CT,"iras") &&
+          suffix(RV,"iras") &&
+          suffixPreceded(CT,"iras","e")) {
+          CT = replaceSuffix(CT,"iras","ir") ; return true ;
+      }
+    }
+
+    // suffix length = 3
+    if (CT.length() >= 3) {
+      if (suffix(CT,"eza") && suffix(R2,"eza")) {
+          CT = removeSuffix(CT,"eza") ; return true ;
+      }
+      if (suffix(CT,"ico") && suffix(R2,"ico")) {
+          CT = removeSuffix(CT,"ico") ; return true ;
+      }
+      if (suffix(CT,"ica") && suffix(R2,"ica")) {
+          CT = removeSuffix(CT,"ica") ; return true ;
+      }
+      if (suffix(CT,"oso") && suffix(R2,"oso")) {
+          CT = removeSuffix(CT,"oso") ; return true ;
+      }
+      if (suffix(CT,"osa") && suffix(R2,"osa")) {
+          CT = removeSuffix(CT,"osa") ; return true ;
+      }
+      if (suffix(CT,"iva") && suffix(R2,"iva")) {
+          CT = removeSuffix(CT,"iva") ; return true ;
+      }
+      if (suffix(CT,"ivo") && suffix(R2,"ivo")) {
+          CT = removeSuffix(CT,"ivo") ; return true ;
+      }
+      if (suffix(CT,"ira") &&
+          suffix(RV,"ira") &&
+          suffixPreceded(CT,"ira","e")) {
+          CT = replaceSuffix(CT,"ira","ir") ; return true ;
+      }
+    }
+
+    // no ending was removed by step1
+    return false ;
+  }
+
+
+	/**
+	 * Verb suffixes.
+   *
+   * Search for the longest among the following suffixes in RV,
+   * and if found, delete.
+   *
+   * @return false if no ending was removed
+	*/
+	private boolean step2() {
+    if (RV == null) return false ;
+
+    // suffix length = 7
+    if (RV.length() >= 7) {
+      if (suffix(RV,"issemos")) {
+        CT = removeSuffix(CT,"issemos") ; return true;
+      }
+      if (suffix(RV,"essemos")) {
+        CT = removeSuffix(CT,"essemos") ; return true;
+      }
+      if (suffix(RV,"assemos")) {
+        CT = removeSuffix(CT,"assemos") ; return true;
+      }
+      if (suffix(RV,"ariamos")) {
+        CT = removeSuffix(CT,"ariamos") ; return true;
+      }
+      if (suffix(RV,"eriamos")) {
+        CT = removeSuffix(CT,"eriamos") ; return true;
+      }
+      if (suffix(RV,"iriamos")) {
+        CT = removeSuffix(CT,"iriamos") ; return true;
+      }
+    }
+
+    // suffix length = 6
+    if (RV.length() >= 6) {
+      if (suffix(RV,"iremos")) {
+        CT = removeSuffix(CT,"iremos") ; return true;
+      }
+      if (suffix(RV,"eremos")) {
+        CT = removeSuffix(CT,"eremos") ; return true;
+      }
+      if (suffix(RV,"aremos")) {
+        CT = removeSuffix(CT,"aremos") ; return true;
+      }
+      if (suffix(RV,"avamos")) {
+        CT = removeSuffix(CT,"avamos") ; return true;
+      }
+      if (suffix(RV,"iramos")) {
+        CT = removeSuffix(CT,"iramos") ; return true;
+      }
+      if (suffix(RV,"eramos")) {
+        CT = removeSuffix(CT,"eramos") ; return true;
+      }
+      if (suffix(RV,"aramos")) {
+        CT = removeSuffix(CT,"aramos") ; return true;
+      }
+      if (suffix(RV,"asseis")) {
+        CT = removeSuffix(CT,"asseis") ; return true;
+      }
+      if (suffix(RV,"esseis")) {
+        CT = removeSuffix(CT,"esseis") ; return true;
+      }
+      if (suffix(RV,"isseis")) {
+        CT = removeSuffix(CT,"isseis") ; return true;
+      }
+      if (suffix(RV,"arieis")) {
+        CT = removeSuffix(CT,"arieis") ; return true;
+      }
+      if (suffix(RV,"erieis")) {
+        CT = removeSuffix(CT,"erieis") ; return true;
+      }
+      if (suffix(RV,"irieis")) {
+        CT = removeSuffix(CT,"irieis") ; return true;
+      }
+    }
+
+
+    // suffix length = 5
+    if (RV.length() >= 5) {
+      if (suffix(RV,"irmos")) {
+        CT = removeSuffix(CT,"irmos") ; return true;
+      }
+      if (suffix(RV,"iamos")) {
+        CT = removeSuffix(CT,"iamos") ; return true;
+      }
+      if (suffix(RV,"armos")) {
+        CT = removeSuffix(CT,"armos") ; return true;
+      }
+      if (suffix(RV,"ermos")) {
+        CT = removeSuffix(CT,"ermos") ; return true;
+      }
+      if (suffix(RV,"areis")) {
+        CT = removeSuffix(CT,"areis") ; return true;
+      }
+      if (suffix(RV,"ereis")) {
+        CT = removeSuffix(CT,"ereis") ; return true;
+      }
+      if (suffix(RV,"ireis")) {
+        CT = removeSuffix(CT,"ireis") ; return true;
+      }
+      if (suffix(RV,"asses")) {
+        CT = removeSuffix(CT,"asses") ; return true;
+      }
+      if (suffix(RV,"esses")) {
+        CT = removeSuffix(CT,"esses") ; return true;
+      }
+      if (suffix(RV,"isses")) {
+        CT = removeSuffix(CT,"isses") ; return true;
+      }
+      if (suffix(RV,"astes")) {
+        CT = removeSuffix(CT,"astes") ; return true;
+      }
+      if (suffix(RV,"assem")) {
+        CT = removeSuffix(CT,"assem") ; return true;
+      }
+      if (suffix(RV,"essem")) {
+        CT = removeSuffix(CT,"essem") ; return true;
+      }
+      if (suffix(RV,"issem")) {
+        CT = removeSuffix(CT,"issem") ; return true;
+      }
+      if (suffix(RV,"ardes")) {
+        CT = removeSuffix(CT,"ardes") ; return true;
+      }
+      if (suffix(RV,"erdes")) {
+        CT = removeSuffix(CT,"erdes") ; return true;
+      }
+      if (suffix(RV,"irdes")) {
+        CT = removeSuffix(CT,"irdes") ; return true;
+      }
+      if (suffix(RV,"ariam")) {
+        CT = removeSuffix(CT,"ariam") ; return true;
+      }
+      if (suffix(RV,"eriam")) {
+        CT = removeSuffix(CT,"eriam") ; return true;
+      }
+      if (suffix(RV,"iriam")) {
+        CT = removeSuffix(CT,"iriam") ; return true;
+      }
+      if (suffix(RV,"arias")) {
+        CT = removeSuffix(CT,"arias") ; return true;
+      }
+      if (suffix(RV,"erias")) {
+        CT = removeSuffix(CT,"erias") ; return true;
+      }
+      if (suffix(RV,"irias")) {
+        CT = removeSuffix(CT,"irias") ; return true;
+      }
+      if (suffix(RV,"estes")) {
+        CT = removeSuffix(CT,"estes") ; return true;
+      }
+      if (suffix(RV,"istes")) {
+        CT = removeSuffix(CT,"istes") ; return true;
+      }
+      if (suffix(RV,"areis")) {
+        CT = removeSuffix(CT,"areis") ; return true;
+      }
+      if (suffix(RV,"aveis")) {
+        CT = removeSuffix(CT,"aveis") ; return true;
+      }
+    }
+
+    // suffix length = 4
+    if (RV.length() >= 4) {
+      if (suffix(RV,"aria")) {
+        CT = removeSuffix(CT,"aria") ; return true;
+      }
+      if (suffix(RV,"eria")) {
+        CT = removeSuffix(CT,"eria") ; return true;
+      }
+      if (suffix(RV,"iria")) {
+        CT = removeSuffix(CT,"iria") ; return true;
+      }
+      if (suffix(RV,"asse")) {
+        CT = removeSuffix(CT,"asse") ; return true;
+      }
+      if (suffix(RV,"esse")) {
+        CT = removeSuffix(CT,"esse") ; return true;
+      }
+      if (suffix(RV,"isse")) {
+        CT = removeSuffix(CT,"isse") ; return true;
+      }
+      if (suffix(RV,"aste")) {
+        CT = removeSuffix(CT,"aste") ; return true;
+      }
+      if (suffix(RV,"este")) {
+        CT = removeSuffix(CT,"este") ; return true;
+      }
+      if (suffix(RV,"iste")) {
+        CT = removeSuffix(CT,"iste") ; return true;
+      }
+      if (suffix(RV,"arei")) {
+        CT = removeSuffix(CT,"arei") ; return true;
+      }
+      if (suffix(RV,"erei")) {
+        CT = removeSuffix(CT,"erei") ; return true;
+      }
+      if (suffix(RV,"irei")) {
+        CT = removeSuffix(CT,"irei") ; return true;
+      }
+      if (suffix(RV,"aram")) {
+        CT = removeSuffix(CT,"aram") ; return true;
+      }
+      if (suffix(RV,"eram")) {
+        CT = removeSuffix(CT,"eram") ; return true;
+      }
+      if (suffix(RV,"iram")) {
+        CT = removeSuffix(CT,"iram") ; return true;
+      }
+      if (suffix(RV,"avam")) {
+        CT = removeSuffix(CT,"avam") ; return true;
+      }
+      if (suffix(RV,"arem")) {
+        CT = removeSuffix(CT,"arem") ; return true;
+      }
+      if (suffix(RV,"erem")) {
+        CT = removeSuffix(CT,"erem") ; return true;
+      }
+      if (suffix(RV,"irem")) {
+        CT = removeSuffix(CT,"irem") ; return true;
+      }
+      if (suffix(RV,"ando")) {
+        CT = removeSuffix(CT,"ando") ; return true;
+      }
+      if (suffix(RV,"endo")) {
+        CT = removeSuffix(CT,"endo") ; return true;
+      }
+      if (suffix(RV,"indo")) {
+        CT = removeSuffix(CT,"indo") ; return true;
+      }
+      if (suffix(RV,"arao")) {
+        CT = removeSuffix(CT,"arao") ; return true;
+      }
+      if (suffix(RV,"erao")) {
+        CT = removeSuffix(CT,"erao") ; return true;
+      }
+      if (suffix(RV,"irao")) {
+        CT = removeSuffix(CT,"irao") ; return true;
+      }
+      if (suffix(RV,"adas")) {
+        CT = removeSuffix(CT,"adas") ; return true;
+      }
+      if (suffix(RV,"idas")) {
+        CT = removeSuffix(CT,"idas") ; return true;
+      }
+      if (suffix(RV,"aras")) {
+        CT = removeSuffix(CT,"aras") ; return true;
+      }
+      if (suffix(RV,"eras")) {
+        CT = removeSuffix(CT,"eras") ; return true;
+      }
+      if (suffix(RV,"iras")) {
+        CT = removeSuffix(CT,"iras") ; return true;
+      }
+      if (suffix(RV,"avas")) {
+        CT = removeSuffix(CT,"avas") ; return true;
+      }
+      if (suffix(RV,"ares")) {
+        CT = removeSuffix(CT,"ares") ; return true;
+      }
+      if (suffix(RV,"eres")) {
+        CT = removeSuffix(CT,"eres") ; return true;
+      }
+      if (suffix(RV,"ires")) {
+        CT = removeSuffix(CT,"ires") ; return true;
+      }
+      if (suffix(RV,"ados")) {
+        CT = removeSuffix(CT,"ados") ; return true;
+      }
+      if (suffix(RV,"idos")) {
+        CT = removeSuffix(CT,"idos") ; return true;
+      }
+      if (suffix(RV,"amos")) {
+        CT = removeSuffix(CT,"amos") ; return true;
+      }
+      if (suffix(RV,"emos")) {
+        CT = removeSuffix(CT,"emos") ; return true;
+      }
+      if (suffix(RV,"imos")) {
+        CT = removeSuffix(CT,"imos") ; return true;
+      }
+      if (suffix(RV,"iras")) {
+        CT = removeSuffix(CT,"iras") ; return true;
+      }
+      if (suffix(RV,"ieis")) {
+        CT = removeSuffix(CT,"ieis") ; return true;
+      }
+    }
+
+    // suffix length = 3
+    if (RV.length() >= 3) {
+      if (suffix(RV,"ada")) {
+        CT = removeSuffix(CT,"ada") ; return true;
+      }
+      if (suffix(RV,"ida")) {
+        CT = removeSuffix(CT,"ida") ; return true;
+      }
+      if (suffix(RV,"ara")) {
+        CT = removeSuffix(CT,"ara") ; return true;
+      }
+      if (suffix(RV,"era")) {
+        CT = removeSuffix(CT,"era") ; return true;
+      }
+      if (suffix(RV,"ava")) {
+        CT = removeSuffix(CT,"ava") ; return true;
+      }
+      if (suffix(RV,"iam")) {
+        CT = removeSuffix(CT,"iam") ; return true;
+      }
+      if (suffix(RV,"ado")) {
+        CT = removeSuffix(CT,"ado") ; return true;
+      }
+      if (suffix(RV,"ido")) {
+        CT = removeSuffix(CT,"ido") ; return true;
+      }
+      if (suffix(RV,"ias")) {
+        CT = removeSuffix(CT,"ias") ; return true;
+      }
+      if (suffix(RV,"ais")) {
+        CT = removeSuffix(CT,"ais") ; return true;
+      }
+      if (suffix(RV,"eis")) {
+        CT = removeSuffix(CT,"eis") ; return true;
+      }
+      if (suffix(RV,"ira")) {
+        CT = removeSuffix(CT,"ira") ; return true;
+      }
+      if (suffix(RV,"ear")) {
+        CT = removeSuffix(CT,"ear") ; return true;
+      }
+    }
+
+    // suffix length = 2
+    if (RV.length() >= 2) {
+      if (suffix(RV,"ia")) {
+        CT = removeSuffix(CT,"ia") ; return true;
+      }
+      if (suffix(RV,"ei")) {
+        CT = removeSuffix(CT,"ei") ; return true;
+      }
+      if (suffix(RV,"am")) {
+        CT = removeSuffix(CT,"am") ; return true;
+      }
+      if (suffix(RV,"em")) {
+        CT = removeSuffix(CT,"em") ; return true;
+      }
+      if (suffix(RV,"ar")) {
+        CT = removeSuffix(CT,"ar") ; return true;
+      }
+      if (suffix(RV,"er")) {
+        CT = removeSuffix(CT,"er") ; return true;
+      }
+      if (suffix(RV,"ir")) {
+        CT = removeSuffix(CT,"ir") ; return true;
+      }
+      if (suffix(RV,"as")) {
+        CT = removeSuffix(CT,"as") ; return true;
+      }
+      if (suffix(RV,"es")) {
+        CT = removeSuffix(CT,"es") ; return true;
+      }
+      if (suffix(RV,"is")) {
+        CT = removeSuffix(CT,"is") ; return true;
+      }
+      if (suffix(RV,"eu")) {
+        CT = removeSuffix(CT,"eu") ; return true;
+      }
+      if (suffix(RV,"iu")) {
+        CT = removeSuffix(CT,"iu") ; return true;
+      }
+      if (suffix(RV,"iu")) {
+        CT = removeSuffix(CT,"iu") ; return true;
+      }
+      if (suffix(RV,"ou")) {
+        CT = removeSuffix(CT,"ou") ; return true;
+      }
+    }
+
+    // no ending was removed by step2
+    return false ;
+  }
+
+	/**
+	 * Delete suffix 'i' if in RV and preceded by 'c'
+   *
+	*/
+	private void step3() {
+    if (RV == null) return ;
+
+    if (suffix(RV,"i") && suffixPreceded(RV,"i","c")) {
+      CT = removeSuffix(CT,"i") ;
+    }
+
+  }
+
+	/**
+	 * Residual suffix
+   *
+   * If the word ends with one of the suffixes (os a i o á í ó)
+   * in RV, delete it
+   *
+	*/
+	private void step4() {
+    if (RV == null) return  ;
+
+    if (suffix(RV,"os")) {
+      CT = removeSuffix(CT,"os") ; return ;
+    }
+    if (suffix(RV,"a")) {
+      CT = removeSuffix(CT,"a") ; return ;
+    }
+    if (suffix(RV,"i")) {
+      CT = removeSuffix(CT,"i") ; return ;
+    }
+    if (suffix(RV,"o")) {
+      CT = removeSuffix(CT,"o") ; return ;
+    }
+
+  }
+
+	/**
+	 * If the word ends with one of (e é ê) in RV, delete it,
+   * and if preceded by 'gu' (or 'ci') with the 'u' (or 'i') in RV,
+   * delete the 'u' (or 'i')
+   *
+   * Or if the word ends with ç, remove the cedilla
+   *
+	*/
+	private void step5() {
+    if (RV == null) return  ;
+
+    if (suffix(RV,"e")) {
+      if (suffixPreceded(RV,"e","gu")) {
+        CT = removeSuffix(CT,"e") ;
+        CT = removeSuffix(CT,"u") ;
+        return ;
+      }
+
+      if (suffixPreceded(RV,"e","ci")) {
+        CT = removeSuffix(CT,"e") ;
+        CT = removeSuffix(CT,"i") ;
+        return ;
+      }
+
+      CT = removeSuffix(CT,"e") ; return ;
+    }
+  }
+
+	/**
+	 * For logging and debugging purposes
+	 *
+	 * @return  TERM, CT, RV, R1 and R2
+	 */
+	public String log() {
+    return " (TERM = " + TERM + ")" +
+           " (CT = " + CT +")" +
+           " (RV = " + RV +")" +
+           " (R1 = " + R1 +")" +
+           " (R2 = " + R2 +")" ;
+	}
+
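+  // Illustrative sketch, not part of the original changeset: exercising the
+  // stemmer from the command line. stem() returns null for terms this class
+  // treats as non-indexable (fewer than 3 or more than 29 characters after
+  // normalization), so the output may contain "null".
+  public static void main(String[] args) {
+    BrazilianStemmer stemmer = new BrazilianStemmer();
+    for (String term : args) {
+      System.out.println(term + " -> " + stemmer.stem(term) + stemmer.log());
+    }
+  }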
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/DutchStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,407 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Map;
+
+/**
+ *
+ * A stemmer for Dutch words. The algorithm is an implementation of
+ * the <a href="http://snowball.tartarus.org/algorithms/dutch/stemmer.html">dutch stemming</a>
+ * algorithm in Martin Porter's snowball project.
+ * 
+ * @author Edwin de Jonge (ejne at cbs.nl)
+ */
+
+public class DutchStemmer {
+  /**
+   * Buffer for the terms while stemming them.
+   */
+  private StringBuffer sb = new StringBuffer();
+  private boolean _removedE;
+  private Map _stemDict;
+
+  private int _R1;
+  private int _R2;
+
+  //TODO convert to internal
+  /*
+   * Stems the given term to a unique <tt>discriminator</tt>.
+   *
+   * @param term The term that should be stemmed.
+   * @return Discriminator for <tt>term</tt>
+   */
+  public String stem(String term) {
+    term = term.toLowerCase();
+    if (!isStemmable(term))
+      return term;
+    if (_stemDict != null && _stemDict.containsKey(term))
+      if (_stemDict.get(term) instanceof String)
+        return (String) _stemDict.get(term);
+      else
+        return null;
+
+    // Reset the StringBuffer.
+    sb.delete(0, sb.length());
+    sb.insert(0, term);
+    // Stemming starts here...
+    substitute(sb);
+    storeYandI(sb);
+    _R1 = getRIndex(sb, 0);
+    _R1 = Math.max(3, _R1);
+    step1(sb);
+    step2(sb);
+    _R2 = getRIndex(sb, _R1);
+    step3a(sb);
+    step3b(sb);
+    step4(sb);
+    reStoreYandI(sb);
+    return sb.toString();
+  }
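+
+  /* Illustrative note (not part of the original changeset): _R1 is clamped to at
+   * least 3 above, mirroring the Snowball Dutch stemmer's rule that R1 is adjusted
+   * so that the region before it contains at least three letters. */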
+
+  private boolean enEnding(StringBuffer sb) {
+    String[] enend = new String[]{"ene", "en"};
+    for (int i = 0; i < enend.length; i++) {
+      String end = enend[i];
+      String s = sb.toString();
+      int index = s.length() - end.length();
+      if (s.endsWith(end) &&
+          index >= _R1 &&
+          isValidEnEnding(sb, index - 1)
+      ) {
+        sb.delete(index, index + end.length());
+        unDouble(sb, index);
+        return true;
+      }
+    }
+    return false;
+  }
+
+
+  private void step1(StringBuffer sb) {
+    if (_R1 >= sb.length())
+      return;
+
+    String s = sb.toString();
+    int lengthR1 = sb.length() - _R1;
+    int index;
+
+    if (s.endsWith("heden")) {
+      sb.replace(_R1, lengthR1 + _R1, sb.substring(_R1, lengthR1 + _R1).replaceAll("heden", "heid"));
+      return;
+    }
+
+    if (enEnding(sb))
+      return;
+
+    if (s.endsWith("se") &&
+        (index = s.length() - 2) >= _R1 &&
+        isValidSEnding(sb, index - 1)
+    ) {
+      sb.delete(index, index + 2);
+      return;
+    }
+    if (s.endsWith("s") &&
+        (index = s.length() - 1) >= _R1 &&
+        isValidSEnding(sb, index - 1)) {
+      sb.delete(index, index + 1);
+    }
+  }
+
+  /**
+   * Delete suffix e if in R1 and
+   * preceded by a non-vowel, and then undouble the ending
+   *
+   * @param sb String being stemmed
+   */
+  private void step2(StringBuffer sb) {
+    _removedE = false;
+    if (_R1 >= sb.length())
+      return;
+    String s = sb.toString();
+    int index = s.length() - 1;
+    if (index >= _R1 &&
+        s.endsWith("e") &&
+        !isVowel(sb.charAt(index - 1))) {
+      sb.delete(index, index + 1);
+      unDouble(sb);
+      _removedE = true;
+    }
+  }
+
+  /**
+   * Delete "heid"
+   *
+   * @param sb String being stemmed
+   */
+  private void step3a(StringBuffer sb) {
+    if (_R2 >= sb.length())
+      return;
+    String s = sb.toString();
+    int index = s.length() - 4;
+    if (s.endsWith("heid") && index >= _R2 && sb.charAt(index - 1) != 'c') {
+      sb.delete(index, index + 4); //remove heid
+      enEnding(sb);
+    }
+  }
+
+  /**
+   * <p>A d-suffix, or derivational suffix, enables a new word,
+   * often with a different grammatical category, or with a different
+   * sense, to be built from another word. Whether a d-suffix can be
+   * attached is discovered not from the rules of grammar, but by
+   * referring to a dictionary. So in English, ness can be added to
+   * certain adjectives to form corresponding nouns (littleness,
+   * kindness, foolishness ...) but not to all adjectives
+   * (not for example, to big, cruel, wise ...) d-suffixes can be
+   * used to change meaning, often in rather exotic ways.</p>
+   * Remove "ing", "end", "ig", "lijk", "baar" and "bar"
+   *
+   * @param sb String being stemmed
+   */
+  private void step3b(StringBuffer sb) {
+    if (_R2 >= sb.length())
+      return;
+    String s = sb.toString();
+    int index = 0;
+
+    if ((s.endsWith("end") || s.endsWith("ing")) &&
+        (index = s.length() - 3) >= _R2) {
+      sb.delete(index, index + 3);
+      if (sb.charAt(index - 2) == 'i' &&
+          sb.charAt(index - 1) == 'g') {
+        if (sb.charAt(index - 3) != 'e' & index - 2 >= _R2) {
+          index -= 2;
+          sb.delete(index, index + 2);
+        }
+      } else {
+        unDouble(sb, index);
+      }
+      return;
+    }
+    if (s.endsWith("ig") &&
+        (index = s.length() - 2) >= _R2
+    ) {
+      if (sb.charAt(index - 1) != 'e')
+        sb.delete(index, index + 2);
+      return;
+    }
+    if (s.endsWith("lijk") &&
+        (index = s.length() - 4) >= _R2
+    ) {
+      sb.delete(index, index + 4);
+      step2(sb);
+      return;
+    }
+    if (s.endsWith("baar") &&
+        (index = s.length() - 4) >= _R2
+    ) {
+      sb.delete(index, index + 4);
+      return;
+    }
+    if (s.endsWith("bar") &&
+        (index = s.length() - 3) >= _R2
+    ) {
+      if (_removedE)
+        sb.delete(index, index + 3);
+      return;
+    }
+  }
+
+  /**
+   * undouble vowel
+   * If the word ends CVD, where C is a non-vowel, D is a non-vowel other than I, and V is a doubled a, e, o or u, remove one of the vowels from V (for example, maan -> man, brood -> brod).
+   *
+   * @param sb String being stemmed
+   */
+  private void step4(StringBuffer sb) {
+    if (sb.length() < 4)
+      return;
+    String end = sb.substring(sb.length() - 4, sb.length());
+    char c = end.charAt(0);
+    char v1 = end.charAt(1);
+    char v2 = end.charAt(2);
+    char d = end.charAt(3);
+    if (v1 == v2 &&
+        d != 'I' &&
+        v1 != 'i' &&
+        isVowel(v1) &&
+        !isVowel(d) &&
+        !isVowel(c)) {
+      sb.delete(sb.length() - 2, sb.length() - 1);
+    }
+  }
+
+  /**
+   * Checks if a term could be stemmed.
+   *
+   * @return true if, and only if, the given term consists only of letters.
+   */
+  private boolean isStemmable(String term) {
+    for (int c = 0; c < term.length(); c++) {
+      if (!Character.isLetter(term.charAt(c))) return false;
+    }
+    return true;
+  }
+
+  /**
+   * Substitute ä, ë, ï, ö, ü, á, é, í, ó, ú
+   */
+  private void substitute(StringBuffer buffer) {
+    for (int i = 0; i < buffer.length(); i++) {
+      switch (buffer.charAt(i)) {
+        case 'ä':
+        case 'á':
+          {
+            buffer.setCharAt(i, 'a');
+            break;
+          }
+        case 'ë':
+        case 'é':
+          {
+            buffer.setCharAt(i, 'e');
+            break;
+          }
+        case 'ü':
+        case 'ú':
+          {
+            buffer.setCharAt(i, 'u');
+            break;
+          }
+        case 'ï':
+        case 'í':
+          {
+            buffer.setCharAt(i, 'i');
+            break;
+          }
+        case 'ö':
+        case 'ó':
+          {
+            buffer.setCharAt(i, 'o');
+            break;
+          }
+      }
+    }
+  }
+
+  /*private boolean isValidSEnding(StringBuffer sb) {
+    return isValidSEnding(sb, sb.length() - 1);
+  }*/
+
+  private boolean isValidSEnding(StringBuffer sb, int index) {
+    char c = sb.charAt(index);
+    if (isVowel(c) || c == 'j')
+      return false;
+    return true;
+  }
+
+  /*private boolean isValidEnEnding(StringBuffer sb) {
+    return isValidEnEnding(sb, sb.length() - 1);
+  }*/
+
+  private boolean isValidEnEnding(StringBuffer sb, int index) {
+    char c = sb.charAt(index);
+    if (isVowel(c))
+      return false;
+    // reject the "gem" ending; the look-behind is safe because callers only pass index >= _R1 - 1 >= 2
+    if (c == 'm' && sb.charAt(index - 2) == 'g' && sb.charAt(index - 1) == 'e')
+      return false;
+    return true;
+  }
+
+  private void unDouble(StringBuffer sb) {
+    unDouble(sb, sb.length());
+  }
+
+  private void unDouble(StringBuffer sb, int endIndex) {
+    String s = sb.substring(0, endIndex);
+    if (s.endsWith("kk") || s.endsWith("tt") || s.endsWith("dd") || s.endsWith("nn") || s.endsWith("mm") || s.endsWith("ff")) {
+      sb.delete(endIndex - 1, endIndex);
+    }
+  }
+
+  private int getRIndex(StringBuffer sb, int start) {
+    if (start == 0)
+      start = 1;
+    int i = start;
+    for (; i < sb.length(); i++) {
+      //first non-vowel preceded by a vowel
+      if (!isVowel(sb.charAt(i)) && isVowel(sb.charAt(i - 1))) {
+        return i + 1;
+      }
+    }
+    return i + 1;
+  }
+
+  private void storeYandI(StringBuffer sb) {
+    if (sb.charAt(0) == 'y')
+      sb.setCharAt(0, 'Y');
+
+    int last = sb.length() - 1;
+
+    for (int i = 1; i < last; i++) {
+      switch (sb.charAt(i)) {
+        case 'i':
+          {
+            if (isVowel(sb.charAt(i - 1)) &&
+                isVowel(sb.charAt(i + 1))
+            )
+              sb.setCharAt(i, 'I');
+            break;
+          }
+        case 'y':
+          {
+            if (isVowel(sb.charAt(i - 1)))
+              sb.setCharAt(i, 'Y');
+            break;
+          }
+      }
+    }
+    if (last > 0 && sb.charAt(last) == 'y' && isVowel(sb.charAt(last - 1)))
+      sb.setCharAt(last, 'Y');
+  }
+
+  private void reStoreYandI(StringBuffer sb) {
+    String tmp = sb.toString();
+    sb.delete(0, sb.length());
+    sb.insert(0, tmp.replaceAll("I", "i").replaceAll("Y", "y"));
+  }
+
+  private boolean isVowel(char c) {
+    switch (c) {
+      case 'e':
+      case 'a':
+      case 'o':
+      case 'i':
+      case 'u':
+      case 'y':
+      case 'è':
+        {
+          return true;
+        }
+    }
+    return false;
+  }
+
+  void setStemDictionary(Map dict) {
+    _stemDict = dict;
+  }
+
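+  // Illustrative sketch, not part of the original changeset: the optional stem
+  // dictionary set below short-circuits the algorithm. A String value is returned
+  // unchanged as the stem; a value of any other type makes stem() return null.
+  // The entry shown is a hypothetical override, not data from this changeset.
+  public static void main(String[] args) {
+    DutchStemmer stemmer = new DutchStemmer();
+    Map overrides = new java.util.HashMap();
+    overrides.put("fiets", "fiets");
+    stemmer.setStemDictionary(overrides);
+    for (String term : args) {
+      System.out.println(term + " -> " + stemmer.stem(term));
+    }
+  }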
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/FrenchStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,709 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for French words. The algorithm is based on the work of
+ * Dr Martin Porter on his snowball project<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html<br>
+ * (French stemming algorithm) for details
+ *
+ * @author    Patrick Talbot
+ */
+
+public class FrenchStemmer {
+
+    /**
+     * Buffer for the terms while stemming them.
+     */
+    private StringBuffer sb = new StringBuffer();
+
+    /**
+     * A temporary buffer, used to reconstruct R2
+     */
+     private StringBuffer tb = new StringBuffer();
+
+	/**
+	 * Region R0 is equal to the whole buffer
+	 */
+	private String R0;
+
+	/**
+	 * Region RV
+	 * "If the word begins with two vowels, RV is the region after the third letter,
+	 * otherwise the region after the first vowel not at the beginning of the word,
+	 * or the end of the word if these positions cannot be found."
+	 */
+    private String RV;
+
+	/**
+	 * Region R1
+	 * "R1 is the region after the first non-vowel following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"
+	 */
+    private String R1;
+
+	/**
+	 * Region R2
+	 * "R2 is the region after the first non-vowel in R1 following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"
+	 */
+    private String R2;
+
+
+	/**
+	 * Set to true if we need to perform step 2
+	 */
+    private boolean suite;
+
+	/**
+	 * Set to true if the buffer was modified
+	 */
+    private boolean modified;
+
+
+    /**
+     * Stems the given term to a unique <tt>discriminator</tt>.
+     *
+     * @param term  java.lang.String The term that should be stemmed
+     * @return java.lang.String  Discriminator for <tt>term</tt>
+     */
+    public String stem( String term ) {
+		if ( !isStemmable( term ) ) {
+			return term;
+		}
+
+		// Use lowercase for medium stemming.
+		term = term.toLowerCase();
+
+		// Reset the StringBuffer.
+		sb.delete( 0, sb.length() );
+		sb.insert( 0, term );
+
+		// reset the booleans
+		modified = false;
+		suite = false;
+
+		sb = treatVowels( sb );
+
+		setStrings();
+
+		step1();
+
+		if (!modified || suite)
+		{
+			if (RV != null)
+			{
+				suite = step2a();
+				if (!suite)
+					step2b();
+			}
+		}
+
+		if (modified || suite)
+			step3();
+		else
+			step4();
+
+		step5();
+
+		step6();
+
+		return sb.toString();
+    }
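+
+	/* Illustrative note (not part of the original changeset): step1 always runs;
+	 * steps 2a/2b run only when step1 changed nothing or when it removed one of
+	 * the amment/emment/ment(s) endings (the "suite" flag); step3 runs if the
+	 * buffer was modified or that flag is set, otherwise step4; steps 5 and 6
+	 * always run. */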
+
+	/**
+	 * Sets the search region Strings<br>
+	 * it needs to be done each time the buffer was modified
+	 */
+	private void setStrings() {
+		// set the strings
+		R0 = sb.toString();
+		RV = retrieveRV( sb );
+		R1 = retrieveR( sb );
+		if ( R1 != null )
+		{
+			tb.delete( 0, tb.length() );
+			tb.insert( 0, R1 );
+			R2 = retrieveR( tb );
+		}
+		else
+			R2 = null;
+	}
+
+	/**
+	 * First step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step1( ) {
+		String[] suffix = { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
+		deleteFrom( R2, suffix );
+
+		replaceFrom( R2, new String[] { "logies", "logie" }, "log" );
+		replaceFrom( R2, new String[] { "usions", "utions", "usion", "ution" }, "u" );
+		replaceFrom( R2, new String[] { "ences", "ence" }, "ent" );
+
+		String[] search = { "atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
+		deleteButSuffixFromElseReplace( R2, search, "ic",  true, R0, "iqU" );
+
+		deleteButSuffixFromElseReplace( R2, new String[] { "ements", "ement" }, "eus", false, R0, "eux" );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "ativ", false );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iv", false );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "abl", false );
+		deleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iqU", false );
+
+		deleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", "issement" }, false, R0 );
+		deleteFrom( RV, new String[] { "ements", "ement" } );
+
+		deleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "abil", false, R0, "abl" );
+		deleteButSuffixFromElseReplace( R2, new String[] { "ités", "ité" }, "ic", false, R0, "iqU" );
+		deleteButSuffixFrom( R2, new String[] { "ités", "ité" }, "iv", true );
+
+		String[] autre = { "ifs", "ives", "if", "ive" };
+		deleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, "iqU" );
+		deleteButSuffixFromElseReplace( R2, autre, "at", true, R2, "iqU" );
+
+		replaceFrom( R0, new String[] { "eaux" }, "eau" );
+
+		replaceFrom( R1, new String[] { "aux" }, "al" );
+
+		deleteButSuffixFromElseReplace( R2, new String[] { "euses", "euse" }, "", true, R1, "eux" );
+
+		deleteFrom( R2, new String[] { "eux" } );
+
+		// if one of the next steps is performed, we will need to perform step2a
+		boolean temp = false;
+		temp = replaceFrom( RV, new String[] { "amment" }, "ant" );
+		if (temp == true)
+			suite = true;
+		temp = replaceFrom( RV, new String[] { "emment" }, "ent" );
+		if (temp == true)
+			suite = true;
+		temp = deleteFromIfTestVowelBeforeIn( RV, new String[] { "ments", "ment" }, true, RV );
+		if (temp == true)
+			suite = true;
+
+	}
+
+	/**
+	 * Second step (A) of the Porter Algorithm<br>
+	 * Will be performed if nothing changed in the first step
+	 * or if changes were made to the amment, emment, ments or ment suffixes<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 *
+	 * @return boolean - true if something changed in the StringBuffer
+	 */
+	private boolean step2a() {
+		String[] search = { "îmes", "îtes", "iraIent", "irait", "irais", "irai", "iras", "ira",
+							"irent", "iriez", "irez", "irions", "irons", "iront",
+							"issaIent", "issais", "issantes", "issante", "issants", "issant",
+							"issait", "issais", "issions", "issons", "issiez", "issez", "issent",
+							"isses", "isse", "ir", "is", "ît", "it", "ies", "ie", "i" };
+		return deleteFromIfTestVowelBeforeIn( RV, search, false, RV );
+	}
+
+	/**
+	 * Second step (B) of the Porter Algorithm<br>
+	 * Will be performed if step 2A did not remove any suffix<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step2b() {
+		String[] suffix = { "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
+							"erons", "eront","erez", "èrent", "era", "ées", "iez",
+							"ée", "és", "er", "ez", "é" };
+		deleteFrom( RV, suffix );
+
+		String[] search = { "assions", "assiez", "assent", "asses", "asse", "aIent",
+							"antes", "aIent", "Aient", "ante", "âmes", "âtes", "ants", "ant",
+							"ait", "aît", "ais", "Ait", "Aît", "Ais", "ât", "as", "ai", "Ai", "a" };
+		deleteButSuffixFrom( RV, search, "e", true );
+
+		deleteFrom( R2, new String[] { "ions" } );
+	}
+
+	/**
+	 * Third step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step3() {
+		if (sb.length()>0)
+		{
+			char ch = sb.charAt( sb.length()-1 );
+			if (ch == 'Y')
+			{
+				sb.setCharAt( sb.length()-1, 'i' );
+				setStrings();
+			}
+			else if (ch == 'ç')
+			{
+				sb.setCharAt( sb.length()-1, 'c' );
+				setStrings();
+			}
+		}
+	}
+
+	/**
+	 * Fourth step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step4() {
+		if (sb.length() > 1)
+		{
+			char ch = sb.charAt( sb.length()-1 );
+			if (ch == 's')
+			{
+				char b = sb.charAt( sb.length()-2 );
+				if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
+				{
+					sb.delete( sb.length() - 1, sb.length());
+					setStrings();
+				}
+			}
+		}
+		boolean found = deleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "s" );
+		if (!found)
+		found = deleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "t" );
+
+		replaceFrom( RV, new String[] { "Ière", "ière", "Ier", "ier" }, "i" );
+		deleteFrom( RV, new String[] { "e" } );
+		deleteFromIfPrecededIn( RV, new String[] { "ë" }, R0, "gu" );
+	}
+
+	/**
+	 * Fifth step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step5() {
+		if (R0 != null)
+		{
+			if (R0.endsWith("enn") || R0.endsWith("onn") || R0.endsWith("ett") || R0.endsWith("ell") || R0.endsWith("eill"))
+			{
+				sb.delete( sb.length() - 1, sb.length() );
+				setStrings();
+			}
+		}
+	}
+
+	/**
+	 * Sixth (and last!) step of the Porter Algorithm<br>
+	 * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+	 */
+	private void step6() {
+		if (R0!=null && R0.length()>0)
+		{
+			boolean seenVowel = false;
+			boolean seenConson = false;
+			int pos = -1;
+			for (int i = R0.length()-1; i > -1; i--)
+			{
+				char ch = R0.charAt(i);
+				if (isVowel(ch))
+				{
+					if (!seenVowel)
+					{
+						if (ch == 'é' || ch == 'è')
+						{
+							pos = i;
+							break;
+						}
+					}
+					seenVowel = true;
+				}
+				else
+				{
+					if (seenVowel)
+						break;
+					else
+						seenConson = true;
+				}
+			}
+			if (pos > -1 && seenConson && !seenVowel)
+				sb.setCharAt(pos, 'e');
+		}
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param from java.lang.String - the secondary source zone for search
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @return boolean - true if modified
+	 */
+	private boolean deleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
+		boolean found = false;
+		if (source!=null )
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( search[i] ))
+				{
+					if (from!=null && from.endsWith( prefix + search[i] ))
+					{
+						sb.delete( sb.length() - search[i].length(), sb.length());
+						found = true;
+						setStrings();
+						break;
+					}
+				}
+			}
+		}
+		return found;
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param vowel boolean - true if we need a vowel before the search string
+	 * @param from java.lang.String - the secondary source zone for search (where vowel could be)
+	 * @return boolean - true if modified
+	 */
+	private boolean deleteFromIfTestVowelBeforeIn( String source, String[] search, boolean vowel, String from ) {
+		boolean found = false;
+		if (source!=null && from!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( search[i] ))
+				{
+					if ((search[i].length() + 1) <= from.length())
+					{
+						boolean test = isVowel(sb.charAt(sb.length()-(search[i].length()+1)));
+						if (test == vowel)
+						{
+							sb.delete( sb.length() - search[i].length(), sb.length());
+							modified = true;
+							found = true;
+							setStrings();
+							break;
+						}
+					}
+				}
+			}
+		}
+		return found;
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if preceded by the prefix
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @param without boolean - true if it will be deleted even without prefix found
+	 */
+	private void deleteButSuffixFrom( String source, String[] search, String prefix, boolean without ) {
+		if (source!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( prefix + search[i] ))
+				{
+					sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+				else if ( without && source.endsWith( search[i] ))
+				{
+					sb.delete( sb.length() - search[i].length(), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Delete a suffix searched in zone "source" if preceded by prefix<br>
+	 * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
+	 * or delete the suffix if specified
+	 *
+	 * @param source java.lang.String - the primary source zone for search
+	 * @param search java.lang.String[] - the strings to search for suppression
+	 * @param prefix java.lang.String - the prefix to add to the search string to test
+	 * @param without boolean - true if it will be deleted even without prefix found
+	 * @param from java.lang.String - the secondary source zone, tested for prefix + suffix
+	 * @param replace java.lang.String - the replacement string used when the match occurs in "from"
+	 */
+	private void deleteButSuffixFromElseReplace( String source, String[] search, String prefix, boolean without, String from, String replace ) {
+		if (source!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( prefix + search[i] ))
+				{
+					sb.delete( sb.length() - (prefix.length() + search[i].length()), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+				else if ( from!=null && from.endsWith( prefix + search[i] ))
+				{
+					sb.replace( sb.length() - (prefix.length() + search[i].length()), sb.length(), replace );
+					modified = true;
+					setStrings();
+					break;
+				}
+				else if ( without && source.endsWith( search[i] ))
+				{
+					sb.delete( sb.length() - search[i].length(), sb.length() );
+					modified = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Replace a search string with another within the source zone
+	 *
+	 * @param source java.lang.String - the source zone for search
+	 * @param search java.lang.String[] - the strings to search for replacement
+	 * @param replace java.lang.String - the replacement string
+	 * @return boolean - true if a replacement was made
+	 */
+	private boolean replaceFrom( String source, String[] search, String replace ) {
+		boolean found = false;
+		if (source!=null)
+		{
+			for (int i = 0; i < search.length; i++) {
+				if ( source.endsWith( search[i] ))
+				{
+					sb.replace( sb.length() - search[i].length(), sb.length(), replace );
+					modified = true;
+					found = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+		return found;
+	}
+
+	/**
+	 * Delete a search string within the source zone
+	 *
+	 * @param source the source zone for search
+	 * @param suffix the strings to search for suppression
+	 */
+	private void deleteFrom(String source, String[] suffix ) {
+		if (source!=null)
+		{
+			for (int i = 0; i < suffix.length; i++) {
+				if (source.endsWith( suffix[i] ))
+				{
+					sb.delete( sb.length() - suffix[i].length(), sb.length());
+					modified = true;
+					setStrings();
+					break;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Test if a char is a french vowel, including accentuated ones
+	 *
+	 * @param ch the char to test
+	 * @return boolean - true if the char is a vowel
+	 */
+	private boolean isVowel(char ch) {
+		switch (ch)
+		{
+			case 'a':
+			case 'e':
+			case 'i':
+			case 'o':
+			case 'u':
+			case 'y':
+			case 'â':
+			case 'à':
+			case 'ë':
+			case 'é':
+			case 'ê':
+			case 'è':
+			case 'ï':
+			case 'î':
+			case 'ô':
+			case 'ü':
+			case 'ù':
+			case 'û':
+				return true;
+			default:
+				return false;
+		}
+	}
+
+	/**
+	 * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
+	 * "R is the region after the first non-vowel following a vowel
+	 * or is the null region at the end of the word if there is no such non-vowel"<br>
+	 * @param buffer java.lang.StringBuffer - the in buffer
+	 * @return java.lang.String - the resulting string
+	 */
+	private String retrieveR( StringBuffer buffer ) {
+		int len = buffer.length();
+		int pos = -1;
+		for (int c = 0; c < len; c++) {
+			if (isVowel( buffer.charAt( c )))
+			{
+				pos = c;
+				break;
+			}
+		}
+		if (pos > -1)
+		{
+			int consonne = -1;
+			for (int c = pos; c < len; c++) {
+				if (!isVowel(buffer.charAt( c )))
+				{
+					consonne = c;
+					break;
+				}
+			}
+			if (consonne > -1 && (consonne+1) < len)
+				return buffer.substring( consonne+1, len );
+			else
+				return null;
+		}
+		else
+			return null;
+	}
+
+	/**
+	 * Retrieve the "RV zone" from a buffer and return the corresponding string<br>
+	 * "If the word begins with two vowels, RV is the region after the third letter,
+	 * otherwise the region after the first vowel not at the beginning of the word,
+	 * or the end of the word if these positions cannot be found."<br>
+	 * @param buffer java.lang.StringBuffer - the input buffer
+	 * @return java.lang.String - the resulting string
+	 */
+	private String retrieveRV( StringBuffer buffer ) {
+		int len = buffer.length();
+		if ( buffer.length() > 3)
+		{
+			if ( isVowel(buffer.charAt( 0 )) && isVowel(buffer.charAt( 1 ))) {
+				return buffer.substring(3,len);
+			}
+			else
+			{
+				int pos = 0;
+				for (int c = 1; c < len; c++) {
+					if (isVowel( buffer.charAt( c )))
+					{
+						pos = c;
+						break;
+					}
+				}
+				if ( pos+1 < len )
+					return buffer.substring( pos+1, len );
+				else
+					return null;
+			}
+		}
+		else
+			return null;
+	}
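+
+	// A worked illustration using the Snowball French definitions (purely for
+	// orientation; the calling code decides which buffers are passed in): for a
+	// buffer containing "fameusement",
+	//   retrieveR(buffer)         returns "eusement"  (R1),
+	//   retrieveR applied to R1   returns "ement"     (R2),
+	//   retrieveRV(buffer)        returns "meusement" (RV).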
+
+
+
+    /**
+	 * Turns u and i preceded AND followed by a vowel to UpperCase<br>
+	 * Turns y preceded OR followed by a vowel to UpperCase<br>
+	 * Turns u preceded by q to UpperCase<br>
+     *
+     * @param buffer java.lang.StringBuffer - the buffer to treat
+     * @return java.lang.StringBuffer - the treated buffer
+     */
+    private StringBuffer treatVowels( StringBuffer buffer ) {
+		for ( int c = 0; c < buffer.length(); c++ ) {
+			char ch = buffer.charAt( c );
+
+			if (c == 0) // first char
+			{
+				if (buffer.length()>1)
+				{
+					if (ch == 'y' && isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'Y' );
+				}
+			}
+			else if (c == buffer.length()-1) // last char
+			{
+				if (ch == 'u' && buffer.charAt( c - 1 ) == 'q')
+					buffer.setCharAt( c, 'U' );
+				if (ch == 'y' && isVowel(buffer.charAt( c - 1 )))
+					buffer.setCharAt( c, 'Y' );
+			}
+			else // other cases
+			{
+				if (ch == 'u')
+				{
+					if (buffer.charAt( c - 1) == 'q')
+						buffer.setCharAt( c, 'U' );
+					else if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'U' );
+				}
+				if (ch == 'i')
+				{
+					if (isVowel(buffer.charAt( c - 1 )) && isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'I' );
+				}
+				if (ch == 'y')
+				{
+					if (isVowel(buffer.charAt( c - 1 )) || isVowel(buffer.charAt( c + 1 )))
+						buffer.setCharAt( c, 'Y' );
+				}
+			}
+		}
+
+		return buffer;
+    }
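+
+    // For illustration: applied to a buffer containing "jouer" this yields "joUer"
+    // (u between two vowels), "quand" becomes "qUand" (u after q), and "yeux"
+    // becomes "Yeux" (initial y before a vowel); the uppercase letters mark
+    // characters that the subsequent steps must not treat as vowels.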
+
+    /**
+     * Checks whether a term can be processed correctly.
+     *
+     * @return boolean - true if, and only if, the given term consists only of letters
+     *                   and contains at most one uppercase letter, which must be the first character.
+     */
+    private boolean isStemmable( String term ) {
+		boolean upper = false;
+		int first = -1;
+		for ( int c = 0; c < term.length(); c++ ) {
+			// Discard terms that contain non-letter characters.
+			if ( !Character.isLetter( term.charAt( c ) ) ) {
+				return false;
+			}
+			// Discard terms that contain multiple uppercase letters.
+			if ( Character.isUpperCase( term.charAt( c ) ) ) {
+				if ( upper ) {
+					return false;
+				}
+			// First encountered uppercase letter, set flag and save
+			// position.
+				else {
+					first = c;
+					upper = true;
+				}
+			}
+		}
+		// Discard the term if it contains a single uppercase letter that
+		// is not starting the term.
+		if ( first > 0 ) {
+			return false;
+		}
+		return true;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/GermanStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,267 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+// This file is encoded in UTF-8
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * A stemmer for German words. The algorithm is based on the report
+ * "A Fast and Simple Stemming Algorithm for German Words" by J&ouml;rg
+ * Caumanns (joerg.caumanns at isst.fhg.de).
+ *
+ *
+ * @version   $Id: GermanStemmer.java 564236 2007-08-09 15:21:19Z gsingers $
+ */
+public class GermanStemmer
+{
+    /**
+     * Buffer for the terms while stemming them.
+     */
+    private StringBuffer sb = new StringBuffer();
+
+    /**
+     * Number of characters that are removed with <tt>substitute()</tt> while stemming.
+     */
+    private int substCount = 0;
+
+    /**
+     * Stems the given term to a unique <tt>discriminator</tt>.
+     *
+     * @param term  The term that should be stemmed.
+     * @return      Discriminator for <tt>term</tt>
+     */
+    public String stem( String term )
+    {
+      // Use lowercase for medium stemming.
+      term = term.toLowerCase();
+      if ( !isStemmable( term ) )
+        return term;
+      // Reset the StringBuffer.
+      sb.delete( 0, sb.length() );
+      sb.insert( 0, term );
+      // Stemming starts here...
+      substitute( sb );
+      strip( sb );
+      optimize( sb );
+      resubstitute( sb );
+      removeParticleDenotion( sb );
+      return sb.toString();
+    }
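+
+    /*
+     * A worked illustration, traced against this implementation rather than taken
+     * from the cited report: stem("Häuser") lowercases the term, substitute() turns
+     * the umlaut into "hauser", strip() removes "er" and then "s", and the remaining
+     * steps leave "hau" - the same discriminator that stem("Haus") produces.
+     */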
+
+    /**
+     * Checks if a term could be stemmed.
+     *
+     * @return  true if, and only if, the given term consists only of letters.
+     */
+    private boolean isStemmable( String term )
+    {
+      for ( int c = 0; c < term.length(); c++ ) {
+        if ( !Character.isLetter( term.charAt( c ) ) )
+          return false;
+      }
+      return true;
+    }
+
+    /**
+     * Performs suffix stripping (stemming) on the current term. The stripping is
+     * reduced to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and "nd",
+     * from which all regular suffixes are built. The simplification causes some
+     * overstemming and considerably more irregular stems, but still provides unique
+     * discriminators in most of those cases.
+     * The algorithm is context-free, except for the length restrictions.
+     */
+    private void strip( StringBuffer buffer )
+    {
+      boolean doMore = true;
+      while ( doMore && buffer.length() > 3 ) {
+        if ( ( buffer.length() + substCount > 5 ) &&
+          buffer.substring( buffer.length() - 2, buffer.length() ).equals( "nd" ) )
+        {
+          buffer.delete( buffer.length() - 2, buffer.length() );
+        }
+        else if ( ( buffer.length() + substCount > 4 ) &&
+          buffer.substring( buffer.length() - 2, buffer.length() ).equals( "em" ) ) {
+            buffer.delete( buffer.length() - 2, buffer.length() );
+        }
+        else if ( ( buffer.length() + substCount > 4 ) &&
+          buffer.substring( buffer.length() - 2, buffer.length() ).equals( "er" ) ) {
+            buffer.delete( buffer.length() - 2, buffer.length() );
+        }
+        else if ( buffer.charAt( buffer.length() - 1 ) == 'e' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        else if ( buffer.charAt( buffer.length() - 1 ) == 's' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        else if ( buffer.charAt( buffer.length() - 1 ) == 'n' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        // "t" occurs only as suffix of verbs.
+        else if ( buffer.charAt( buffer.length() - 1 ) == 't' ) {
+          buffer.deleteCharAt( buffer.length() - 1 );
+        }
+        else {
+          doMore = false;
+        }
+      }
+    }
+
+    /**
+     * Performs some optimizations on the term. These optimizations are
+     * contextual.
+     */
+    private void optimize( StringBuffer buffer )
+    {
+      // Additional step for female plurals of professions and inhabitants.
+      if ( buffer.length() > 5 && buffer.substring( buffer.length() - 5, buffer.length() ).equals( "erin*" ) ) {
+        buffer.deleteCharAt( buffer.length() -1 );
+        strip( buffer );
+      }
+      // Additional step for irregular plural nouns like "Matrizen -> Matrix".
+      if ( buffer.charAt( buffer.length() - 1 ) == ( 'z' ) ) {
+        buffer.setCharAt( buffer.length() - 1, 'x' );
+      }
+    }
+
+    /**
+     * Removes a particle denotation ("ge") from a term.
+     */
+    private void removeParticleDenotion( StringBuffer buffer )
+    {
+      if ( buffer.length() > 4 ) {
+        for ( int c = 0; c < buffer.length() - 3; c++ ) {
+          if ( buffer.substring( c, c + 4 ).equals( "gege" ) ) {
+            buffer.delete( c, c + 2 );
+            return;
+          }
+        }
+      }
+    }
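+
+    /*
+     * For illustration: a buffer containing "gegeb" (e.g. the stripped form of
+     * "gegeben") matches "gege" at position 0, so the leading "ge" is removed and
+     * the buffer becomes "geb".
+     */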
+
+    /**
+     * Do some substitutions for the term to reduce overstemming:
+     *
+     * - Substitute Umlauts with their corresponding vowel: äöü -> aou,
+     *   "ß" is substituted by "ss"
+     * - Substitute a second char of a pair of equal characters with
+     *   an asterisk: ?? -> ?*
+     * - Substitute some common character combinations with a token:
+     *   sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
+     */
+    private void substitute( StringBuffer buffer )
+    {
+      substCount = 0;
+      for ( int c = 0; c < buffer.length(); c++ ) {
+        // Replace the second char of a pair of equal characters with an asterisk
+        if ( c > 0 && buffer.charAt( c ) == buffer.charAt ( c - 1 )  ) {
+          buffer.setCharAt( c, '*' );
+        }
+        // Substitute Umlauts.
+        else if ( buffer.charAt( c ) == 'ä' ) {
+          buffer.setCharAt( c, 'a' );
+        }
+        else if ( buffer.charAt( c ) == 'ö' ) {
+          buffer.setCharAt( c, 'o' );
+        }
+        else if ( buffer.charAt( c ) == 'ü' ) {
+          buffer.setCharAt( c, 'u' );
+        }
+        // Fix bug so that 'ß' at the end of a word is replaced.
+        else if ( buffer.charAt( c ) == 'ß' ) {
+            buffer.setCharAt( c, 's' );
+            buffer.insert( c + 1, 's' );
+            substCount++;
+        }
+        // Make sure that at least one character follows the current one
+        if ( c < buffer.length() - 1 ) {
+          // Mask several common character combinations with a token
+          if ( ( c < buffer.length() - 2 ) && buffer.charAt( c ) == 's' &&
+            buffer.charAt( c + 1 ) == 'c' && buffer.charAt( c + 2 ) == 'h' )
+          {
+            buffer.setCharAt( c, '$' );
+            buffer.delete( c + 1, c + 3 );
+            substCount += 2;
+          }
+          else if ( buffer.charAt( c ) == 'c' && buffer.charAt( c + 1 ) == 'h' ) {
+            buffer.setCharAt( c, '§' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 'e' && buffer.charAt( c + 1 ) == 'i' ) {
+            buffer.setCharAt( c, '%' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'e' ) {
+            buffer.setCharAt( c, '&' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 'i' && buffer.charAt( c + 1 ) == 'g' ) {
+            buffer.setCharAt( c, '#' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+          else if ( buffer.charAt( c ) == 's' && buffer.charAt( c + 1 ) == 't' ) {
+            buffer.setCharAt( c, '!' );
+            buffer.deleteCharAt( c + 1 );
+            substCount++;
+          }
+        }
+      }
+    }
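+
+    /*
+     * For illustration: substitute() applied to a buffer containing "schön" first
+     * masks "sch" as "$" and then replaces the umlaut, leaving "$on"; the masked
+     * combinations are restored later by resubstitute().
+     */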
+
+    /**
+     * Undoes the changes made by substitute(), i.e. the doubled-character markers
+     * and the masked character combinations. Umlauts remain as their corresponding
+     * vowel, and "ß" remains as "ss".
+     */
+    private void resubstitute( StringBuffer buffer )
+    {
+      for ( int c = 0; c < buffer.length(); c++ ) {
+        if ( buffer.charAt( c ) == '*' ) {
+          char x = buffer.charAt( c - 1 );
+          buffer.setCharAt( c, x );
+        }
+        else if ( buffer.charAt( c ) == '$' ) {
+          buffer.setCharAt( c, 's' );
+          buffer.insert( c + 1, new char[]{'c', 'h'}, 0, 2 );
+        }
+        else if ( buffer.charAt( c ) == '§' ) {
+          buffer.setCharAt( c, 'c' );
+          buffer.insert( c + 1, 'h' );
+        }
+        else if ( buffer.charAt( c ) == '%' ) {
+          buffer.setCharAt( c, 'e' );
+          buffer.insert( c + 1, 'i' );
+        }
+        else if ( buffer.charAt( c ) == '&' ) {
+          buffer.setCharAt( c, 'i' );
+          buffer.insert( c + 1, 'e' );
+        }
+        else if ( buffer.charAt( c ) == '#' ) {
+          buffer.setCharAt( c, 'i' );
+          buffer.insert( c + 1, 'g' );
+        }
+        else if ( buffer.charAt( c ) == '!' ) {
+          buffer.setCharAt( c, 's' );
+          buffer.insert( c + 1, 't' );
+        }
+      }
+    }
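+
+    /*
+     * Continuing the example above: resubstitute() applied to "$on" restores the
+     * masked combination and yields "schon".
+     */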
+    
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerAR.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerAR extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "ar";
+
+  public MpdlAnalyzerAR() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerAR(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerAR(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerAR(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerDE.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerDE extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "de";
+
+  public MpdlAnalyzerDE() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerDE(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerDE(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerDE(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerEL.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerEL extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "el";
+
+  public MpdlAnalyzerEL() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerEL(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerEL(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerEL(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerEN.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerEN extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "en";
+
+  public MpdlAnalyzerEN() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerEN(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerEN(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerEN(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerFR.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerFR extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "fr";
+
+  public MpdlAnalyzerFR() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerFR(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerFR(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerFR(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerIT.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerIT extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "it";
+
+  public MpdlAnalyzerIT() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerIT(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerIT(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerIT(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerLA.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerLA extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "la";
+
+  public MpdlAnalyzerLA() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerLA(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerLA(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerLA(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerNL.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerNL extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "nl";
+
+  public MpdlAnalyzerNL() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerNL(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerNL(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerNL(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/MpdlAnalyzerZH.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,41 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Hashtable;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+
+public class MpdlAnalyzerZH extends MpdlMorphAnalyzer {
+  private static String LANGUAGE = "zh";
+
+  public MpdlAnalyzerZH() {
+    super();
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerZH(String[] stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerZH(Hashtable stopwords) {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+  /**
+   * Builds an analyzer with the given stop words.
+   */
+  public MpdlAnalyzerZH(File stopwords) throws IOException {
+    super(stopwords);
+    setLanguage(LANGUAGE);
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/analyzer/lang/RussianStemmer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,630 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.analyzer.lang;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Russian stemming algorithm implementation (see http://snowball.sourceforge.net for detailed description).
+ *
+ *
+ * @version $Id: RussianStemmer.java 564236 2007-08-09 15:21:19Z gsingers $
+ */
+public class RussianStemmer
+{
+    private char[] charset;
+
+    // positions of RV, R1 and R2 respectively
+    private int RV, R1, R2;
+
+    // letters (currently unused letters are commented out)
+    private final static char A = 0;
+    //private final static char B = 1;
+    private final static char V = 2;
+    private final static char G = 3;
+    //private final static char D = 4;
+    private final static char E = 5;
+    //private final static char ZH = 6;
+    //private final static char Z = 7;
+    private final static char I = 8;
+    private final static char I_ = 9;
+    //private final static char K = 10;
+    private final static char L = 11;
+    private final static char M = 12;
+    private final static char N = 13;
+    private final static char O = 14;
+    //private final static char P = 15;
+    //private final static char R = 16;
+    private final static char S = 17;
+    private final static char T = 18;
+    private final static char U = 19;
+    //private final static char F = 20;
+    private final static char X = 21;
+    //private final static char TS = 22;
+    //private final static char CH = 23;
+    private final static char SH = 24;
+    private final static char SHCH = 25;
+    //private final static char HARD = 26;
+    private final static char Y = 27;
+    private final static char SOFT = 28;
+    private final static char AE = 29;
+    private final static char IU = 30;
+    private final static char IA = 31;
+
+    // stem definitions
+    private static char[] vowels = { A, E, I, O, U, Y, AE, IU, IA };
+
+    private static char[][] perfectiveGerundEndings1 = {
+        { V },
+        { V, SH, I },
+        { V, SH, I, S, SOFT }
+    };
+
+    private static char[][] perfectiveGerund1Predessors = {
+        { A },
+        { IA }
+    };
+
+    private static char[][] perfectiveGerundEndings2 = {
+        { I, V },
+        { Y, V },
+        { I, V, SH, I },
+        { Y, V, SH, I },
+        { I, V, SH, I, S, SOFT },
+        { Y, V, SH, I, S, SOFT }
+    };
+
+    private static char[][] adjectiveEndings = {
+        { E, E },
+        { I, E },
+        { Y, E },
+        { O, E },
+        { E, I_ },
+        { I, I_ },
+        { Y, I_ },
+        { O, I_ },
+        { E, M },
+        { I, M },
+        { Y, M },
+        { O, M },
+        { I, X },
+        { Y, X },
+        { U, IU },
+        { IU, IU },
+        { A, IA },
+        { IA, IA },
+        { O, IU },
+        { E, IU },
+        { I, M, I },
+        { Y, M, I },
+        { E, G, O },
+        { O, G, O },
+        { E, M, U },
+        { O, M, U }
+    };
+
+    private static char[][] participleEndings1 = {
+        { SHCH },
+        { E, M },
+        { N, N },
+        { V, SH },
+        { IU, SHCH }
+    };
+
+    private static char[][] participleEndings2 = {
+        { I, V, SH },
+        { Y, V, SH },
+        { U, IU, SHCH }
+    };
+
+    private static char[][] participle1Predessors = {
+        { A },
+        { IA }
+    };
+
+    private static char[][] reflexiveEndings = {
+        { S, IA },
+        { S, SOFT }
+    };
+
+    private static char[][] verbEndings1 = {
+        { I_ },
+        { L },
+        { N },
+        { L, O },
+        { N, O },
+        { E, T },
+        { IU, T },
+        { L, A },
+        { N, A },
+        { L, I },
+        { E, M },
+        { N, Y },
+        { E, T, E },
+        { I_, T, E },
+        { T, SOFT },
+        { E, SH, SOFT },
+        { N, N, O }
+    };
+
+    private static char[][] verbEndings2 = {
+        { IU },
+        { U, IU },
+        { E, N },
+        { E, I_ },
+        { IA, T },
+        { U, I_ },
+        { I, L },
+        { Y, L },
+        { I, M },
+        { Y, M },
+        { I, T },
+        { Y, T },
+        { I, L, A },
+        { Y, L, A },
+        { E, N, A },
+        { I, T, E },
+        { I, L, I },
+        { Y, L, I },
+        { I, L, O },
+        { Y, L, O },
+        { E, N, O },
+        { U, E, T },
+        { U, IU, T },
+        { E, N, Y },
+        { I, T, SOFT },
+        { Y, T, SOFT },
+        { I, SH, SOFT },
+        { E, I_, T, E },
+        { U, I_, T, E }
+    };
+
+    private static char[][] verb1Predessors = {
+        { A },
+        { IA }
+    };
+
+    private static char[][] nounEndings = {
+        { A },
+        { U },
+        { I_ },
+        { O },
+        { U },
+        { E },
+        { Y },
+        { I },
+        { SOFT },
+        { IA },
+        { E, V },
+        { O, V },
+        { I, E },
+        { SOFT, E },
+        { IA, X },
+        { I, IU },
+        { E, I },
+        { I, I },
+        { E, I_ },
+        { O, I_ },
+        { E, M },
+        { A, M },
+        { O, M },
+        { A, X },
+        { SOFT, IU },
+        { I, IA },
+        { SOFT, IA },
+        { I, I_ },
+        { IA, M },
+        { IA, M, I },
+        { A, M, I },
+        { I, E, I_ },
+        { I, IA, M },
+        { I, E, M },
+        { I, IA, X },
+        { I, IA, M, I }
+    };
+
+    private static char[][] superlativeEndings = {
+        { E, I_, SH },
+        { E, I_, SH, E }
+    };
+
+    private static char[][] derivationalEndings = {
+        { O, S, T },
+        { O, S, T, SOFT }
+    };
+
+    /**
+     * Default constructor; a charset must be set via setCharset() before stemming.
+     */
+    public RussianStemmer()
+    {
+        super();
+    }
+
+    /**
+     * Creates a RussianStemmer that maps the symbolic letter constants through the given charset.
+     */
+    public RussianStemmer(char[] charset)
+    {
+        super();
+        this.charset = charset;
+    }
+
+    /**
+     * Adjectival ending is an adjective ending,
+     * optionally preceded by participle ending.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean adjectival(StringBuffer stemmingZone)
+    {
+        // look for adjective ending in a stemming zone
+        if (!findAndRemoveEnding(stemmingZone, adjectiveEndings))
+            return false;
+        // if adjective ending was found, try for participle ending.
+        // variable r is unused, we are just interested in the side effect of
+        // findAndRemoveEnding():
+        boolean r =
+            findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors)
+            ||
+            findAndRemoveEnding(stemmingZone, participleEndings2);
+        return true;
+    }
+
+    /**
+     * Derivational endings
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean derivational(StringBuffer stemmingZone)
+    {
+        int endingLength = findEnding(stemmingZone, derivationalEndings);
+        if (endingLength == 0)
+             // no derivational ending found
+            return false;
+        else
+        {
+            // Ensure that the ending is located in R2
+            if (R2 - RV <= stemmingZone.length() - endingLength)
+            {
+                stemmingZone.setLength(stemmingZone.length() - endingLength);
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+    }
+
+    /**
+     * Finds an ending among the given ending class and returns the length of the ending found (0 if not found).
+     * Creation date: (17/03/2002 8:18:34 PM)
+     */
+    private int findEnding(StringBuffer stemmingZone, int startIndex, char[][] theEndingClass)
+    {
+        boolean match = false;
+        for (int i = theEndingClass.length - 1; i >= 0; i--)
+        {
+            char[] theEnding = theEndingClass[i];
+            // check if the ending is bigger than stemming zone
+            if (startIndex < theEnding.length - 1)
+            {
+                match = false;
+                continue;
+            }
+            match = true;
+            int stemmingIndex = startIndex;
+            for (int j = theEnding.length - 1; j >= 0; j--)
+            {
+                if (stemmingZone.charAt(stemmingIndex--) != charset[theEnding[j]])
+                {
+                    match = false;
+                    break;
+                }
+            }
+            // check if ending was found
+            if (match)
+            {
+                return theEndingClass[i].length; // ending found, return its length
+            }
+        }
+        return 0;
+    }
+
+    private int findEnding(StringBuffer stemmingZone, char[][] theEndingClass)
+    {
+        return findEnding(stemmingZone, stemmingZone.length() - 1, theEndingClass);
+    }
+
+    /**
+     * Finds the ending among the given class of endings and removes it from the stemming zone.
+     * Creation date: (17/03/2002 8:18:34 PM)
+     */
+    private boolean findAndRemoveEnding(StringBuffer stemmingZone, char[][] theEndingClass)
+    {
+        int endingLength = findEnding(stemmingZone, theEndingClass);
+        if (endingLength == 0)
+            // not found
+            return false;
+        else {
+            stemmingZone.setLength(stemmingZone.length() - endingLength);
+            // cut the ending found
+            return true;
+        }
+    }
+
+    /**
+     * Finds the ending among the given class of endings, then checks if this ending was
+     * preceded by any of the given predecessors, and if so, removes it from the stemming zone.
+     * Creation date: (17/03/2002 8:18:34 PM)
+     */
+    private boolean findAndRemoveEnding(StringBuffer stemmingZone,
+        char[][] theEndingClass, char[][] thePredessors)
+    {
+        int endingLength = findEnding(stemmingZone, theEndingClass);
+        if (endingLength == 0)
+            // not found
+            return false;
+        else
+        {
+            int predessorLength =
+                findEnding(stemmingZone,
+                    stemmingZone.length() - endingLength - 1,
+                    thePredessors);
+            if (predessorLength == 0)
+                return false;
+            else {
+                stemmingZone.setLength(stemmingZone.length() - endingLength);
+                // cut the ending found
+                return true;
+            }
+        }
+
+    }
+
+    /**
+     * Marks positions of RV, R1 and R2 in a given word.
+     * Creation date: (16/03/2002 3:40:11 PM)
+     */
+    private void markPositions(String word)
+    {
+        RV = 0;
+        R1 = 0;
+        R2 = 0;
+        int i = 0;
+        // find RV
+        while (word.length() > i && !isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // RV zone is empty
+        RV = i;
+        // find R1
+        while (word.length() > i && isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // R1 zone is empty
+        R1 = i;
+        // find R2
+        while (word.length() > i && !isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // R2 zone is empty
+        while (word.length() > i && isVowel(word.charAt(i)))
+        {
+            i++;
+        }
+        if (word.length() - 1 < ++i)
+            return; // R2 zone is empty
+        R2 = i;
+    }
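+
+    /*
+     * For illustration, writing c for a consonant and v for a vowel: in a word with
+     * the letter pattern "cvcvcv", the first vowel is at index 1, so RV = 2; the
+     * first non-vowel after it is at index 2, so R1 = 3; continuing the same search
+     * gives R2 = 5. stem() then operates on the substring starting at RV.
+     */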
+
+    /**
+     * Checks if a character is a vowel.
+     * Creation date: (16/03/2002 10:47:03 PM)
+     * @return boolean
+     * @param letter char
+     */
+    private boolean isVowel(char letter)
+    {
+        for (int i = 0; i < vowels.length; i++)
+        {
+            if (letter == charset[vowels[i]])
+                return true;
+        }
+        return false;
+    }
+
+    /**
+     * Noun endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean noun(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(stemmingZone, nounEndings);
+    }
+
+    /**
+     * Perfective gerund endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean perfectiveGerund(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(
+            stemmingZone,
+            perfectiveGerundEndings1,
+            perfectiveGerund1Predessors)
+            || findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
+    }
+
+    /**
+     * Reflexive endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean reflexive(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(stemmingZone, reflexiveEndings);
+    }
+
+    /**
+     * Removes a trailing I (charset[I]) from the stemming zone, if present.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean removeI(StringBuffer stemmingZone)
+    {
+        if (stemmingZone.length() > 0
+            && stemmingZone.charAt(stemmingZone.length() - 1) == charset[I])
+        {
+            stemmingZone.setLength(stemmingZone.length() - 1);
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Removes a trailing soft sign (charset[SOFT]) from the stemming zone, if present.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean removeSoft(StringBuffer stemmingZone)
+    {
+        if (stemmingZone.length() > 0
+            && stemmingZone.charAt(stemmingZone.length() - 1) == charset[SOFT])
+        {
+            stemmingZone.setLength(stemmingZone.length() - 1);
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Sets the charset used to map the symbolic letter constants to concrete characters.
+     * Creation date: (16/03/2002 10:58:42 PM)
+     * @param newCharset char[]
+     */
+    public void setCharset(char[] newCharset)
+    {
+        charset = newCharset;
+    }
+
+    /**
+     * Finds the stem for given Russian word.
+     * Creation date: (16/03/2002 3:36:48 PM)
+     * @return java.lang.String
+     * @param input java.lang.String
+     */
+    public String stem(String input)
+    {
+        markPositions(input);
+        if (RV == 0)
+            return input; //RV wasn't detected, nothing to stem
+        StringBuffer stemmingZone = new StringBuffer(input.substring(RV));
+        // stemming goes on in RV
+        // Step 1
+
+        if (!perfectiveGerund(stemmingZone))
+        {
+            reflexive(stemmingZone);
+            // variable r is unused, we are just interested in the flow that gets
+            // created by logical expression: apply adjectival(); if that fails,
+            // apply verb() etc
+            boolean r =
+                adjectival(stemmingZone)
+                || verb(stemmingZone)
+                || noun(stemmingZone);
+        }
+        // Step 2
+        removeI(stemmingZone);
+        // Step 3
+        derivational(stemmingZone);
+        // Step 4
+        superlative(stemmingZone);
+        undoubleN(stemmingZone);
+        removeSoft(stemmingZone);
+        // return result
+        return input.substring(0, RV) + stemmingZone.toString();
+    }
+
+    /**
+     * Superlative endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean superlative(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(stemmingZone, superlativeEndings);
+    }
+
+    /**
+     * Undoubles N.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean undoubleN(StringBuffer stemmingZone)
+    {
+        char[][] doubleN = {
+            { N, N }
+        };
+        if (findEnding(stemmingZone, doubleN) != 0)
+        {
+            stemmingZone.setLength(stemmingZone.length() - 1);
+            return true;
+        }
+        else
+        {
+            return false;
+        }
+    }
+
+    /**
+     * Verb endings.
+     * Creation date: (17/03/2002 12:14:58 AM)
+     * @param stemmingZone java.lang.StringBuffer
+     */
+    private boolean verb(StringBuffer stemmingZone)
+    {
+        return findAndRemoveEnding(
+            stemmingZone,
+            verbEndings1,
+            verb1Predessors)
+            || findAndRemoveEnding(stemmingZone, verbEndings2);
+    }
+
+    /**
+     * Static method for stemming with different charsets
+     */
+    public static String stem(String theWord, char[] charset)
+    {
+        RussianStemmer stemmer = new RussianStemmer();
+        stemmer.setCharset(charset);
+        return stemmer.stem(theWord);
+    }
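+
+    /*
+     * A minimal usage sketch (illustrative): the stemmer is charset-agnostic, so the
+     * caller supplies a char[] that maps the symbolic letter constants above (A, V, G, ...)
+     * to concrete characters, e.g. a Unicode Cyrillic mapping defined elsewhere; with
+     * such an array the one-shot form is simply:
+     *   String stem = RussianStemmer.stem(word, charset);
+     */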
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,184 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.xml.sax.*;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+
+public class ArchimedesDocContentHandler implements ContentHandler {
+  private String xmlnsString = "";
+  private File outputFile;
+  private String language;
+  private String fromEncoding;
+  private String toEncoding;
+  private OutputStream out;
+  private Element currentElement;
+  
+  public ArchimedesDocContentHandler(String language, String fromEncoding, String toEncoding, File outputFile) throws ApplicationException {
+    this.language = language;
+    this.outputFile = outputFile;
+    this.fromEncoding = fromEncoding;
+    this.toEncoding = toEncoding;
+  }
+  
+  public void startDocument() throws SAXException {
+    try {
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      write("<?xml version=\"1.0\"?>\n");
+    } catch (FileNotFoundException e) {
+      throw new SAXException(e);
+    }
+  }
+
+  public void endDocument() throws SAXException {
+    try { 
+      if (out != null)
+        out.close(); 
+    } catch (Exception e) { 
+        // nothing: always close the stream at the end of the method
+    }  
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    char[] cCopy = new char[length];
+    System.arraycopy(c, start, cCopy, 0, length);
+    String charactersStr = String.valueOf(cCopy);
+    if (charactersStr != null) {
+      String elemName = null;
+      if (currentElement != null)
+        elemName = currentElement.name;
+      if ((! isArchMetadata(elemName)) && (currentElement == null || currentElement.isGreek() || currentElement.isArabic())) {
+        try {
+          if (fromEncoding.equals("betacode") && toEncoding.equals("unicode")) {
+              charactersStr = transcodeFromBetaCode2Unicode(charactersStr);
+          } else if (fromEncoding.equals("buckwalter") && toEncoding.equals("unicode")) {
+            charactersStr = transcodeFromBuckwalter2Unicode(charactersStr);
+          }
+        } catch (ApplicationException e) {
+          throw new SAXException(e);
+        }
+      }
+      charactersStr = StringUtilEscapeChars.forXML(charactersStr);
+      if (currentElement != null)
+        currentElement.value = charactersStr;
+      write(charactersStr);
+    }
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+    
+  }
+
+  public void setDocumentLocator(Locator locator) {
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+    xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" ";
+  }
+  
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    currentElement = new Element(language, name);
+    int attrSize = attrs.getLength();
+    String attrString = "";
+    for (int i=0; i<attrSize; i++) {
+      String attrQName = attrs.getQName(i);
+      String attrValue = attrs.getValue(i);
+      attrValue = StringUtilEscapeChars.forXML(attrValue);
+      attrString = attrString + " " + attrQName + "=\"" + attrValue + "\"";
+      if (attrQName != null && attrQName.equals("lang") && attrValue != null) {
+        currentElement.language = attrValue;
+      }
+    }
+    currentElement.attrString = attrString;
+    if (xmlnsString.equals("")) {
+      write("<" + name + attrString + ">");
+    } else { 
+      currentElement.xmlnsString = xmlnsString;
+      write("<" + name + " " + xmlnsString + attrString + ">");
+    }
+    xmlnsString = "";
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    currentElement = null;
+    write("</" + name + ">");
+  }
+
+  private void write(String outStr) throws SAXException {
+    try {
+      byte[] bytes = outStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new SAXException(e);
+    }
+  }
+  
+  private String transcodeFromBetaCode2Unicode(String inputStr) throws ApplicationException {
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeStr = transcoder.transcodeFromBetaCode2Unicode(inputStr);
+    return encodedUnicodeStr;
+  }
+  
+  private String transcodeFromBuckwalter2Unicode(String inputStr) throws ApplicationException {
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeStr = transcoder.transcodeFromBuckwalter2Unicode(inputStr);
+    return encodedUnicodeStr;
+  }
+  
+  private boolean isArchMetadata(String elemName) {
+    boolean isArchMetadata = false;
+    if (elemName == null)
+      return false;
+    String elName = elemName.toLowerCase().trim();
+    if (elName.equals("info") || elName.equals("author") || elName.equals("title") || elName.equals("date") || elName.equals("place") 
+        || elName.equals("translator") || elName.equals("lang") || elName.equals("cvs_file") || elName.equals("cvs_version") || elName.equals("comments") || elName.equals("locator")) {
+      isArchMetadata = true;
+    }
+    return isArchMetadata;
+  }
+
+  private class Element {
+    String name;
+    String language;
+    String xmlnsString;
+    String attrString;
+    String value;
+    
+    Element(String language, String name) {
+      this.language = language;
+      this.name = name;
+    }
+    
+    boolean isGreek() {
+      boolean isGreek = false;
+      if (language != null && language.equals("el"))
+        isGreek = true;
+      return isGreek;
+    }
+
+    boolean isArabic() {
+      boolean isArabic = false;
+      if (language != null && language.equals("ar"))
+        isArabic = true;
+      return isArabic;
+    }
+  }
+}
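+
+// Illustrative behavior sketch (the exact output characters depend on the Transcoder):
+// for a Greek document driven through this handler with fromEncoding "betacode" and
+// toEncoding "unicode", text content of non-metadata elements is rewritten to its
+// Unicode form before being written out, while metadata elements such as <info> or
+// <author> are copied unchanged (apart from XML escaping); Arabic documents are
+// handled analogously via the Buckwalter transcoding.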
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocForeignLangContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,176 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.xml.sax.*;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+
+public class ArchimedesDocForeignLangContentHandler implements ContentHandler {
+  private String xmlnsString = "";
+  private File outputFile;
+  private OutputStream out;
+  private Element currentElement;
+  private boolean currentElementIsForeign = false;
+  
+  public ArchimedesDocForeignLangContentHandler(File outputFile) throws ApplicationException {
+    this.outputFile = outputFile;
+  }
+  
+  public void startDocument() throws SAXException {
+    try {
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      write("<?xml version=\"1.0\"?>\n");
+    } catch (FileNotFoundException e) {
+      throw new SAXException(e);
+    }
+  }
+
+  public void endDocument() throws SAXException {
+    try { 
+      if (out != null)
+        out.close(); 
+    } catch (Exception e) { 
+        // nothing: always close the stream at the end of the method
+    }  
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    char[] cCopy = new char[length];
+    System.arraycopy(c, start, cCopy, 0, length);
+    String charactersStr = String.valueOf(cCopy);
+    if (charactersStr != null) {
+      String elemName = null;
+      if (currentElement != null)
+        elemName = currentElement.name;
+      if ((! isArchMetadata(elemName)) && (currentElementIsForeign)) {
+        try {
+          charactersStr = transcodeFromBetaCode2Unicode(charactersStr);
+        } catch (ApplicationException e) {
+          throw new SAXException(e);
+        }
+      }
+      charactersStr = StringUtilEscapeChars.forXML(charactersStr);
+      if (currentElement != null)
+        currentElement.value = charactersStr;
+      write(charactersStr);
+    }
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+    
+  }
+
+  public void setDocumentLocator(Locator locator) {
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+    xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" ";
+  }
+  
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    Element newElement = new Element(null, name);
+    if (currentElement != null) {
+      if (currentElement.language != null)
+        newElement.language = currentElement.language;  // the language is inherited by child elements
+    }
+    currentElement = newElement;
+    int attrSize = attrs.getLength();
+    String attrString = "";
+    for (int i=0; i<attrSize; i++) {
+      String attrQName = attrs.getQName(i);
+      String attrValue = attrs.getValue(i);
+      attrValue = StringUtilEscapeChars.forXML(attrValue);
+      if (attrQName != null && attrQName.equals("lang") && attrValue != null) {
+        currentElement.language = attrValue; // if xml:lang is set, it is applied to the new element and overrides values inherited from the parent
+      }
+      // replace "lang=greek" by "lang=grc"
+      if (name.equals("foreign") && attrQName.equals("lang") && attrValue.equals("greek")) {
+        currentElementIsForeign = true;
+        attrString = attrString + " " + attrQName + "=\"" + "grc" + "\"";
+      } else {
+        attrString = attrString + " " + attrQName + "=\"" + attrValue + "\"";
+      }
+    }
+    currentElement.attrString = attrString;
+    if (xmlnsString.equals("")) {
+      write("<" + name + attrString + ">");
+    } else { 
+      currentElement.xmlnsString = xmlnsString;
+      write("<" + name + " " + xmlnsString + attrString + ">");
+    }
+    xmlnsString = "";
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    if (name.equals("foreign"))
+      currentElementIsForeign = false;
+    currentElement = null;
+    write("</" + name + ">");
+  }
+
+  private void write(String outStr) throws SAXException {
+    try {
+      byte[] bytes = outStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new SAXException(e);
+    }
+  }
+  
+  private String transcodeFromBetaCode2Unicode(String inputStr) throws ApplicationException {
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeStr = transcoder.transcodeFromBetaCode2Unicode(inputStr);
+    return encodedUnicodeStr;
+  }
+  
+  private boolean isArchMetadata(String elemName) {
+    boolean isArchMetadata = false;
+    if (elemName == null)
+      return false;
+    String elName = elemName.toLowerCase().trim();
+    if (elName.equals("info") || elName.equals("author") || elName.equals("title") || elName.equals("date") || elName.equals("place") 
+        || elName.equals("translator") || elName.equals("lang") || elName.equals("cvs_file") || elName.equals("cvs_version") || elName.equals("comments") || elName.equals("locator")) {
+      isArchMetadata = true;
+    }
+    return isArchMetadata;
+  }
+
+  private class Element {
+    String name;
+    String language;
+    String xmlnsString;
+    String attrString;
+    String value;
+    
+    Element(String language, String name) {
+      this.language = language;
+      this.name = name;
+    }
+    
+    boolean isGreek() {
+      boolean isGreek = false;
+      if (language != null && (language.equals("el") || language.equals("greek") || language.equals("grc")))
+        isGreek = true;
+      return isGreek;
+    }
+
+  }
+}
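+
+// Illustrative behavior sketch: in the English, French, Italian and Latin documents this
+// handler processes, an embedded element such as <foreign lang="greek">...</foreign> has
+// its lang attribute rewritten to "grc" and its betacode content transcoded to Unicode;
+// all other elements and attributes are passed through unchanged (apart from XML escaping).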
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/ArchimedesDocManager.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,147 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.util.FilenameFilterExtension;
+
+public class ArchimedesDocManager {
+  private static ArchimedesDocManager instance;
+  private static String MPDL_DOC_DIR = MpdlConstants.MPDL_DOC_DIR;
+  private static String ARCH_DOC_DIR = MPDL_DOC_DIR + "/documents/archimedes";
+  private static String ARCH_DOC_OUT_DIR = MPDL_DOC_DIR + "/documentsTranscodedToUnicode/archimedes";
+  private ArchimedesDocContentHandler archimedesDocContentHandler;
+  private ArchimedesDocForeignLangContentHandler archimedesDocForeignLangContentHandler;
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
+  public static ArchimedesDocManager getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new ArchimedesDocManager();
+    }
+    return instance;
+  }
+
+  /**
+   * Command-line entry point for transcoding the Archimedes documents to Unicode.
+   */
+  public static void main(String[] args) throws ApplicationException {
+    getInstance();
+    instance.beginOperation();
+    System.out.print("Start ...");
+    // Greek
+    String inputDirGreek = ARCH_DOC_DIR + "/el";
+    String outputDirGreek = ARCH_DOC_OUT_DIR + "/el";
+    // instance.transcodeDirectory("el", "betacode", "unicode", inputDirGreek, outputDirGreek);
+    // Arabic
+    String inputDirArabic = ARCH_DOC_DIR + "/ar";
+    String outputDirArabic = ARCH_DOC_OUT_DIR + "/ar";
+    // instance.transcodeDirectory("ar", "buckwalter", "unicode", inputDirArabic, outputDirArabic);
+
+    // Foreign lang=greek transcoding
+    instance.transcodeForeignLangFiles();
+    
+    instance.end();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    System.out.println("End.");
+    System.out.println("Needed time: " + elapsedTime + " seconds");
+  }
+
+  private void transcodeDirectory(String language, String fromEncoding, String toEncoding, String inputDirName, String outputDirName) throws ApplicationException {
+    File inputDir = new File(inputDirName);
+    FilenameFilter filter = new FilenameFilterExtension("xml");
+    File[] files = inputDir.listFiles(filter);
+    for (int i=0; i < files.length; i++) {
+      File inputFile = files[i];
+      String outputFileName = inputFile.getName();
+      File outputFile = new File(outputDirName + "/" + outputFileName);
+      File outputDir = new File(outputFile.getParent()); 
+      if (! outputDir.exists()) {
+        outputDir.mkdirs();  // create the directory, including any missing parent directories
+      }
+      transcodeFile(language, fromEncoding, toEncoding, inputFile, outputFile);
+    }  
+  }
+  
+  private void transcodeFile(String language, String fromEncoding, String toEncoding, File inputFile, File outputFile) throws ApplicationException {
+    archimedesDocContentHandler = new ArchimedesDocContentHandler(language, fromEncoding, toEncoding, outputFile);
+    try {
+      XMLReader xmlParser = new SAXParser();
+      xmlParser.setContentHandler(archimedesDocContentHandler);
+      InputStream inputStream = new FileInputStream(inputFile);
+      BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
+      InputSource input = new InputSource(bufferedInputStream);
+      xmlParser.parse(input);
+      bufferedInputStream.close();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void transcodeForeignLangFiles() throws ApplicationException {
+    String[] languages = {"en", "fr", "it", "la"};
+    for (int i=0; i<languages.length; i++) {
+      String language = languages[i];
+      File inputDir = new File(ARCH_DOC_DIR + "/" + language);
+      String outputDirName = ARCH_DOC_OUT_DIR + "/" + language;
+      FilenameFilter filter = new FilenameFilterExtension("xml");
+      File[] files = inputDir.listFiles(filter);
+      for (int j=0; j < files.length; j++) {
+        File inputFile = files[j];
+        String outputFileName = inputFile.getName();
+        File outputFile = new File(outputDirName + "/" + outputFileName);
+        File outputDir = new File(outputFile.getParent()); 
+        if (! outputDir.exists()) {
+        outputDir.mkdirs();  // create the directory, including any missing parent directories
+        }
+        transcodeForeignLangFile(inputFile, outputFile);
+      }
+    }
+  }
+  
+  private void transcodeForeignLangFile(File inputFile, File outputFile) throws ApplicationException {
+    archimedesDocForeignLangContentHandler = new ArchimedesDocForeignLangContentHandler(outputFile);
+    try {
+      XMLReader xmlParser = new SAXParser();
+      xmlParser.setContentHandler(archimedesDocForeignLangContentHandler);
+      InputStream inputStream = new FileInputStream(inputFile);
+      BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
+      InputSource input = new InputSource(bufferedInputStream);
+      xmlParser.parse(input);
+      bufferedInputStream.close();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void end() throws ApplicationException {
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/DictionarizerContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,276 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc;
+
+import java.util.ArrayList;
+
+import org.apache.lucene.analysis.Token;
+import org.xml.sax.*;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlTokenizerAnalyzer;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.db.LexHandler;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+
+public class DictionarizerContentHandler implements ContentHandler {
+  private static String MARK = "COMPLEXELEMENTTTTT";
+  private static int MARK_SIZE = MARK.length();
+  private static int ELEMENT_TYPE_CHARACTERS = 1;
+  private static int ELEMENT_TYPE_COMPLEX = 2;
+  private String xmlnsString = "";
+  private String language;
+  private String outputXmlFragment = "";
+  private Element rootElement;
+  private Element currentElement;
+  private ArrayList<Element> elementQueue;
+  
+  public DictionarizerContentHandler(String language) throws ApplicationException {
+    this.language = language;
+  }
+
+  public String getXmlFragment() {
+    return outputXmlFragment;  
+  }
+  
+  public void startDocument() throws SAXException {
+  }
+
+  public void endDocument() throws SAXException {
+    String rootElemToStr = rootElement.toXmlString();
+    write(rootElemToStr);
+    write("\n");
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    char[] cCopy = new char[length];
+    System.arraycopy(c, start, cCopy, 0, length);
+    String charactersStr = String.valueOf(cCopy);
+    if (charactersStr != null && ! charactersStr.equals("")) {
+      if (currentElement != null) {
+        Element charElement = new Element("characters", ELEMENT_TYPE_CHARACTERS);
+        charElement.value = StringUtilEscapeChars.deresolveXmlEntities(charactersStr);
+        if (currentElement.composites == null)
+          currentElement.composites = new ArrayList<Element>();
+        currentElement.composites.add(charElement);
+      }
+    }
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(Locator locator) {
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+    xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" ";
+  }
+  
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    if (elementQueue == null)
+      elementQueue = new ArrayList<Element>();
+    Element newElement = new Element(name); // element of type: complex
+    if (currentElement != null) {
+      if (currentElement.composites == null)
+        currentElement.composites = new ArrayList<Element>();
+      if (currentElement.lang != null)
+        newElement.lang = currentElement.lang;  // the language is inherited by the child elements
+      currentElement.composites.add(newElement);
+    }
+    currentElement = newElement;
+    int attrSize = attrs.getLength();
+    String attrString = "";
+    for (int i=0; i<attrSize; i++) {
+      String attrQName = attrs.getQName(i);
+      String attrValue = attrs.getValue(i);
+      attrValue = StringUtilEscapeChars.forXML(attrValue);
+      attrString = attrString + " " + attrQName + "=\"" + attrValue + "\"";
+      if (attrQName != null && (attrQName.toLowerCase().equals("xml:lang") || attrQName.toLowerCase().equals("lang")))
+        currentElement.lang = attrValue;  // if xml:lang (or lang) is set, it is assigned to the new element and overrides the value inherited from the parent
+    }
+    currentElement.attrString = attrString;
+    if (! xmlnsString.equals("")) {
+      currentElement.xmlnsString = xmlnsString;
+    }
+    xmlnsString = "";
+    elementQueue.add(currentElement);
+    // only the first element is the root element
+    if(rootElement == null)
+      rootElement = currentElement;
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    if (elementQueue != null && elementQueue.size() > 0) {
+      int lastIndex = elementQueue.size() - 1;
+      elementQueue.remove(lastIndex);
+    }
+    if (elementQueue != null && elementQueue.size() > 0) {
+      int lastIndex = elementQueue.size() - 1;
+      currentElement = elementQueue.get(lastIndex);
+    } else {
+      currentElement = null;
+    }
+  }
+
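+  // Maps a plain-character offset taken from the marked-up composite string (text in which each
+  // nested complex child element is replaced by the MARK placeholder) onto the corresponding
+  // position in the dictionarized string, skipping everything between '<' and '>' (i.e. the
+  // <w ...> tags inserted by characters2DictWords). Returns the matching position + 1
+  // (toXmlString subtracts 1 again before taking substrings); returns -1 if the offset is 0,
+  // the dictionarized string is empty, or the offset cannot be reached.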
+  public int getCharIndex(String compositesCharsDictionarized, int indexComplexElemCompositesCharsWithMarks) {
+    if (indexComplexElemCompositesCharsWithMarks == 0)
+      return -1;
+    int size = compositesCharsDictionarized.length();
+    if (size == 0)
+      return -1;
+    int index = 0;
+    int counter = 0;
+    boolean isInTag = false;
+    boolean success = false;
+    while (!success) {
+      if (counter > size)
+        return -1;
+      char c = compositesCharsDictionarized.charAt(counter);
+      if (c == '<')
+        isInTag = true;
+      if (! isInTag)
+        index++;
+      if (index == indexComplexElemCompositesCharsWithMarks)
+        success = true;
+      if (c == '>')
+        isInTag = false;
+      counter++;
+    }
+    return counter + 1;
+  }
+  
+  private void write(String outStr) throws SAXException {
+    outputXmlFragment += outStr;
+  }
+  
+  private class Element {
+    private int type;
+    private String name;
+    private String xmlnsString;
+    private String attrString;
+    private String value;
+    private String lang;  // normally holds the value of the xml:lang attribute or the xml:lang value inherited from the parent node
+    private ArrayList<Element> composites;
+    
+    private Element(String name) {
+      this.type = ELEMENT_TYPE_COMPLEX;
+      this.name = name;
+    }
+
+    private Element(String name, int type) {
+      this.type = type;
+      this.name = name;
+    }
+
+    private boolean isComplex() {
+      boolean isComplex = false;
+      if (type == ELEMENT_TYPE_COMPLEX)
+        isComplex = true;
+      return isComplex;
+    }
+    
+    private String toXmlString() throws SAXException {
+      String retString = "";
+      String elemLanguage = language;  // default value for the document/page
+      if (lang != null)
+        elemLanguage = lang;  // the element's own language value, if present
+      // write this element
+      if (! isComplex()) {
+        retString += value;
+      } else {
+        String xmlNsString = this.xmlnsString;
+        if (xmlNsString == null || xmlNsString.equals("")) {
+          retString = retString + "<" + name + attrString + ">";
+        } else { 
+          retString = retString + "<" + name + " " + xmlNsString + attrString + ">";
+        }
+        if (composites != null) {
+          String compositesChars = "";
+          String compositesCharsWithMarks = "";
+          ArrayList<Element> complexElements = new ArrayList<Element>();
+          for (int i=0; i<composites.size(); i++) {
+            Element composite = composites.get(i);
+            if (! composite.isComplex()) {
+              if (composite.value != null && ! composite.value.equals("")) {
+                String compositeValueStr = composite.value;
+                compositesChars += compositeValueStr;
+                compositesCharsWithMarks += compositeValueStr;
+              }
+            } else {
+              complexElements.add(composite);
+              compositesCharsWithMarks += MARK;
+            }
+          }
+          String compositesCharsDictionarized = characters2DictWords(compositesChars, elemLanguage);
+          if (complexElements.size() > 0) {
+            for (int i=0; i<complexElements.size(); i++) {
+              int indexComplexElemCompositesCharsWithMarks = compositesCharsWithMarks.indexOf(MARK);
+              int indexComplexElemCompositesCharsDictionarized = getCharIndex(compositesCharsDictionarized, indexComplexElemCompositesCharsWithMarks);
+              Element complexElem = complexElements.get(i);
+              String complexElementStr = complexElem.toXmlString();
+              String firstPiece = "";
+              if (indexComplexElemCompositesCharsDictionarized > 0) {
+                firstPiece = compositesCharsDictionarized.substring(0, indexComplexElemCompositesCharsDictionarized - 1);
+                compositesCharsDictionarized = compositesCharsDictionarized.substring(indexComplexElemCompositesCharsDictionarized - 1);
+              }
+              retString = retString + firstPiece + complexElementStr;
+              compositesCharsWithMarks = compositesCharsWithMarks.substring(indexComplexElemCompositesCharsWithMarks + MARK_SIZE);
+            }
+          }
+          retString = retString + compositesCharsDictionarized; // append the remaining dictionarized text (all of it if there were no complex children)
+        }
+        retString = retString + "</" + name + ">";
+      }
+      return retString;
+    }
+    
+    private String characters2DictWords(String charactersStrDeresolved, String language) throws SAXException {
+      String charactersStr = StringUtilEscapeChars.resolveXmlEntities(charactersStrDeresolved);
+      String retStr = "";
+      try {
+        MpdlTokenizerAnalyzer dictionarizerAnalyzer = new MpdlTokenizerAnalyzer(language);
+        ArrayList<Token> wordTokens = dictionarizerAnalyzer.getToken(charactersStr);
+        int endPos = 0;
+        for (int i=0; i < wordTokens.size(); i++) {
+          Token wordToken = wordTokens.get(i);
+          int startPos = wordToken.startOffset();
+          String beforeStr = charactersStr.substring(endPos, startPos);
+          endPos = wordToken.endOffset();
+          String wordStr = charactersStr.substring(startPos, endPos);
+          String beforeStrDeresolved = StringUtilEscapeChars.deresolveXmlEntities(beforeStr);
+          String wordStrDeresolved = StringUtilEscapeChars.deresolveXmlEntities(wordStr);
+          String wordTokenText = wordToken.termText();
+          LexHandler lexHandler = LexHandler.getInstance();
+          // delivers lex entry keys with the help of the morphology component (the lex entry of the stem of the normalized word form)
+          ArrayList<String> lexEntryKeys = lexHandler.getLexEntryKeys(wordTokenText, language, false);
+          if (lexEntryKeys != null) {
+            String lexForms = "";
+            for (int j=0; j<lexEntryKeys.size(); j++) {
+              String lexEntryKey = lexEntryKeys.get(j);
+              lexForms = lexForms + lexEntryKey + " ";
+            }
+            lexForms = lexForms.substring(0, lexForms.length() - 1);
+            retStr = retStr + beforeStrDeresolved + "<w lang=\"" + language + "\"" + " form=\"" + wordTokenText + "\"" + " lexForms=\"" + lexForms + "\">" + wordStrDeresolved + "</w>";
+          } else {
+            retStr = retStr + beforeStrDeresolved + wordStrDeresolved;
+          }
+        }
+        String lastAfterStr = charactersStr.substring(endPos);
+        String lastAfterStrDeresolved = StringUtilEscapeChars.deresolveXmlEntities(lastAfterStr);
+        retStr = retStr + lastAfterStrDeresolved;
+      } catch (ApplicationException e) {
+        throw new SAXException(e);
+      }
+      return retStr;
+    }
+  }
+}
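+
+/* Usage sketch (illustrative only; mirrors the SAX wiring used elsewhere in this changeset,
+ * e.g. in ArchimedesDocManager.transcodeFile):
+ *
+ *   DictionarizerContentHandler handler = new DictionarizerContentHandler("la");
+ *   XMLReader parser = new com.sun.org.apache.xerces.internal.parsers.SAXParser();
+ *   parser.setContentHandler(handler);
+ *   parser.parse(new InputSource(new java.io.StringReader(pageXml)));  // pageXml: placeholder XML fragment of one page
+ *   String dictionarized = handler.getXmlFragment();  // original markup with <w lang=... form=... lexForms=...> around word tokens
+ */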
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/NormalizeCharsContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,145 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc;
+
+import java.util.ArrayList;
+
+import org.apache.lucene.analysis.Token;
+import org.xml.sax.Attributes;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.Locator;
+import org.xml.sax.SAXException;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlNormalizer;
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlTokenizerAnalyzer;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+
+public class NormalizeCharsContentHandler implements ContentHandler {
+  private String xmlnsString = "";
+  private String[] normalizeFunctions = {};  // default: no normalization functions
+  private String language;
+  private String outputXmlFragment = "";
+  private Element currentElement;
+  
+  public NormalizeCharsContentHandler(String[] normalizeFunctions, String language) throws ApplicationException {
+    this.normalizeFunctions = normalizeFunctions;
+    this.language = language;
+  }
+
+  public String getXmlFragment() {
+    return outputXmlFragment;  
+  }
+  
+  public void startDocument() throws SAXException {
+  }
+
+  public void endDocument() throws SAXException {
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    char[] cCopy = new char[length];
+    System.arraycopy(c, start, cCopy, 0, length);
+    String charactersStr = String.valueOf(cCopy);
+    if (charactersStr != null && ! charactersStr.equals("")) {
+      charactersStr = normalize(charactersStr);
+      if (currentElement != null)
+        currentElement.value = charactersStr;
+      write(charactersStr);
+    }
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(Locator locator) {
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+    xmlnsString += "xmlns:" + prefix + "=\"" + uri + "\" ";
+  }
+  
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    currentElement = new Element(language, name);
+    int attrSize = attrs.getLength();
+    String attrString = "";
+    for (int i=0; i<attrSize; i++) {
+      String attrQName = attrs.getQName(i);
+      String attrValue = attrs.getValue(i);
+      attrValue = StringUtilEscapeChars.forXML(attrValue);
+      attrString = attrString + " " + attrQName + "=\"" + attrValue + "\"";
+      if (attrQName != null && attrQName.equals("lang") && attrValue != null) {
+        currentElement.language = attrValue;
+      }
+    }
+    currentElement.attrString = attrString;
+    if (xmlnsString.equals("")) {
+      write("<" + name + attrString + ">");
+    } else { 
+      currentElement.xmlnsString = xmlnsString;
+      write("<" + name + " " + xmlnsString + attrString + ">");
+    }
+    xmlnsString = "";
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    currentElement = null;
+    write("</" + name + ">");
+  }
+
+  private void write(String outStr) throws SAXException {
+    outputXmlFragment += outStr;
+  }
+
+  private String normalize(String charactersStr) throws SAXException {
+    String retStr = "";
+    try {
+      MpdlTokenizerAnalyzer tokenizerAnalyzer = new MpdlTokenizerAnalyzer(language);
+      ArrayList<Token> wordTokens = tokenizerAnalyzer.getToken(charactersStr);
+      int endPos = 0;
+      for (int i=0; i < wordTokens.size(); i++) {
+        Token wordToken = wordTokens.get(i);
+        int startPos = wordToken.startOffset();
+        String beforeStr = charactersStr.substring(endPos, startPos);
+        String beforeStrDeresolved = StringUtilEscapeChars.deresolveXmlEntities(beforeStr);
+        endPos = wordToken.endOffset();
+        String wordStr = charactersStr.substring(startPos, endPos);
+
+        MpdlNormalizer mpdlNormalizer = new MpdlNormalizer(normalizeFunctions, language);
+        String normalizedWordStr = mpdlNormalizer.normalize(wordStr);
+
+        String normalizedWordStrDeresolved = StringUtilEscapeChars.deresolveXmlEntities(normalizedWordStr);
+        // String wordTokenText = wordToken.termText();
+        retStr = retStr + beforeStrDeresolved + normalizedWordStrDeresolved;
+      }
+      String lastAfterStr = charactersStr.substring(endPos);
+      String lastAfterStrDeresolved = StringUtilEscapeChars.deresolveXmlEntities(lastAfterStr);
+      retStr = retStr + lastAfterStrDeresolved;
+    } catch (ApplicationException e) {
+      throw new SAXException(e);
+    }
+    return retStr;
+  }  
+  
+  private class Element {
+    String name;
+    String language;
+    String xmlnsString;
+    String attrString;
+    String value;
+    
+    Element(String language, String name) {
+      this.language = language;
+      this.name = name;
+    }
+    
+  }
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/DBRegularizationHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,146 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+
+import com.sleepycat.je.Cursor;
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseEntry;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.LockMode;
+import com.sleepycat.je.OperationStatus;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
+
+public class DBRegularizationHandler {
+  private String dbDirectory;
+  private DbEnvRegularization regDbEnv;
+  
+  public DBRegularizationHandler(String dbDir) {
+    this.dbDirectory = dbDir;
+  }
+  
+  public void start() throws ApplicationException {
+    regDbEnv = new DbEnvRegularization();
+    regDbEnv.setDataDir(dbDirectory);
+    regDbEnv.init(); // open databases in read/write mode
+  }
+  
+  public void openDatabases() throws ApplicationException {
+    regDbEnv.openDatabases();
+  }
+
+  public void closeDatabases() throws ApplicationException {
+    regDbEnv.close();
+  }
+
+  public void deleteData() throws ApplicationException {
+    regDbEnv.removeDatabases();
+  }
+  
+  public void writeOrigReg(Regularization reg) throws ApplicationException {
+    try {
+      String language = Language.getInstance().getLanguageId(reg.getLanguage());
+      String keyStr = language + "###" + reg.getOrig();
+      String valueStr = reg.getXmlString();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      DatabaseEntry dbEntryValue = new DatabaseEntry(valueStr.getBytes("utf-8"));
+      Database origDB = regDbEnv.getOrigDB();
+      origDB.put(null, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  public void writeNormReg(Regularization reg) throws ApplicationException {
+    try {
+      String language = Language.getInstance().getLanguageId(reg.getLanguage());
+      String keyStr = language + "###" + reg.getNorm();
+      String valueStr = reg.getXmlString();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      DatabaseEntry dbEntryValue = new DatabaseEntry(valueStr.getBytes("utf-8"));
+      Database normDB = regDbEnv.getNormDB();
+      normDB.put(null, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  public void deleteReg(Regularization reg) throws ApplicationException {
+    try {
+      String language = Language.getInstance().getLanguageId(reg.getLanguage());
+      String keyStrOrig = language + "###" + reg.getOrig();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStrOrig.getBytes("utf-8"));
+      Database origDB = regDbEnv.getOrigDB();
+      origDB.delete(null, dbEntryKey);
+      String keyStrNorm = language + "###" + reg.getNorm();  // use the resolved language id, consistent with writeNormReg
+      dbEntryKey = new DatabaseEntry(keyStrNorm.getBytes("utf-8"));
+      Database normDB = regDbEnv.getNormDB();
+      normDB.delete(null, dbEntryKey);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }      
+      
+  public ArrayList<Regularization> readRegsByOrig(String lang, String orig) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    ArrayList<Regularization> retRegs = new ArrayList<Regularization>();
+    String hashKey = language + "###" + orig;
+    try {
+      Database origDB = regDbEnv.getOrigDB();
+      Cursor cursor = origDB.openCursor(null, null);
+      byte[] bHashKey = hashKey.getBytes("utf-8");
+      DatabaseEntry dbEntryKey = new DatabaseEntry(bHashKey);
+      DatabaseEntry foundValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundValueBytes = foundValue.getData();
+        String foundValueStr = new String(foundValueBytes, "utf-8");
+        Regularization reg = Regularization.getInstance(foundValueStr);
+        retRegs.add(reg);
+        operationStatus = cursor.getNextDup(dbEntryKey, foundValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retRegs;
+  }
+ 
+  public ArrayList<Regularization> readRegsByNorm(String lang, String norm) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    ArrayList<Regularization> retRegs = new ArrayList<Regularization>();
+    String hashKey = language + "###" + norm;
+    try {
+      Database normDB = regDbEnv.getNormDB();
+      Cursor cursor = normDB.openCursor(null, null);
+      byte[] bHashKey = hashKey.getBytes("utf-8");
+      DatabaseEntry dbEntryKey = new DatabaseEntry(bHashKey);
+      DatabaseEntry foundValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundValueBytes = foundValue.getData();
+        String foundValueStr = new String(foundValueBytes, "utf-8");
+        Regularization reg = Regularization.getInstance(foundValueStr);
+        retRegs.add(reg);
+        operationStatus = cursor.getNextDup(dbEntryKey, foundValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retRegs;
+  }
+ 
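+  /* Usage sketch (illustrative only):
+   *
+   *   DBRegularizationHandler dbRegHandler = new DBRegularizationHandler("/path/to/dbDir");  // placeholder path
+   *   dbRegHandler.start();            // create/open the Berkeley DB environment in read/write mode
+   *   dbRegHandler.openDatabases();    // open (or create) OrigDB and NormDB
+   *   dbRegHandler.writeOrigReg(reg);  // reg: a Regularization, keyed by language + "###" + orig
+   *   dbRegHandler.writeNormReg(reg);  //      and by language + "###" + norm
+   *   ArrayList<Regularization> hits = dbRegHandler.readRegsByOrig("la", "illiusque");
+   *   dbRegHandler.closeDatabases();
+   */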
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/DbEnvRegularization.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,100 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization;
+
+import java.io.File;
+
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseConfig;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.Environment;
+import com.sleepycat.je.EnvironmentConfig;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class DbEnvRegularization {
+  private String dataDir;
+  private File envPath;
+  private Environment env;
+  private EnvironmentConfig envConfig;
+  private DatabaseConfig dbConfig;
+  private Database origDB;
+  private Database normDB;
+
+  public DbEnvRegularization() {
+  }
+
+  public void setDataDir(String dataDir) {
+    this.dataDir = dataDir;
+  }
+  
+  public void init() throws ApplicationException {
+    try {
+      envConfig = new EnvironmentConfig();
+      dbConfig = new DatabaseConfig();
+      envConfig.setReadOnly(false);
+      dbConfig.setReadOnly(false);
+      envConfig.setAllowCreate(true);
+      dbConfig.setAllowCreate(true);
+      envConfig.setTransactional(true);
+      dbConfig.setTransactional(true);
+      // allow duplicates for keys
+      dbConfig.setSortedDuplicates(true);
+      envPath = new File(dataDir);
+      env = new Environment(envPath, envConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  public void openDatabases() throws ApplicationException {
+    try {
+      // open databases (and create them if they do not exist)
+      origDB = env.openDatabase(null, "OrigDB", dbConfig);
+      normDB = env.openDatabase(null, "NormDB", dbConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public void removeDatabases() throws ApplicationException {
+    try {
+      if (origDB != null)
+        origDB.close();
+      if (normDB != null)
+        normDB.close();
+      env.removeDatabase(null, "OrigDB");    
+      env.removeDatabase(null, "NormDB");
+      origDB = null;
+      normDB = null;
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public Environment getEnv() {
+    return env;
+  }
+
+  public Database getNormDB() {
+    return normDB;
+  }
+
+  public Database getOrigDB() {
+    return origDB;
+  }
+
+  public void close() throws ApplicationException {
+    if (env != null) {
+      try {
+        if (origDB != null)
+          origDB.close();
+        if (normDB != null)
+          normDB.close();
+        if (env != null)
+          env.close();
+      } catch (DatabaseException e) {
+        throw new ApplicationException(e);
+      }
+    }
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/Regularization.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,89 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+
+public class Regularization {
+  private String language;
+  private String orig;
+  private String norm;
+  private String source;
+  private int sourcePosition;
+
+  public Regularization(String language, String orig, String norm, String source) {
+    this.language = language;
+    this.orig = orig;
+    this.norm = norm;
+    this.source = source;
+  }
+
+  public static Regularization getInstance(String xmlStr) throws ApplicationException {
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String language = xmlUtil.evaluateToString(xmlStr, "//language", null);
+    String orig = xmlUtil.evaluateToString(xmlStr, "//orig", null);
+    String norm = xmlUtil.evaluateToString(xmlStr, "//norm", null);
+    String source = xmlUtil.evaluateToString(xmlStr, "//source", null);
+    String sourcePosStr = xmlUtil.evaluateToString(xmlStr, "//source/@position", null);
+    int sourcePos = new Integer(sourcePosStr);
+    Regularization reg = new Regularization(language, orig, norm, source);
+    reg.setSourcePosition(sourcePos);
+    return reg;
+  }
+  
+  public String getLanguage() {
+    return language;
+  }
+
+  public void setLanguage(String language) {
+    this.language = language;
+  }
+
+  public String getOrig() {
+    return orig;
+  }
+
+  public void setOrig(String orig) {
+    this.orig = orig;
+  }
+
+  public String getNorm() {
+    return norm;
+  }
+
+  public void setNorm(String norm) {
+    this.norm = norm;
+  }
+
+  public String getSource() {
+    return source;
+  }
+
+  public void setSource(String source) {
+    this.source = source;
+  }
+
+  public int getSourcePosition() {
+    return sourcePosition;
+  }
+
+  public void setSourcePosition(int sourcePosition) {
+    this.sourcePosition = sourcePosition;
+  }
+
+  public String getXmlString() {
+    String xmlString = "<reg>\n";
+    if (language != null)
+      xmlString += "  <language>" + language + "</language>\n";
+    if (orig != null)
+      xmlString += "  <orig>" + StringUtilEscapeChars.deresolveXmlEntities(orig) + "</orig>\n";
+    if (norm != null)
+      xmlString += "  <norm>" + StringUtilEscapeChars.deresolveXmlEntities(norm) + "</norm>\n";
+    if (source != null)
+      xmlString += "  <source position=\"" + sourcePosition + "\">" + StringUtilEscapeChars.deresolveXmlEntities(source) + "</source>\n";
+    xmlString += "</reg>\n";
+    return xmlString;
+  }
+
+  
+}
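+
+/* Round-trip sketch (illustrative only): getXmlString() produces the <reg> record that
+ * getInstance() parses back again, e.g.
+ *
+ *   Regularization reg = new Regularization("la", "illiusq", "illiusque", "example.xml");  // placeholder values
+ *   reg.setSourcePosition(3);
+ *   String xml = reg.getXmlString();                        // <reg> ... <source position="3">example.xml</source> ... </reg>
+ *   Regularization copy = Regularization.getInstance(xml);  // copy.getNorm() is "illiusque", copy.getSourcePosition() is 3
+ */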
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/doc/regularization/RegularizationManager.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,287 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Hashtable;
+
+import javax.xml.namespace.NamespaceContext;
+
+import org.w3c.dom.Node;
+import org.xml.sax.InputSource;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lucene.LuceneUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.FilenameFilterExtension;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+import de.mpg.mpiwg.berlin.mpdl.xml.SchemaHandler;
+
+public class RegularizationManager {
+  private static RegularizationManager instance;
+  private static String MPDL_DOC_DIR = MpdlConstants.MPDL_DOC_DIR;
+  private static String MPDL_EXIST_DATA_DIR = MpdlConstants.MPDL_EXIST_DATA_DIR;
+  private static String ECHO_DOC_DIR = MPDL_DOC_DIR + "/documents/echo";
+  private static String REGULARIZATION_DATA_DIR = MPDL_EXIST_DATA_DIR + "/dataFiles/regularization";
+  private static String REGULARIZATION_DB_DIR = MPDL_EXIST_DATA_DIR + "/dataBerkeleyDB/regularization";
+  private static String[] LANGUAGES = {"ar", "de", "el", "en", "fr", "it", "la", "nl", "zh"};
+  private DBRegularizationHandler dbRegHandler;
+  private Hashtable<String, ArrayList<Regularization>> regsOrig;
+  private Hashtable<String, ArrayList<Regularization>> regsNorm;
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
+  public static RegularizationManager getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new RegularizationManager();
+      instance.init();
+    }
+    return instance;
+  }
+
+  public static void main(String[] args) throws ApplicationException {
+    getInstance();
+    instance.beginOperation();
+    System.out.print("Start ...");
+
+    // instance.writeAllRegs();
+
+    ArrayList<Regularization> regs = instance.findRegsByNorm("la", "Illiusque");
+    Regularization firstReg = regs.get(0);
+    System.out.println(firstReg.getXmlString());
+    
+    instance.end();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    System.out.println("End.");
+    System.out.println("Needed time: " + elapsedTime + " seconds");
+  }
+
+  private void init() throws ApplicationException {
+    regsOrig = new Hashtable<String, ArrayList<Regularization>>();
+    regsNorm = new Hashtable<String, ArrayList<Regularization>>();
+    dbRegHandler = new DBRegularizationHandler(REGULARIZATION_DB_DIR);
+    dbRegHandler.start();
+    dbRegHandler.openDatabases();
+  }
+
+  public ArrayList<Regularization> findRegsByOrig(String language, String orig) throws ApplicationException {
+    orig = orig.toLowerCase();
+    String hashKey = language + "###" + orig;
+    ArrayList<Regularization> regs = regsOrig.get(hashKey);
+    if (regs == null) {
+      regs = dbRegHandler.readRegsByOrig(language, orig);
+      if (regs == null || regs.isEmpty())
+        regsOrig.put(hashKey, new ArrayList<Regularization>());
+      else
+        regsOrig.put(hashKey, regs);
+    }
+    return regs;
+  }
+  
+  public ArrayList<Regularization> findRegsByNorm(String language, String norm) throws ApplicationException {
+    norm = norm.toLowerCase();
+    String hashKey = language + "###" + norm;
+    ArrayList<Regularization> regs = regsNorm.get(hashKey);
+    if (regs == null || regs.isEmpty()) {
+      regs = dbRegHandler.readRegsByNorm(language, norm);
+      if (regs == null)
+        regsNorm.put(hashKey, new ArrayList<Regularization>());
+      else
+        regsNorm.put(hashKey, regs);
+    }
+    return regs;
+  }
+  
+  public ArrayList<String> getRegOrigsByNormLuceneQueryString(String language, String luceneQueryString) throws ApplicationException {
+    ArrayList<String> regForms = new ArrayList<String>();
+    LuceneUtil luceneUtil = LuceneUtil.getInstance();
+    ArrayList<String> variants = luceneUtil.getVariantsFromLuceneQuery(luceneQueryString);
+    if (variants != null) {
+      for (int i=0; i<variants.size(); i++) {
+        String variant = variants.get(i);
+        ArrayList<Regularization> regs = findRegsByNorm(language, variant);
+        if (regs != null) {
+          for (int j=0; j<regs.size(); j++) {
+            Regularization reg = regs.get(j);
+            String orig = reg.getOrig();
+            regForms.add(orig);
+          }
+        }
+      }
+    }
+    return regForms;
+  }
+  
+  public void saveRegularizations(String language, String docFileName) throws ApplicationException {
+    Hashtable<String, Regularization> hashTableRegOrig = new Hashtable<String, Regularization>();
+    Hashtable<String, Regularization> hashTableRegNorm = new Hashtable<String, Regularization>();
+    File docFile = new File(docFileName);
+    // hack: all regs of the document are held in the two hash tables
+    getRegs(language, docFile, hashTableRegOrig, hashTableRegNorm);
+    // write all regs to DB
+    writeRegsOrigToDb(hashTableRegOrig);
+    writeRegsNormToDb(hashTableRegNorm);
+  }
+  
+  private void getRegs(String language, File docFile, Hashtable<String, Regularization> hashTableRegOrig, Hashtable<String, Regularization> hashTableRegNorm) throws ApplicationException {
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    InputSource docFileInputSource = new InputSource(docFile.toURI().getPath());
+    SchemaHandler echoSchemaHandler = new SchemaHandler();
+    NamespaceContext echoNS = echoSchemaHandler.getEchoNsContext();
+    ArrayList<Node> regsArray = xmlUtil.evaluateToNodeArray(docFileInputSource, "//echo:reg", echoNS);
+    if (regsArray != null) {
+      String docFileName = docFile.getName();
+      for (int i=0; i<regsArray.size(); i++) {
+        Node regNode = regsArray.get(i);
+        String regOrigStr = xmlUtil.getNodeValue(regNode);
+        String regNormStr = xmlUtil.getNodeAttributeValue(regNode, "norm");
+        if (regOrigStr != null && (! regOrigStr.equals("")) && regNormStr != null && (! regNormStr.equals(""))) {
+          regOrigStr = regOrigStr.toLowerCase();
+          regNormStr = regNormStr.toLowerCase();
+          Regularization reg = new Regularization(language, regOrigStr, regNormStr, docFileName);
+          reg.setSourcePosition(i);
+          Regularization regByOrigStr = hashTableRegOrig.get(regOrigStr);
+          if (regByOrigStr == null)
+            hashTableRegOrig.put(regOrigStr, reg);
+          Regularization regByNormStr = hashTableRegNorm.get(regNormStr);
+          if (regByNormStr == null)
+            hashTableRegNorm.put(regNormStr, reg);
+        }
+      }
+    }
+  }
+
+  private void writeRegsOrigToDb(Hashtable<String, Regularization> hashTableRegOrig) throws ApplicationException {
+    Enumeration<Regularization> regElements = hashTableRegOrig.elements();
+    while (regElements.hasMoreElements()) {
+      Regularization reg = regElements.nextElement();
+      boolean regAlreadyExists = false;
+      String language = reg.getLanguage();
+      String orig = reg.getOrig();
+      String norm = reg.getNorm();
+      ArrayList<Regularization> existingRegs = dbRegHandler.readRegsByOrig(language, orig);
+      if (existingRegs != null && existingRegs.size() > 0) {
+        for (int i=0; i<existingRegs.size(); i++) {
+          Regularization r = existingRegs.get(i);
+          String rNorm = r.getNorm();
+          if (rNorm.equals(norm))
+            regAlreadyExists = true;
+        }
+      }
+      if (! regAlreadyExists)
+        dbRegHandler.writeOrigReg(reg);
+    }
+  }
+    
+  private void writeRegsNormToDb(Hashtable<String, Regularization> hashTableRegNorm) throws ApplicationException {
+    Enumeration<Regularization> regElements = hashTableRegNorm.elements();
+    while (regElements.hasMoreElements()) {
+      Regularization reg = regElements.nextElement();
+      boolean regAlreadyExists = false;
+      String language = reg.getLanguage();
+      String orig = reg.getOrig();
+      String norm = reg.getNorm();
+      ArrayList<Regularization> existingRegs = dbRegHandler.readRegsByNorm(language, norm);
+      if (existingRegs != null && existingRegs.size() > 0) {
+        for (int i=0; i<existingRegs.size(); i++) {
+          Regularization r = existingRegs.get(i);
+          String rOrig = r.getOrig();
+          if (rOrig.equals(orig))
+            regAlreadyExists = true;
+        }
+      }
+      if (! regAlreadyExists)
+        dbRegHandler.writeNormReg(reg);
+    }
+  }
+    
+  private void writeAllRegs() throws ApplicationException {
+    BufferedOutputStream regOut = null;
+    try {
+      for (int i=0; i<LANGUAGES.length; i++) {
+        String language = LANGUAGES[i];
+        String docDir = ECHO_DOC_DIR + "/" + language;
+        FileUtil fileUtil = FileUtil.getInstance();
+        FilenameFilter filter = new FilenameFilterExtension("xml");
+        File[] docFiles = fileUtil.getFiles(docDir, filter); 
+        if (docFiles != null && docFiles.length > 0) {
+          Hashtable<String, Regularization> hashTableRegOrig = new Hashtable<String, Regularization>();
+          Hashtable<String, Regularization> hashTableRegNorm = new Hashtable<String, Regularization>();
+          for (int j=0; j<docFiles.length; j++) {
+            File docFile = docFiles[j];
+            getRegs(language, docFile, hashTableRegOrig, hashTableRegNorm);
+          }
+          String regOutFileName = REGULARIZATION_DATA_DIR + "/" + "regularization-" + language + ".xml";
+          File regOutFile = new File(regOutFileName);
+          regOut = new BufferedOutputStream(new FileOutputStream(regOutFile));
+          write("<regularizations>\n", regOut);
+          writeRegs(hashTableRegOrig, regOut);
+          writeRegsToDb(hashTableRegOrig, hashTableRegNorm);
+          write("</regularizations>", regOut);
+          // close this language's output stream before processing the next language (avoids leaking file handles)
+          try { regOut.close(); } catch (Exception e) { }
+          regOut = null;
+        }
+      }
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (regOut != null) try { regOut.close(); } catch (Exception e) { }
+    }
+  }
+  
+  private void writeRegs(Hashtable<String, Regularization> hashTableReg, BufferedOutputStream regOut) throws ApplicationException {
+    Enumeration<Regularization> regElements = hashTableReg.elements();
+    while (regElements.hasMoreElements()) {
+      Regularization reg = regElements.nextElement();
+      String regXmlStr = reg.getXmlString();
+      write(regXmlStr, regOut);
+    }
+  }
+  
+  private void writeRegsToDb(Hashtable<String, Regularization> hashTableRegOrig, Hashtable<String, Regularization> hashTableRegNorm) throws ApplicationException {
+    Enumeration<Regularization> regElements = hashTableRegOrig.elements();
+    while (regElements.hasMoreElements()) {
+      Regularization reg = regElements.nextElement();
+      dbRegHandler.writeOrigReg(reg);
+    }
+    regElements = hashTableRegNorm.elements();
+    while (regElements.hasMoreElements()) {
+      Regularization reg = regElements.nextElement();
+      dbRegHandler.writeNormReg(reg);
+    }
+  }
+  
+  private void deleteDbData() throws ApplicationException {
+    dbRegHandler.deleteData();
+  }
+  
+  private void write(String inputString, BufferedOutputStream out) throws ApplicationException {
+    try {
+      byte[] bytes = inputString.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  public void end() throws ApplicationException {
+    dbRegHandler.closeDatabases();
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+
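+  /* Usage sketch (illustrative only): expand a normalized query term to the original
+   * (unnormalized) spellings recorded in the <reg> elements of the echo documents:
+   *
+   *   RegularizationManager regManager = RegularizationManager.getInstance();
+   *   ArrayList<String> origForms = regManager.getRegOrigsByNormLuceneQueryString("la", "illiusque");
+   *   // origForms holds every stored orig form whose norm matches a variant of the query term
+   *   regManager.end();  // close the underlying Berkeley DB databases
+   */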
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2Unicode.lex	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,332 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.general;
+
+%%
+%{
+    /*
+     * Betacode to Unicode conversion
+     */
+
+  private String toUnicode(int in) {
+    char c = (char) in;
+    Character ch = new Character(c);
+    String retString = ch.toString();
+    return retString;
+  }
+
+%}
+
+%class Betacode2UnicodeLex
+%public
+%type java.lang.String
+%unicode
+%%
+
+
+"<"[^>]+">" { return yytext(); }
+
+"*j"    { return "H"; }
+"j"     { return "h"; }
+"*v"    { return "F"; }
+"v"    { return "f"; }
+"*s"    { return toUnicode(0x03a3); }
+
+"!" 	{ return "."; }
+":"   { return toUnicode(0x00B7); }  /* MPDL update  */
+
+"a)"	{ return toUnicode(0x1F00); }
+"a("	{ return toUnicode(0x1F01); }
+"a)\\"	{ return toUnicode(0x1F02); }
+"a(\\"	{ return toUnicode(0x1F03); }
+"a)/"	{ return toUnicode(0x1F04); }
+"a(/"	{ return toUnicode(0x1F05); }
+"a)="	{ return toUnicode(0x1F06); }
+"a(="	{ return toUnicode(0x1F07); }
+"*)a"	{ return toUnicode(0x1F08); }
+"*(a"	{ return toUnicode(0x1F09); }
+"*)\\a"	{ return toUnicode(0x1F0A); }
+"*(\\a"	{ return toUnicode(0x1F0B); }
+"*)/a"	{ return toUnicode(0x1F0C); }
+"*(/a"	{ return toUnicode(0x1F0D); }
+"*)=a"	{ return toUnicode(0x1F0E); }
+"*(=a"	{ return toUnicode(0x1F0F); }
+"e)"	{ return toUnicode(0x1F10); }
+"e("	{ return toUnicode(0x1F11); }
+"e)\\"	{ return toUnicode(0x1F12); }
+"e(\\"	{ return toUnicode(0x1F13); }
+"e)/"	{ return toUnicode(0x1F14); }
+"e(/"	{ return toUnicode(0x1F15); }
+"*)e"	{ return toUnicode(0x1F18); }
+"*(e"	{ return toUnicode(0x1F19); }
+"*)\\e"	{ return toUnicode(0x1F1A); }
+"*(\\e"	{ return toUnicode(0x1F1B); }
+"*)/e"	{ return toUnicode(0x1F1C); }
+"*(/e"	{ return toUnicode(0x1F1D); }
+"h)"	{ return toUnicode(0x1F20); }
+"h("	{ return toUnicode(0x1F21); }
+"h)\\"	{ return toUnicode(0x1F22); }
+"h(\\"	{ return toUnicode(0x1F23); }
+"h)/"	{ return toUnicode(0x1F24); }
+"h(/"	{ return toUnicode(0x1F25); }
+"h)="	{ return toUnicode(0x1F26); }
+"h(="	{ return toUnicode(0x1F27); }
+"*)h"	{ return toUnicode(0x1F28); }
+"*(h"	{ return toUnicode(0x1F29); }
+"*)\\h"	{ return toUnicode(0x1F2A); }
+"*(\\h"	{ return toUnicode(0x1F2B); }
+"*)/h"	{ return toUnicode(0x1F2C); }
+"*(/h"	{ return toUnicode(0x1F2D); }
+"*)=h"	{ return toUnicode(0x1F2E); }
+"*(=h"	{ return toUnicode(0x1F2F); }
+"i)"	{ return toUnicode(0x1F30); }
+"i("	{ return toUnicode(0x1F31); }
+"i)\\"	{ return toUnicode(0x1F32); }
+"i(\\"	{ return toUnicode(0x1F33); }
+"i)/"	{ return toUnicode(0x1F34); }
+"i(/"	{ return toUnicode(0x1F35); }
+"i)="	{ return toUnicode(0x1F36); }
+"i(="	{ return toUnicode(0x1F37); }
+"*)i"	{ return toUnicode(0x1F38); }
+"*(i"	{ return toUnicode(0x1F39); }
+"*)\\i"	{ return toUnicode(0x1F3A); }
+"*(\\i"	{ return toUnicode(0x1F3B); }
+"*)/i"	{ return toUnicode(0x1F3C); }
+"*(/i"	{ return toUnicode(0x1F3D); }
+"*)=i"	{ return toUnicode(0x1F3E); }
+"*(=i"	{ return toUnicode(0x1F3F); }
+"o)"	{ return toUnicode(0x1F40); }
+"o("	{ return toUnicode(0x1F41); }
+"o)\\"	{ return toUnicode(0x1F42); }
+"o(\\"	{ return toUnicode(0x1F43); }
+"o)/"	{ return toUnicode(0x1F44); }
+"o(/"	{ return toUnicode(0x1F45); }
+"*)o"	{ return toUnicode(0x1F48); }
+"*(o"	{ return toUnicode(0x1F49); }
+"*)\\o"	{ return toUnicode(0x1F4A); }
+"*(\\o"	{ return toUnicode(0x1F4B); }
+"*)/o"	{ return toUnicode(0x1F4C); }
+"*(/o"	{ return toUnicode(0x1F4D); }
+"u)"	{ return toUnicode(0x1F50); }
+"u("	{ return toUnicode(0x1F51); }
+"u)\\"	{ return toUnicode(0x1F52); }
+"u(\\"	{ return toUnicode(0x1F53); }
+"u)/"	{ return toUnicode(0x1F54); }
+"u(/"	{ return toUnicode(0x1F55); }
+"u)="	{ return toUnicode(0x1F56); }
+"u(="	{ return toUnicode(0x1F57); }
+"*(u"	{ return toUnicode(0x1F59); }
+"*(\\u"	{ return toUnicode(0x1F5B); }
+"*(/u"	{ return toUnicode(0x1F5D); }
+"*(=u"	{ return toUnicode(0x1F5F); }
+"w)"	{ return toUnicode(0x1F60); }
+"w("	{ return toUnicode(0x1F61); }
+"w)\\"	{ return toUnicode(0x1F62); }
+"w(\\"	{ return toUnicode(0x1F63); }
+"w)/"	{ return toUnicode(0x1F64); }
+"w(/"	{ return toUnicode(0x1F65); }
+"w)="	{ return toUnicode(0x1F66); }
+"w(="	{ return toUnicode(0x1F67); }
+"*)w"	{ return toUnicode(0x1F68); }
+"*(w"	{ return toUnicode(0x1F69); }
+"*)\\w"	{ return toUnicode(0x1F6A); }
+"*(\\w"	{ return toUnicode(0x1F6B); }
+"*)/w"	{ return toUnicode(0x1F6C); }
+"*(/w"	{ return toUnicode(0x1F6D); }
+"*)=w"	{ return toUnicode(0x1F6E); }
+"*(=w"	{ return toUnicode(0x1F6F); }
+"a\\"	{ return toUnicode(0x1F70); }
+"a/"	{ return toUnicode(0x1F71); }
+"e\\"	{ return toUnicode(0x1F72); }
+"e/"	{ return toUnicode(0x1F73); }
+"h\\"	{ return toUnicode(0x1F74); }
+"h/"	{ return toUnicode(0x1F75); }
+"i\\"	{ return toUnicode(0x1F76); }
+"i/"	{ return toUnicode(0x1F77); }
+"o\\"	{ return toUnicode(0x1F78); }
+"o/"	{ return toUnicode(0x1F79); }
+"u\\"	{ return toUnicode(0x1F7A); }
+"u/"	{ return toUnicode(0x1F7B); }
+"w\\"	{ return toUnicode(0x1F7C); }
+"w/"	{ return toUnicode(0x1F7D); }
+"a)|"	{ return toUnicode(0x1F80); }
+"a(|"	{ return toUnicode(0x1F81); }
+"a)\\|"	{ return toUnicode(0x1F82); }
+"a(\\|"	{ return toUnicode(0x1F83); }
+"a)/|"	{ return toUnicode(0x1F84); }
+"a(/|"	{ return toUnicode(0x1F85); }
+"a)=|"	{ return toUnicode(0x1F86); }
+"a(=|"	{ return toUnicode(0x1F87); }
+"*)|a"	{ return toUnicode(0x1F88); }
+"*(|a"	{ return toUnicode(0x1F89); }
+"*)\\|a"	{ return toUnicode(0x1F8A); }
+"*(\\|a"	{ return toUnicode(0x1F8B); }
+"*)/|a"	{ return toUnicode(0x1F8C); }
+"*(/|a"	{ return toUnicode(0x1F8D); }
+"*)=|a"	{ return toUnicode(0x1F8E); }
+"*(=|a"	{ return toUnicode(0x1F8F); }
+"h)|"	{ return toUnicode(0x1F90); }
+"h(|"	{ return toUnicode(0x1F91); }
+"h)\\|"	{ return toUnicode(0x1F92); }
+"h(\\|"	{ return toUnicode(0x1F93); }
+"h)/|"	{ return toUnicode(0x1F94); }
+"h(/|"	{ return toUnicode(0x1F95); }
+"h)=|"	{ return toUnicode(0x1F96); }
+"h(=|"	{ return toUnicode(0x1F97); }
+"*)|h"	{ return toUnicode(0x1F98); }
+"*(|h"	{ return toUnicode(0x1F99); }
+"*)\\|h"	{ return toUnicode(0x1F9A); }
+"*(\\|h"	{ return toUnicode(0x1F9B); }
+"*)/|h"	{ return toUnicode(0x1F9C); }
+"*(/|h"	{ return toUnicode(0x1F9D); }
+"*)=|h"	{ return toUnicode(0x1F9E); }
+"*(=|h"	{ return toUnicode(0x1F9F); }
+"w)|"	{ return toUnicode(0x1FA0); }
+"w(|"	{ return toUnicode(0x1FA1); }
+"w)\\|"	{ return toUnicode(0x1FA2); }
+"w(\\|"	{ return toUnicode(0x1FA3); }
+"w)/|"	{ return toUnicode(0x1FA4); }
+"w(/|"	{ return toUnicode(0x1FA5); }
+"w)=|"	{ return toUnicode(0x1FA6); }
+"w(=|"	{ return toUnicode(0x1FA7); }
+"*)|w"	{ return toUnicode(0x1FA8); }
+"*(|w"	{ return toUnicode(0x1FA9); }
+"*)\\|w"	{ return toUnicode(0x1FAA); }
+"*(\\|w"	{ return toUnicode(0x1FAB); }
+"*)/|w"	{ return toUnicode(0x1FAC); }
+"*(/|w"	{ return toUnicode(0x1FAD); }
+"*)=|w"	{ return toUnicode(0x1FAE); }
+"*(=|w"	{ return toUnicode(0x1FAF); }
+"a^"	{ return toUnicode(0x1FB0); }
+"a_"	{ return toUnicode(0x1FB1); }
+"a\\|"	{ return toUnicode(0x1FB2); }
+"a|"	{ return toUnicode(0x1FB3); }
+"a/|"	{ return toUnicode(0x1FB4); }
+"a="	{ return toUnicode(0x1FB6); }
+"a=|"	{ return toUnicode(0x1FB7); }
+"*a^"	{ return toUnicode(0x1FB8); }
+"*a_"	{ return toUnicode(0x1FB9); }
+"*a\\"	{ return toUnicode(0x1FBA); }
+"*a/"	{ return toUnicode(0x1FBB); }
+"*a|"	{ return toUnicode(0x1FBC); }
+"h\\|"	{ return toUnicode(0x1FC2); }
+"h|"	{ return toUnicode(0x1FC3); }
+"h/|"	{ return toUnicode(0x1FC4); }
+"h="	{ return toUnicode(0x1FC6); }
+"h=|"	{ return toUnicode(0x1FC7); }
+"*e\\"	{ return toUnicode(0x1FC8); }
+"*e/"	{ return toUnicode(0x1FC9); }
+"*h\\"	{ return toUnicode(0x1FCA); }
+"*h/"	{ return toUnicode(0x1FCB); }
+"*h|"	{ return toUnicode(0x1FCC); }
+"i^"	{ return toUnicode(0x1FD0); }
+"i_"	{ return toUnicode(0x1FD1); }
+"i+\\"	{ return toUnicode(0x1FD2); }
+"i+/"	{ return toUnicode(0x1FD3); }
+"i="	{ return toUnicode(0x1FD6); }
+"i+="	{ return toUnicode(0x1FD7); }
+"*i^"	{ return toUnicode(0x1FD8); }
+"*i_"	{ return toUnicode(0x1FD9); }
+"*i\\"	{ return toUnicode(0x1FDA); }
+"*i/"	{ return toUnicode(0x1FDB); }
+"u^"	{ return toUnicode(0x1FE0); }
+"u_"	{ return toUnicode(0x1FE1); }
+"u+\\"	{ return toUnicode(0x1FE2); }
+"u+/"	{ return toUnicode(0x1FE3); }
+"r)"	{ return toUnicode(0x1FE4); }
+"r("	{ return toUnicode(0x1FE5); }
+"u="	{ return toUnicode(0x1FE6); }
+"u+="	{ return toUnicode(0x1FE7); }
+"*u^"	{ return toUnicode(0x1FE8); }
+"*u_"	{ return toUnicode(0x1FE9); }
+"*u\\"	{ return toUnicode(0x1FEA); }
+"*u/"	{ return toUnicode(0x1FEB); }
+"*(r"	{ return toUnicode(0x1FEC); }
+"w\\|"	{ return toUnicode(0x1FF2); }
+"w|"	{ return toUnicode(0x1FF3); }
+"w/|"	{ return toUnicode(0x1FF4); }
+"*w\\"	{ return toUnicode(0x1FFA); }
+"*w/"	{ return toUnicode(0x1FFB); }
+"*w|"	{ return toUnicode(0x1FFC); }
+"w="	{ return toUnicode(0x1FF6); }
+"w=|"	{ return toUnicode(0x1FF7); }
+"*o\\"	{ return toUnicode(0x1FF8); }
+"*o/"	{ return toUnicode(0x1FF9); }
+
+"\\"	{ return toUnicode(0x0300); }
+"/"		{ return toUnicode(0x0301); }
+"_"		{ return toUnicode(0x0304); }
+"^"		{ return toUnicode(0x0306); }
+"+"		{ return toUnicode(0x0308); }
+"="		{ return toUnicode(0x0302); }
+")"		{ return toUnicode(0x0313); }
+"("		{ return toUnicode(0x0314); }
+"?"		{ return toUnicode(0x0323); }
+"|"		{ return toUnicode(0x0345); }
+
+"a"		 { return toUnicode(0x03b1); }  /* MPDL update  */
+"*a"   { return toUnicode(0x0391); }  /* MPDL update  */
+"b"		{ return toUnicode(0x03b2); }   /* MPDL update  */
+"*b"   { return toUnicode(0x0392); }  /* MPDL update  */
+"g"		{ return toUnicode(0x03b3); }   /* MPDL update  */
+"*g"   { return toUnicode(0x0393); }  /* MPDL update  */
+"d"		{ return toUnicode(0x03b4); }   /* MPDL update  */
+"*d"   { return toUnicode(0x0394); }  /* MPDL update  */
+"e"		{ return toUnicode(0x03b5); }   /* MPDL update  */
+"*e"   { return toUnicode(0x0395); }  /* MPDL update  */
+"z"		{ return toUnicode(0x03b6); }   /* MPDL update  */
+"*z"   { return toUnicode(0x0396); }  /* MPDL update  */
+"h"		{ return toUnicode(0x03b7); }   /* MPDL update  */
+"*h"   { return toUnicode(0x0397); }  /* MPDL update  */
+"q"		{ return toUnicode(0x03b8); }   /* MPDL update  */
+"*q"   { return toUnicode(0x0398); }  /* MPDL update  */
+"i"		{ return toUnicode(0x03b9); }   /* MPDL update  */
+"*i"   { return toUnicode(0x0399); }  /* MPDL update  */
+"k"		{ return toUnicode(0x03ba); }   /* MPDL update  */
+"*k"   { return toUnicode(0x039a); }  /* MPDL update  */
+"l"		{ return toUnicode(0x03bb); }   /* MPDL update  */
+"*l"   { return toUnicode(0x039b); }  /* MPDL update  */
+"m"		{ return toUnicode(0x03bc); }   /* MPDL update  */
+"*m"   { return toUnicode(0x039c); }  /* MPDL update  */
+"n"		{ return toUnicode(0x03bd); }   /* MPDL update  */
+"*n"   { return toUnicode(0x039d); }  /* MPDL update  */
+"c"		{ return toUnicode(0x03be); }   /* MPDL update  */
+"*c"   { return toUnicode(0x039e); }  /* MPDL update  */
+"o"		{ return toUnicode(0x03bf); }   /* MPDL update  */
+"*o"   { return toUnicode(0x039f); }  /* MPDL update  */
+"p"		{ return toUnicode(0x03c0); }   /* MPDL update  */
+"*p"   { return toUnicode(0x03a0); }  /* MPDL update  */
+"r"		{ return toUnicode(0x03c1); }   /* MPDL update  */
+"*r"   { return toUnicode(0x03a1); }  /* MPDL update  */
+
+"*s"  { return toUnicode(0x03a3); }  /* MPDL update  */
+"s1"	{ return toUnicode(0x03c3); } /* mdh 2002-01-07 */
+"s"/\-\-	{ return toUnicode(0x03c2); }   
+"s"/\&gt; }[a-z\?\!0-9*=\/()\'\-] { return toUnicode(0x03c3); }  /* MPDL update  */
+"s"/\&lt; { return toUnicode(0x03c2); }  /* MPDL update  */
+"s"/[\[\]][a-z\?\!0-9*=\/()\'\-] { return toUnicode(0x03c3); }  /* MPDL update  */
+"s"/\??[^a-z0-9*=\/()\'\-\[\?] { return toUnicode(0x03c2); }
+"s"		{ return toUnicode(0x03c3); }  /* MPDL update  */
+
+"t"		{ return toUnicode(0x03c4); }   /* MPDL update  */
+"*t"   { return toUnicode(0x03a4); }  /* MPDL update  */
+"u"		{ return toUnicode(0x03c5); }   /* MPDL update  */
+"*u"   { return toUnicode(0x03a5); }  /* MPDL update  */
+"f"		{ return toUnicode(0x03c6); }   /* MPDL update  */
+"*f"   { return toUnicode(0x03a6); }  /* MPDL update  */
+"x"		{ return toUnicode(0x03c7); }   /* MPDL update  */
+"*x"   { return toUnicode(0x03a7); }  /* MPDL update  */
+"y"		{ return toUnicode(0x03c8); }   /* MPDL update  */
+"*y"   { return toUnicode(0x03a8); }  /* MPDL update  */
+"w"		{ return toUnicode(0x03c9); }   /* MPDL update  */
+"*w"   { return toUnicode(0x03a9); }  /* MPDL update  */
+
+[\&_]"vert;"   { return "|"; }
+[\&_]"lpar;"   { return "("; }
+[\&_]"rpar;"   { return ")"; }
+[\_\&]"lt;"    { return "&lt;"; }
+[\_\&]"gt;"    { return "&gt;"; }
+"&#039;"       { return "'"; }  /* MPDL update  */
+
+"&"[a-zA-Z]+";"  { return yytext(); }
+
+.       { return yytext(); }
+\n      { return yytext(); }
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2Unicode.lex.old	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,318 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.general;
+
+%%
+%{
+    /*
+     * Betacode to Unicode conversion
+     */
+
+  private int isUpper = 0;
+
+  private String toUnicodeGreek(int in) {
+    String retStr = toUnicode(in - (isUpper * 0x0020));
+    isUpper = 0;
+    return retStr;
+  }
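+
+  /*
+   * Example: the rules section maps "*" to "isUpper = 1" without returning a
+   * token, so the Betacode input "*a" first sets isUpper and then matches the
+   * "a" rule, which calls toUnicodeGreek(0x03b1); subtracting isUpper * 0x0020
+   * yields 0x0391 (capital alpha) and isUpper is reset to 0.
+   */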
+
+  private String toUnicode(int in) {
+    char c = (char) in;
+    Character ch = new Character(c);
+    String retString = ch.toString();
+    return retString;
+  }
+
+%}
+
+%class Betacode2UnicodeLex
+%public
+%type java.lang.String
+%unicode
+%%
+
+
+"<"[^>]+">" { return yytext(); }
+
+"*j"    { return "H"; }
+"j"     { return "h"; }
+"*v"    { return "F"; }
+"v"    { return "f"; }
+"*s"    { return toUnicode(0x03a3); }
+
+"!" 	{ return "."; }
+":"   { return toUnicode(0x00B7); }  /* MPDL update  */
+
+"a)"	{ return toUnicode(0x1F00); }
+"a("	{ return toUnicode(0x1F01); }
+"a)\\"	{ return toUnicode(0x1F02); }
+"a(\\"	{ return toUnicode(0x1F03); }
+"a)/"	{ return toUnicode(0x1F04); }
+"a(/"	{ return toUnicode(0x1F05); }
+"a)="	{ return toUnicode(0x1F06); }
+"a(="	{ return toUnicode(0x1F07); }
+"*)a"	{ return toUnicode(0x1F08); }
+"*(a"	{ return toUnicode(0x1F09); }
+"*)\\a"	{ return toUnicode(0x1F0A); }
+"*(\\a"	{ return toUnicode(0x1F0B); }
+"*)/a"	{ return toUnicode(0x1F0C); }
+"*(/a"	{ return toUnicode(0x1F0D); }
+"*)=a"	{ return toUnicode(0x1F0E); }
+"*(=a"	{ return toUnicode(0x1F0F); }
+"e)"	{ return toUnicode(0x1F10); }
+"e("	{ return toUnicode(0x1F11); }
+"e)\\"	{ return toUnicode(0x1F12); }
+"e(\\"	{ return toUnicode(0x1F13); }
+"e)/"	{ return toUnicode(0x1F14); }
+"e(/"	{ return toUnicode(0x1F15); }
+"*)e"	{ return toUnicode(0x1F18); }
+"*(e"	{ return toUnicode(0x1F19); }
+"*)\\e"	{ return toUnicode(0x1F1A); }
+"*(\\e"	{ return toUnicode(0x1F1B); }
+"*)/e"	{ return toUnicode(0x1F1C); }
+"*(/e"	{ return toUnicode(0x1F1D); }
+"h)"	{ return toUnicode(0x1F20); }
+"h("	{ return toUnicode(0x1F21); }
+"h)\\"	{ return toUnicode(0x1F22); }
+"h(\\"	{ return toUnicode(0x1F23); }
+"h)/"	{ return toUnicode(0x1F24); }
+"h(/"	{ return toUnicode(0x1F25); }
+"h)="	{ return toUnicode(0x1F26); }
+"h(="	{ return toUnicode(0x1F27); }
+"*)h"	{ return toUnicode(0x1F28); }
+"*(h"	{ return toUnicode(0x1F29); }
+"*)\\h"	{ return toUnicode(0x1F2A); }
+"*(\\h"	{ return toUnicode(0x1F2B); }
+"*)/h"	{ return toUnicode(0x1F2C); }
+"*(/h"	{ return toUnicode(0x1F2D); }
+"*)=h"	{ return toUnicode(0x1F2E); }
+"*(=h"	{ return toUnicode(0x1F2F); }
+"i)"	{ return toUnicode(0x1F30); }
+"i("	{ return toUnicode(0x1F31); }
+"i)\\"	{ return toUnicode(0x1F32); }
+"i(\\"	{ return toUnicode(0x1F33); }
+"i)/"	{ return toUnicode(0x1F34); }
+"i(/"	{ return toUnicode(0x1F35); }
+"i)="	{ return toUnicode(0x1F36); }
+"i(="	{ return toUnicode(0x1F37); }
+"*)i"	{ return toUnicode(0x1F38); }
+"*(i"	{ return toUnicode(0x1F39); }
+"*)\\i"	{ return toUnicode(0x1F3A); }
+"*(\\i"	{ return toUnicode(0x1F3B); }
+"*)/i"	{ return toUnicode(0x1F3C); }
+"*(/i"	{ return toUnicode(0x1F3D); }
+"*)=i"	{ return toUnicode(0x1F3E); }
+"*(=i"	{ return toUnicode(0x1F3F); }
+"o)"	{ return toUnicode(0x1F40); }
+"o("	{ return toUnicode(0x1F41); }
+"o)\\"	{ return toUnicode(0x1F42); }
+"o(\\"	{ return toUnicode(0x1F43); }
+"o)/"	{ return toUnicode(0x1F44); }
+"o(/"	{ return toUnicode(0x1F45); }
+"*)o"	{ return toUnicode(0x1F48); }
+"*(o"	{ return toUnicode(0x1F49); }
+"*)\\o"	{ return toUnicode(0x1F4A); }
+"*(\\o"	{ return toUnicode(0x1F4B); }
+"*)/o"	{ return toUnicode(0x1F4C); }
+"*(/o"	{ return toUnicode(0x1F4D); }
+"u)"	{ return toUnicode(0x1F50); }
+"u("	{ return toUnicode(0x1F51); }
+"u)\\"	{ return toUnicode(0x1F52); }
+"u(\\"	{ return toUnicode(0x1F53); }
+"u)/"	{ return toUnicode(0x1F54); }
+"u(/"	{ return toUnicode(0x1F55); }
+"u)="	{ return toUnicode(0x1F56); }
+"u(="	{ return toUnicode(0x1F57); }
+"*(u"	{ return toUnicode(0x1F59); }
+"*(\\u"	{ return toUnicode(0x1F5B); }
+"*(/u"	{ return toUnicode(0x1F5D); }
+"*(=u"	{ return toUnicode(0x1F5F); }
+"w)"	{ return toUnicode(0x1F60); }
+"w("	{ return toUnicode(0x1F61); }
+"w)\\"	{ return toUnicode(0x1F62); }
+"w(\\"	{ return toUnicode(0x1F63); }
+"w)/"	{ return toUnicode(0x1F64); }
+"w(/"	{ return toUnicode(0x1F65); }
+"w)="	{ return toUnicode(0x1F66); }
+"w(="	{ return toUnicode(0x1F67); }
+"*)w"	{ return toUnicode(0x1F68); }
+"*(w"	{ return toUnicode(0x1F69); }
+"*)\\w"	{ return toUnicode(0x1F6A); }
+"*(\\w"	{ return toUnicode(0x1F6B); }
+"*)/w"	{ return toUnicode(0x1F6C); }
+"*(/w"	{ return toUnicode(0x1F6D); }
+"*)=w"	{ return toUnicode(0x1F6E); }
+"*(=w"	{ return toUnicode(0x1F6F); }
+"a\\"	{ return toUnicode(0x1F70); }
+"a/"	{ return toUnicode(0x1F71); }
+"e\\"	{ return toUnicode(0x1F72); }
+"e/"	{ return toUnicode(0x1F73); }
+"h\\"	{ return toUnicode(0x1F74); }
+"h/"	{ return toUnicode(0x1F75); }
+"i\\"	{ return toUnicode(0x1F76); }
+"i/"	{ return toUnicode(0x1F77); }
+"o\\"	{ return toUnicode(0x1F78); }
+"o/"	{ return toUnicode(0x1F79); }
+"u\\"	{ return toUnicode(0x1F7A); }
+"u/"	{ return toUnicode(0x1F7B); }
+"w\\"	{ return toUnicode(0x1F7C); }
+"w/"	{ return toUnicode(0x1F7D); }
+"a)|"	{ return toUnicode(0x1F80); }
+"a(|"	{ return toUnicode(0x1F81); }
+"a)\\|"	{ return toUnicode(0x1F82); }
+"a(\\|"	{ return toUnicode(0x1F83); }
+"a)/|"	{ return toUnicode(0x1F84); }
+"a(/|"	{ return toUnicode(0x1F85); }
+"a)=|"	{ return toUnicode(0x1F86); }
+"a(=|"	{ return toUnicode(0x1F87); }
+"*)|a"	{ return toUnicode(0x1F88); }
+"*(|a"	{ return toUnicode(0x1F89); }
+"*)\\|a"	{ return toUnicode(0x1F8A); }
+"*(\\|a"	{ return toUnicode(0x1F8B); }
+"*)/|a"	{ return toUnicode(0x1F8C); }
+"*(/|a"	{ return toUnicode(0x1F8D); }
+"*)=|a"	{ return toUnicode(0x1F8E); }
+"*(=|a"	{ return toUnicode(0x1F8F); }
+"h)|"	{ return toUnicode(0x1F90); }
+"h(|"	{ return toUnicode(0x1F91); }
+"h)\\|"	{ return toUnicode(0x1F92); }
+"h(\\|"	{ return toUnicode(0x1F93); }
+"h)/|"	{ return toUnicode(0x1F94); }
+"h(/|"	{ return toUnicode(0x1F95); }
+"h)=|"	{ return toUnicode(0x1F96); }
+"h(=|"	{ return toUnicode(0x1F97); }
+"*)|h"	{ return toUnicode(0x1F98); }
+"*(|h"	{ return toUnicode(0x1F99); }
+"*)\\|h"	{ return toUnicode(0x1F9A); }
+"*(\\|h"	{ return toUnicode(0x1F9B); }
+"*)/|h"	{ return toUnicode(0x1F9C); }
+"*(/|h"	{ return toUnicode(0x1F9D); }
+"*)=|h"	{ return toUnicode(0x1F9E); }
+"*(=|h"	{ return toUnicode(0x1F9F); }
+"w)|"	{ return toUnicode(0x1FA0); }
+"w(|"	{ return toUnicode(0x1FA1); }
+"w)\\|"	{ return toUnicode(0x1FA2); }
+"w(\\|"	{ return toUnicode(0x1FA3); }
+"w)/|"	{ return toUnicode(0x1FA4); }
+"w(/|"	{ return toUnicode(0x1FA5); }
+"w)=|"	{ return toUnicode(0x1FA6); }
+"w(=|"	{ return toUnicode(0x1FA7); }
+"*)|w"	{ return toUnicode(0x1FA8); }
+"*(|w"	{ return toUnicode(0x1FA9); }
+"*)\\|w"	{ return toUnicode(0x1FAA); }
+"*(\\|w"	{ return toUnicode(0x1FAB); }
+"*)/|w"	{ return toUnicode(0x1FAC); }
+"*(/|w"	{ return toUnicode(0x1FAD); }
+"*)=|w"	{ return toUnicode(0x1FAE); }
+"*(=|w"	{ return toUnicode(0x1FAF); }
+"a^"	{ return toUnicode(0x1FB0); }
+"a_"	{ return toUnicode(0x1FB1); }
+"a\\|"	{ return toUnicode(0x1FB2); }
+"a|"	{ return toUnicode(0x1FB3); }
+"a/|"	{ return toUnicode(0x1FB4); }
+"a="	{ return toUnicode(0x1FB6); }
+"a=|"	{ return toUnicode(0x1FB7); }
+"*a^"	{ return toUnicode(0x1FB8); }
+"*a_"	{ return toUnicode(0x1FB9); }
+"*a\\"	{ return toUnicode(0x1FBA); }
+"*a/"	{ return toUnicode(0x1FBB); }
+"*a|"	{ return toUnicode(0x1FBC); }
+"h\\|"	{ return toUnicode(0x1FC2); }
+"h|"	{ return toUnicode(0x1FC3); }
+"h/|"	{ return toUnicode(0x1FC4); }
+"h="	{ return toUnicode(0x1FC6); }
+"h=|"	{ return toUnicode(0x1FC7); }
+"*e\\"	{ return toUnicode(0x1FC8); }
+"*e/"	{ return toUnicode(0x1FC9); }
+"*h\\"	{ return toUnicode(0x1FCA); }
+"*h/"	{ return toUnicode(0x1FCB); }
+"*h|"	{ return toUnicode(0x1FCC); }
+"i^"	{ return toUnicode(0x1FD0); }
+"i_"	{ return toUnicode(0x1FD1); }
+"i+\\"	{ return toUnicode(0x1FD2); }
+"i+/"	{ return toUnicode(0x1FD3); }
+"i="	{ return toUnicode(0x1FD6); }
+"i+="	{ return toUnicode(0x1FD7); }
+"*i^"	{ return toUnicode(0x1FD8); }
+"*i_"	{ return toUnicode(0x1FD9); }
+"*i\\"	{ return toUnicode(0x1FDA); }
+"*i/"	{ return toUnicode(0x1FDB); }
+"u^"	{ return toUnicode(0x1FE0); }
+"u_"	{ return toUnicode(0x1FE1); }
+"u+\\"	{ return toUnicode(0x1FE2); }
+"u+/"	{ return toUnicode(0x1FE3); }
+"r)"	{ return toUnicode(0x1FE4); }
+"r("	{ return toUnicode(0x1FE5); }
+"u="	{ return toUnicode(0x1FE6); }
+"u+="	{ return toUnicode(0x1FE7); }
+"*u^"	{ return toUnicode(0x1FE8); }
+"*u_"	{ return toUnicode(0x1FE9); }
+"*u\\"	{ return toUnicode(0x1FEA); }
+"*u/"	{ return toUnicode(0x1FEB); }
+"*(r"	{ return toUnicode(0x1FEC); }
+"w\\|"	{ return toUnicode(0x1FF2); }
+"w|"	{ return toUnicode(0x1FF3); }
+"w/|"	{ return toUnicode(0x1FF4); }
+"*w\\"	{ return toUnicode(0x1FFA); }
+"*w/"	{ return toUnicode(0x1FFB); }
+"*w|"	{ return toUnicode(0x1FFC); }
+"w="	{ return toUnicode(0x1FF6); }
+"w=|"	{ return toUnicode(0x1FF7); }
+"*o\\"	{ return toUnicode(0x1FF8); }
+"*o/"	{ return toUnicode(0x1FF9); }
+
+"*"			isUpper = 1;
+
+"\\"	{ return toUnicode(0x0300); }
+"/"		{ return toUnicode(0x0301); }
+"_"		{ return toUnicode(0x0304); }
+"^"		{ return toUnicode(0x0306); }
+"+"		{ return toUnicode(0x0308); }
+"="		{ return toUnicode(0x0302); }
+")"		{ return toUnicode(0x0313); }
+"("		{ return toUnicode(0x0314); }
+"?"		{ return toUnicode(0x0323); }
+"|"		{ return toUnicode(0x0345); }
+
+"a"		{ return toUnicodeGreek(0x03b1); }
+"b"		{ return toUnicodeGreek(0x03b2); }
+"g"		{ return toUnicodeGreek(0x03b3); }
+"d"		{ return toUnicodeGreek(0x03b4); }
+"e"		{ return toUnicodeGreek(0x03b5); }
+"z"		{ return toUnicodeGreek(0x03b6); }
+"h"		{ return toUnicodeGreek(0x03b7); }
+"q"		{ return toUnicodeGreek(0x03b8); }
+"i"		{ return toUnicodeGreek(0x03b9); }
+"k"		{ return toUnicodeGreek(0x03ba); }
+"l"		{ return toUnicodeGreek(0x03bb); }
+"m"		{ return toUnicodeGreek(0x03bc); }
+"n"		{ return toUnicodeGreek(0x03bd); }
+"c"		{ return toUnicodeGreek(0x03be); }
+"o"		{ return toUnicodeGreek(0x03bf); }
+"p"		{ return toUnicodeGreek(0x03c0); }
+"r"		{ return toUnicodeGreek(0x03c1); }
+
+"s1"	{ return toUnicode(0x03c3); } /* mdh 2002-01-07 */
+"s"/\-\-	{ return toUnicode(0x03c2); }
+"s"/\&gt; }[a-z\?\!0-9*=\/()\'\-] { return toUnicodeGreek(0x03c3); }
+"s"/\&lt; { return toUnicodeGreek(0x03c2); }  /* MPDL update  */
+"s"/[\[\]][a-z\?\!0-9*=\/()\'\-] { return toUnicodeGreek(0x03c3); }
+"s"/\??[^a-z0-9*=\/()\'\-\[\?] { return toUnicode(0x03c2); }
+"s"		{ return toUnicodeGreek(0x03c3); }
+
+"t"		{ return toUnicodeGreek(0x03c4); }
+"u"		{ return toUnicodeGreek(0x03c5); }
+"f"		{ return toUnicodeGreek(0x03c6); }
+"x"		{ return toUnicodeGreek(0x03c7); }
+"y"		{ return toUnicodeGreek(0x03c8); }
+"w"		{ return toUnicodeGreek(0x03c9); }
+
+[\&_]"vert;"   { return "|"; }
+[\&_]"lpar;"   { return "("; }
+[\&_]"rpar;"   { return ")"; }
+[\_\&]"lt;"    { return "&lt;"; }
+[\_\&]"gt;"    { return "&gt;"; }
+"&#039;"       { return "'"; }  /* MPDL update  */
+
+"&"[a-zA-Z]+";"  { return yytext(); }
+
+.       { return yytext(); }
+\n      { return yytext(); }
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2UnicodeLex.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,1908 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.general;
+
+
+/**
+ * This class is a scanner generated by 
+ * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
+ * on 19.11.09 20:01 from the specification file
+ * <tt>/Users/jwillenborg/java/existDevMai2009/mpdl/extensions/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Betacode2Unicode.lex</tt>
+ */
+public class Betacode2UnicodeLex {
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+  private static final int ZZ_BUFFERSIZE = 16384;
+
+  /** lexical states */
+  public static final int YYINITIAL = 0;
+
+  /**
+   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+   *                  at the beginning of a line
+   * l is of the form l = 2*k, k a non-negative integer

+   */
+  private static final int ZZ_LEXSTATE[] = { 
+     0, 0
+  };
+
+  /** 
+   * Translates characters to character classes
+   */
+  private static final String ZZ_CMAP_PACKED = 
+    "\12\0\1\0\26\0\1\7\1\0\1\62\2\0\1\50\1\54\1\13"+
+    "\1\12\1\3\1\30\1\0\1\47\1\0\1\15\1\63\1\46\1\54"+
+    "\1\64\5\54\1\65\1\10\1\52\1\1\1\16\1\2\1\32\1\0"+
+    "\32\66\1\56\1\14\1\55\1\26\1\27\1\0\1\11\1\33\1\44"+
+    "\1\35\1\17\1\57\1\34\1\20\1\21\1\4\1\40\1\41\1\42"+
+    "\1\43\1\22\1\45\1\37\1\31\1\6\1\51\1\23\1\5\1\24"+
+    "\1\60\1\61\1\36\1\0\1\25\1\53\uff82\0";
+
+  /** 
+   * Translates characters to character classes
+   */
+  private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+  /** 
+   * Translates DFA states to action switch labels.
+   */
+  private static final int [] ZZ_ACTION = zzUnpackAction();
+
+  private static final String ZZ_ACTION_PACKED_0 =
+    "\1\0\3\1\1\2\1\3\1\4\1\5\1\6\1\7"+
+    "\1\10\1\11\1\12\1\13\1\14\1\15\1\16\1\17"+
+    "\1\20\1\21\1\22\1\23\1\24\1\25\1\26\1\27"+
+    "\1\30\1\31\1\32\1\33\1\34\1\35\1\36\1\37"+
+    "\1\40\1\41\1\42\1\43\1\1\1\44\1\45\1\46"+
+    "\1\47\1\0\1\50\1\51\1\52\1\53\2\0\1\54"+
+    "\1\55\1\56\1\57\1\60\1\61\1\62\1\63\1\64"+
+    "\1\65\1\66\1\67\1\70\1\71\1\72\1\73\1\74"+
+    "\1\75\1\76\1\77\1\100\1\101\1\102\1\0\1\4"+
+    "\1\0\2\102\1\0\1\103\1\104\1\105\1\106\1\107"+
+    "\1\110\1\111\1\112\1\113\1\114\1\115\1\116\1\117"+
+    "\1\120\1\121\1\122\1\123\1\124\1\125\1\126\1\127"+
+    "\1\130\1\131\1\132\1\133\1\0\1\134\1\135\1\136"+
+    "\1\137\1\140\1\141\1\142\1\143\1\144\1\145\1\146"+
+    "\1\0\1\147\1\150\1\151\1\152\1\153\1\154\4\0"+
+    "\1\155\1\156\6\0\1\157\1\160\1\161\1\162\1\163"+
+    "\1\164\3\0\1\165\1\166\1\167\1\170\1\171\1\0"+
+    "\1\172\3\0\1\173\1\174\1\175\1\176\1\177\1\200"+
+    "\1\0\1\201\1\202\1\203\1\204\1\205\1\206\1\207"+
+    "\1\210\1\211\1\212\1\213\1\214\1\215\1\216\1\217"+
+    "\1\220\1\221\1\222\1\223\2\0\1\224\1\225\1\226"+
+    "\1\227\1\230\1\231\1\232\1\233\1\234\1\235\1\236"+
+    "\1\237\1\240\1\241\1\242\1\243\1\244\1\245\1\246"+
+    "\1\247\1\250\1\251\1\252\1\253\1\254\1\255\1\256"+
+    "\1\257\1\260\1\261\1\262\1\263\1\264\1\265\1\266"+
+    "\1\267\1\270\1\271\1\272\1\273\1\274\1\275\1\276"+
+    "\1\277\1\300\1\301\1\302\1\303\1\304\1\305\1\306"+
+    "\1\307\1\310\1\311\1\312\1\313\1\314\1\315\1\316"+
+    "\1\317\13\0\1\320\1\321\1\322\1\323\1\324\1\325"+
+    "\1\0\1\326\1\327\1\330\1\331\1\332\1\333\1\0"+
+    "\1\334\1\335\1\336\1\337\1\0\1\340\1\341\1\342"+
+    "\1\343\1\344\1\345\1\346\1\347\1\350\1\351\1\0"+
+    "\1\352\1\353\1\354\1\355\1\356\1\357\1\360\1\0"+
+    "\1\361\1\362\1\363\1\364\1\365\1\0\1\366\1\367"+
+    "\1\370\2\0\1\371\1\372\1\373\1\374\1\375\1\376"+
+    "\1\377\1\u0100\1\u0101\1\u0102\1\u0103\1\u0104\1\u0105\1\u0106"+
+    "\1\u0107\1\u0108\1\u0109\1\u010a\2\0\1\u010b\1\0\1\u010c"+
+    "\4\0\1\u010d\1\u010e\1\u010f\1\u0110\1\u0111\1\u0112\1\u0113"+
+    "\1\u0114\1\u0115\1\u0116\1\u0117\1\u0118\1\u0119\1\u011a\1\u011b"+
+    "\1\u011c\1\u011d\1\u011e\10\0\1\u011f\1\u0120\1\u0121\1\u0122";
+
+  private static int [] zzUnpackAction() {
+    int [] result = new int[359];
+    int offset = 0;
+    offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackAction(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
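+
+  /*
+   * The packed strings above are run-length encoded as (count, value) pairs
+   * of chars: for example, the prefix "\1\0\3\1" of ZZ_ACTION_PACKED_0
+   * unpacks to one 0 followed by three 1s, i.e. {0, 1, 1, 1}.
+   */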
+
+
+  /** 
+   * Translates a state to a row index in the transition table
+   */
+  private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
+
+  private static final String ZZ_ROWMAP_PACKED_0 =
+    "\0\0\0\67\0\156\0\245\0\67\0\67\0\334\0\67"+
+    "\0\67\0\u0113\0\67\0\67\0\67\0\67\0\67\0\u014a"+
+    "\0\u0181\0\u01b8\0\u01ef\0\u0226\0\u025d\0\67\0\67\0\u0294"+
+    "\0\67\0\u02cb\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\u0302\0\67"+
+    "\0\67\0\67\0\67\0\u0339\0\67\0\67\0\67\0\u0370"+
+    "\0\u03a7\0\u03de\0\u0415\0\u044c\0\u0483\0\u04ba\0\u04f1\0\u0528"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\u055f\0\67\0\u0596\0\u05cd\0\u0604\0\u0604\0\u063b"+
+    "\0\u0672\0\u06a9\0\u06e0\0\u0717\0\67\0\67\0\67\0\u074e"+
+    "\0\u0785\0\67\0\67\0\u07bc\0\u07f3\0\u082a\0\u0861\0\u0898"+
+    "\0\67\0\u08cf\0\u0906\0\67\0\67\0\67\0\67\0\67"+
+    "\0\u093d\0\u0974\0\u09ab\0\67\0\67\0\u09e2\0\u0a19\0\67"+
+    "\0\67\0\67\0\67\0\67\0\u0a50\0\u0a87\0\u0abe\0\u0af5"+
+    "\0\u0b2c\0\u0b63\0\67\0\u0b9a\0\u0bd1\0\u0c08\0\u0c3f\0\67"+
+    "\0\67\0\u0c76\0\u0cad\0\u0ce4\0\u0d1b\0\u0d52\0\u0d89\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\u0dc0\0\u0df7\0\u0e2e"+
+    "\0\67\0\67\0\67\0\67\0\67\0\u0e65\0\67\0\u0e9c"+
+    "\0\u0ed3\0\u0f0a\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\u0f41\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\u0f78\0\u0faf\0\67\0\u0fe6"+
+    "\0\u101d\0\u1054\0\67\0\u108b\0\u10c2\0\u10f9\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\u1130\0\u1167"+
+    "\0\u119e\0\67\0\u11d5\0\u120c\0\u1243\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\u127a"+
+    "\0\u12b1\0\u12e8\0\67\0\u131f\0\u1356\0\u138d\0\67\0\67"+
+    "\0\67\0\67\0\u13c4\0\u13fb\0\u1432\0\u1469\0\u14a0\0\u14d7"+
+    "\0\u150e\0\u1545\0\u157c\0\u15b3\0\u15ea\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\u1621\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\u1658\0\67\0\67\0\67\0\67\0\u168f"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\u16c6\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\u16fd\0\67\0\67\0\67\0\67\0\67"+
+    "\0\u1734\0\67\0\67\0\67\0\u176b\0\u17a2\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\u17d9\0\u1810\0\67\0\u1847\0\67\0\u187e\0\u18b5\0\u18ec"+
+    "\0\u1923\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\67\0\67\0\67\0\67\0\67"+
+    "\0\67\0\67\0\67\0\u195a\0\u1991\0\u19c8\0\u19ff\0\u1a36"+
+    "\0\u1a6d\0\u1aa4\0\u1adb\0\67\0\67\0\67\0\67";
+
+  private static int [] zzUnpackRowMap() {
+    int [] result = new int[359];
+    int offset = 0;
+    offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackRowMap(String packed, int offset, int [] result) {
+    int i = 0;  /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int high = packed.charAt(i++) << 16;
+      result[j++] = high | packed.charAt(i++);
+    }
+    return j;
+  }
+
+  /** 
+   * The transition table of the DFA
+   */
+  private static final int [] ZZ_TRANS = zzUnpackTrans();
+
+  private static final String ZZ_TRANS_PACKED_0 =
+    "\1\2\1\3\1\2\1\4\1\5\1\6\1\7\1\10"+
+    "\1\11\1\12\1\13\1\14\1\15\1\16\1\17\1\20"+
+    "\1\21\1\22\1\23\1\24\1\25\1\26\1\27\1\30"+
+    "\1\31\1\32\1\33\1\34\1\35\1\36\1\37\1\40"+
+    "\1\41\1\42\1\43\1\44\1\45\1\46\2\2\1\47"+
+    "\1\50\5\2\1\51\1\52\1\53\5\2\67\0\2\54"+
+    "\1\0\64\54\4\0\1\55\1\56\1\57\2\0\1\60"+
+    "\1\61\1\62\3\0\1\63\1\64\1\65\1\66\1\67"+
+    "\1\70\4\0\1\71\1\0\1\72\1\73\1\74\1\75"+
+    "\1\76\1\77\1\100\1\101\1\102\1\103\1\104\3\0"+
+    "\1\105\5\0\1\106\1\107\1\110\5\0\3\111\4\0"+
+    "\2\111\3\0\1\111\10\0\4\111\1\0\1\112\13\0"+
+    "\1\113\1\114\1\115\1\0\2\111\1\0\1\116\1\117"+
+    "\3\0\1\111\3\0\1\111\12\0\1\120\1\121\1\122"+
+    "\1\123\1\124\6\0\1\125\1\126\1\127\51\0\1\130"+
+    "\1\131\1\132\1\133\63\0\1\134\1\135\1\136\1\137"+
+    "\1\140\6\0\1\141\53\0\1\142\1\143\1\144\1\145"+
+    "\1\146\7\0\1\147\1\150\1\151\50\0\1\152\1\153"+
+    "\1\154\1\155\63\0\1\156\1\157\1\160\1\161\1\162"+
+    "\7\0\1\163\1\164\1\165\50\0\1\166\1\167\1\170"+
+    "\1\171\1\172\6\0\1\173\46\0\1\174\23\0\1\175"+
+    "\2\0\1\176\4\0\1\177\37\0\1\200\1\201\57\0"+
+    "\1\202\1\203\1\202\2\0\1\202\5\0\6\202\4\0"+
+    "\1\204\1\0\1\202\1\205\4\202\1\206\4\202\3\0"+
+    "\1\202\5\0\3\202\1\207\3\0\1\202\2\54\1\2"+
+    "\64\54\14\0\1\210\1\211\7\0\1\212\1\213\1\214"+
+    "\50\0\1\215\2\0\1\216\1\217\1\220\1\221\1\222"+
+    "\1\223\1\224\1\0\1\225\1\226\52\0\1\227\2\0"+
+    "\1\230\1\231\1\232\1\233\1\234\1\235\1\236\1\237"+
+    "\1\240\1\241\3\0\1\242\51\0\1\243\1\244\65\0"+
+    "\1\245\1\246\7\0\1\247\55\0\1\250\1\251\10\0"+
+    "\1\252\1\253\53\0\1\254\1\255\65\0\1\256\1\257"+
+    "\10\0\1\260\1\261\53\0\1\262\1\263\7\0\1\264"+
+    "\41\0\3\111\4\0\2\111\3\0\1\111\10\0\4\111"+
+    "\17\0\1\111\1\0\2\111\1\0\1\111\4\0\1\111"+
+    "\3\0\1\111\47\0\1\111\53\0\1\265\4\0\1\266"+
+    "\30\0\5\267\1\0\3\267\1\0\10\267\4\0\17\267"+
+    "\1\0\1\267\2\0\1\267\2\0\3\267\1\0\3\267"+
+    "\15\0\1\270\1\271\1\272\6\0\1\273\55\0\1\274"+
+    "\1\275\1\276\6\0\1\277\66\0\1\300\66\0\1\301"+
+    "\66\0\1\302\55\0\1\303\1\304\65\0\1\305\1\306"+
+    "\65\0\1\307\1\310\1\311\6\0\1\312\55\0\1\313"+
+    "\1\314\1\315\6\0\1\316\66\0\1\317\66\0\1\320"+
+    "\66\0\1\321\55\0\1\322\1\323\1\324\64\0\1\325"+
+    "\1\326\1\327\64\0\1\330\1\331\1\332\64\0\1\333"+
+    "\1\334\65\0\1\335\1\336\65\0\1\337\1\340\1\341"+
+    "\64\0\1\342\1\343\1\344\64\0\1\345\1\346\1\347"+
+    "\64\0\1\350\1\351\1\352\6\0\1\353\55\0\1\354"+
+    "\1\355\1\356\6\0\1\357\66\0\1\360\66\0\1\361"+
+    "\66\0\1\362\60\0\1\363\114\0\1\364\72\0\1\365"+
+    "\62\0\1\366\3\0\1\367\21\0\3\202\2\0\1\202"+
+    "\5\0\6\202\4\0\1\202\1\0\13\202\3\0\1\202"+
+    "\1\2\4\0\3\202\4\0\1\202\4\0\3\202\2\0"+
+    "\1\202\5\0\1\370\5\202\4\0\1\202\1\0\13\202"+
+    "\3\0\1\202\1\2\4\0\3\202\4\0\1\202\4\0"+
+    "\3\202\2\0\1\202\5\0\6\202\4\0\1\202\1\0"+
+    "\12\202\1\371\3\0\1\202\1\2\4\0\3\202\4\0"+
+    "\1\202\4\0\3\202\2\0\1\202\5\0\6\202\4\0"+
+    "\1\202\1\0\13\202\3\0\1\372\1\2\4\0\3\202"+
+    "\4\0\1\202\4\0\3\202\2\0\1\202\5\0\6\202"+
+    "\4\0\1\202\1\0\12\202\1\373\3\0\1\374\1\2"+
+    "\4\0\3\202\4\0\1\202\63\0\1\375\14\0\1\376"+
+    "\5\0\1\377\1\u0100\1\u0101\1\u0102\1\0\1\u0103\1\u0104"+
+    "\52\0\1\u0105\5\0\1\u0106\1\u0107\1\u0108\1\u0109\1\0"+
+    "\1\u010a\1\u010b\52\0\1\u010c\6\0\1\u010d\1\u010e\2\0"+
+    "\1\u010f\1\u0110\52\0\1\u0111\6\0\1\u0112\3\0\1\u0113"+
+    "\53\0\1\u0114\5\0\1\u0115\1\u0116\1\u0117\1\u0118\1\u0119"+
+    "\1\u011a\1\u011b\52\0\1\u011c\5\0\1\u011d\1\u011e\1\u011f"+
+    "\1\u0120\1\u0121\1\u0122\1\u0123\52\0\1\u0124\6\0\1\u0125"+
+    "\1\u0126\1\0\1\u0127\1\u0128\1\u0129\52\0\1\u012a\6\0"+
+    "\1\u012b\3\0\1\u012c\113\0\1\u012d\66\0\1\u012e\42\0"+
+    "\1\u012f\66\0\1\u0130\66\0\1\u0131\66\0\1\u0132\66\0"+
+    "\1\u0133\66\0\1\u0134\66\0\1\u0135\66\0\1\u0136\66\0"+
+    "\1\u0137\66\0\1\u0138\66\0\1\u0139\66\0\1\u013a\66\0"+
+    "\1\u013b\66\0\1\u013c\66\0\1\u013d\66\0\1\u013e\66\0"+
+    "\1\u013f\66\0\1\u0140\72\0\1\u0141\46\0\1\u0142\127\0"+
+    "\1\u0143\25\0\1\u0144\127\0\1\u0145\20\0\3\202\2\0"+
+    "\1\202\5\0\6\202\4\0\1\u0146\1\0\13\202\3\0"+
+    "\1\202\1\2\4\0\3\202\4\0\1\202\4\0\3\202"+
+    "\2\0\1\u0147\5\0\6\202\4\0\1\202\1\0\13\202"+
+    "\3\0\1\202\1\2\4\0\3\202\4\0\1\202\4\0"+
+    "\3\202\2\0\1\202\5\0\6\202\4\0\1\202\1\0"+
+    "\13\202\3\0\1\202\1\u0143\4\0\3\202\4\0\1\202"+
+    "\4\0\3\202\2\0\1\u0148\5\0\6\202\4\0\1\202"+
+    "\1\0\13\202\3\0\1\202\1\2\4\0\3\202\4\0"+
+    "\1\202\4\0\3\202\2\0\1\202\5\0\6\202\4\0"+
+    "\1\202\1\0\13\202\3\0\1\202\1\u0145\4\0\3\202"+
+    "\4\0\1\202\64\0\1\u0149\13\0\1\u014a\6\0\1\u014b"+
+    "\3\0\1\u014c\53\0\1\u014d\6\0\1\u014e\3\0\1\u014f"+
+    "\53\0\1\u0150\6\0\1\u0151\3\0\1\u0152\53\0\1\u0153"+
+    "\6\0\1\u0154\3\0\1\u0155\53\0\1\u0156\6\0\1\u0157"+
+    "\3\0\1\u0158\53\0\1\u0159\6\0\1\u015a\3\0\1\u015b"+
+    "\114\0\1\u015c\66\0\1\111\65\0\1\u015d\46\0\1\u015e"+
+    "\66\0\1\u015f\41\0\3\202\2\0\1\202\5\0\6\202"+
+    "\4\0\1\202\1\0\13\202\3\0\1\u0160\1\2\4\0"+
+    "\3\202\4\0\1\202\4\0\3\202\2\0\1\202\5\0"+
+    "\6\202\4\0\1\u0161\1\0\13\202\3\0\1\202\1\2"+
+    "\4\0\3\202\4\0\1\202\4\0\3\202\2\0\1\202"+
+    "\5\0\6\202\4\0\1\u0162\1\0\13\202\3\0\1\202"+
+    "\1\2\4\0\3\202\4\0\1\202\65\0\1\u0163\54\0"+
+    "\1\117\65\0\1\u0164\66\0\1\u0165\66\0\1\u0166\20\0"+
+    "\3\202\2\0\1\202\5\0\6\202\4\0\1\202\1\0"+
+    "\13\202\3\0\1\202\1\u0164\4\0\3\202\4\0\1\202"+
+    "\4\0\3\202\2\0\1\202\5\0\6\202\4\0\1\202"+
+    "\1\0\13\202\3\0\1\202\1\u0165\4\0\3\202\4\0"+
+    "\1\202\4\0\3\202\2\0\1\202\5\0\6\202\4\0"+
+    "\1\202\1\0\13\202\3\0\1\202\1\u0166\4\0\3\202"+
+    "\4\0\1\202\52\0\1\u0167\14\0";
+
+  private static int [] zzUnpackTrans() {
+    int [] result = new int[6930];
+    int offset = 0;
+    offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackTrans(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      value--;
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
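+
+  /*
+   * Note the extra "value--" above: transition targets are stored shifted by
+   * one in the packed string, so a packed 0 unpacks to -1, the "no transition"
+   * marker that the scanning loop in yylex() tests for.
+   */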
+
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unkown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+  /**
+   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+   */
+  private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+  private static final String ZZ_ATTRIBUTE_PACKED_0 =
+    "\1\0\1\11\2\1\2\11\1\1\2\11\1\1\5\11"+
+    "\6\1\2\11\1\1\1\11\1\1\14\11\1\1\4\11"+
+    "\1\0\3\11\1\1\2\0\6\1\21\11\1\0\1\11"+
+    "\1\0\2\1\1\0\5\1\3\11\2\1\2\11\5\1"+
+    "\1\11\2\1\5\11\1\0\2\1\2\11\2\1\5\11"+
+    "\1\0\5\1\1\11\4\0\2\11\6\0\6\11\3\0"+
+    "\5\11\1\0\1\11\3\0\6\11\1\0\23\11\2\0"+
+    "\1\11\3\1\1\11\3\1\10\11\3\1\1\11\3\1"+
+    "\32\11\3\1\1\11\3\1\4\11\13\0\6\11\1\0"+
+    "\6\11\1\0\4\11\1\0\12\11\1\0\7\11\1\0"+
+    "\5\11\1\0\3\11\2\0\22\11\2\0\1\11\1\0"+
+    "\1\11\4\0\22\11\10\0\4\11";
+
+  private static int [] zzUnpackAttribute() {
+    int [] result = new int[359];
+    int offset = 0;
+    offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackAttribute(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
+
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the text position at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true <=> the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+
+  /* user code: */
+    /*
+     * Betacode to Unicode conversion
+     */
+
+  private String toUnicode(int in) {
+    char c = (char) in;
+    Character ch = new Character(c);
+    String retString = ch.toString();
+    return retString;
+  }
+
+
+
+  /**
+   * Creates a new scanner
+   * There is also a java.io.InputStream version of this constructor.
+   *
+   * @param   in  the java.io.Reader to read input from.
+   */
+  public Betacode2UnicodeLex(java.io.Reader in) {
+    this.zzReader = in;
+  }
+
+  /**
+   * Creates a new scanner.
+   * There is also a java.io.Reader version of this constructor.
+   *
+   * @param   in  the java.io.InputStream to read input from.
+   */
+  public Betacode2UnicodeLex(java.io.InputStream in) {
+    this(new java.io.InputStreamReader(in));
+  }
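+
+  /*
+   * Minimal usage sketch (an illustration only, using just the public members
+   * defined in this class): yylex() returns one converted token per match,
+   * and -- assuming the JFlex default end-of-file value of null for a non-int
+   * return type -- a whole Betacode string can be converted like this:
+   *
+   *   Betacode2UnicodeLex lexer =
+   *       new Betacode2UnicodeLex(new java.io.StringReader("lo/gos "));
+   *   StringBuilder unicode = new StringBuilder();
+   *   String token;
+   *   while ((token = lexer.yylex()) != null) {
+   *     unicode.append(token);
+   *   }
+   *   // expected result: the Greek word plus the trailing space, e.g. "λόγος "
+   */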
+
+  /** 
+   * Unpacks the compressed character translation table.
+   *
+   * @param packed   the packed character translation table
+   * @return         the unpacked character translation table
+   */
+  private static char [] zzUnpackCMap(String packed) {
+    char [] map = new char[0x10000];
+    int i = 0;  /* index in packed string  */
+    int j = 0;  /* index in unpacked array */
+    while (i < 134) {
+      int  count = packed.charAt(i++);
+      char value = packed.charAt(i++);
+      do map[j++] = value; while (--count > 0);
+    }
+    return map;
+  }
+
+
+  /**
+   * Refills the input buffer.
+   *
+   * @return      <code>false</code> iff there was new input.
+   * 
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can) */
+    if (zzStartRead > 0) {
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+    /* is the buffer big enough? */
+    if (zzCurrentPos >= zzBuffer.length) {
+      /* if not: blow it up */
+      char newBuffer[] = new char[zzCurrentPos*2];
+      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+      zzBuffer = newBuffer;
+    }
+
+    /* finally: fill the buffer with new input */
+    int numRead = zzReader.read(zzBuffer, zzEndRead,
+                                            zzBuffer.length-zzEndRead);
+
+    if (numRead > 0) {
+      zzEndRead+= numRead;
+      return false;
+    }
+    // unlikely but not impossible: read 0 characters, but not at end of stream    
+    if (numRead == 0) {
+      int c = zzReader.read();
+      if (c == -1) {
+        return true;
+      } else {
+        zzBuffer[zzEndRead++] = (char) c;
+        return false;
+      }     
+    }
+
+	// numRead < 0
+    return true;
+  }
+
+    
+  /**
+   * Closes the input stream.
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    if (zzReader != null)
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream 
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>ZZ_INITIAL</tt>.
+   *
+   * @param reader   the new input stream 
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    zzEndRead = zzStartRead = 0;
+    zzCurrentPos = zzMarkedPos = 0;
+    yyline = yychar = yycolumn = 0;
+    zzLexicalState = YYINITIAL;
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   */
+  public final int yystate() {
+    return zzLexicalState;
+  }
+
+
+  /**
+   * Enters a new lexical state
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the 
+   * matched text. 
+   * 
+   * It is equivalent to yytext().charAt(pos), but faster
+   *
+   * @param pos the position of the character to fetch. 
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a well-formed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the error message to display
+   */
+  private void zzScanError(int errorCode) {
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+    throw new Error(message);
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+  public void yypushback(int number)  {
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  public java.lang.String yylex() throws java.io.IOException {
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+    int [] zzTransL = ZZ_TRANS;
+    int [] zzRowMapL = ZZ_ROWMAP;
+    int [] zzAttrL = ZZ_ATTRIBUTE;
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+      zzState = ZZ_LEXSTATE[zzLexicalState];
+
+
+      zzForAction: {
+        while (true) {
+    
+          if (zzCurrentPosL < zzEndReadL)
+            zzInput = zzBufferL[zzCurrentPosL++];
+          else if (zzAtEOF) {
+            zzInput = YYEOF;
+            break zzForAction;
+          }
+          else {
+            // store back cached positions
+            zzCurrentPos  = zzCurrentPosL;
+            zzMarkedPos   = zzMarkedPosL;
+            boolean eof = zzRefill();
+            // get translated positions and possibly new buffer
+            zzCurrentPosL  = zzCurrentPos;
+            zzMarkedPosL   = zzMarkedPos;
+            zzBufferL      = zzBuffer;
+            zzEndReadL     = zzEndRead;
+            if (eof) {
+              zzInput = YYEOF;
+              break zzForAction;
+            }
+            else {
+              zzInput = zzBufferL[zzCurrentPosL++];
+            }
+          }
+          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
+          if (zzNext == -1) break zzForAction;
+          zzState = zzNext;
+
+          int zzAttributes = zzAttrL[zzState];
+          if ( (zzAttributes & 1) == 1 ) {
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+            if ( (zzAttributes & 8) == 8 ) break zzForAction;
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+
+      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+        case 139: 
+          { return toUnicode(0x1FF8);
+          }
+        case 291: break;
+        case 85: 
+          { return toUnicode(0x1F30);
+          }
+        case 292: break;
+        case 64: 
+          { return toUnicode(0x03a7);
+          }
+        case 293: break;
+        case 60: 
+          { return toUnicode(0x039e);
+          }
+        case 294: break;
+        case 151: 
+          { return toUnicode(0x1F06);
+          }
+        case 295: break;
+        case 206: 
+          { return toUnicode(0x1FF4);
+          }
+        case 296: break;
+        case 42: 
+          { return toUnicode(0x03a3);
+          }
+        case 297: break;
+        case 56: 
+          { return toUnicode(0x039a);
+          }
+        case 298: break;
+        case 149: 
+          { return toUnicode(0x1F02);
+          }
+        case 299: break;
+        case 254: 
+          { return toUnicode(0x1F87);
+          }
+        case 300: break;
+        case 83: 
+          { return toUnicode(0x1FC6);
+          }
+        case 301: break;
+        case 32: 
+          { return toUnicode(0x03bc);
+          }
+        case 302: break;
+        case 216: 
+          { return toUnicode(0x1F2C);
+          }
+        case 303: break;
+        case 252: 
+          { return toUnicode(0x1F83);
+          }
+        case 304: break;
+        case 172: 
+          { return toUnicode(0x1FC2);
+          }
+        case 305: break;
+        case 127: 
+          { return toUnicode(0x1F59);
+          }
+        case 306: break;
+        case 192: 
+          { return toUnicode(0x1F55);
+          }
+        case 307: break;
+        case 129: 
+          { return toUnicode(0x1FEC);
+          }
+        case 308: break;
+        case 97: 
+          { return toUnicode(0x1F51);
+          }
+        case 309: break;
+        case 39: 
+          { return toUnicode(0x03c8);
+          }
+        case 310: break;
+        case 170: 
+          { return toUnicode(0x1F27);
+          }
+        case 311: break;
+        case 36: 
+          { return toUnicode(0x03c4);
+          }
+        case 312: break;
+        case 168: 
+          { return toUnicode(0x1F23);
+          }
+        case 313: break;
+        case 99: 
+          { return toUnicode(0x1F7B);
+          }
+        case 314: break;
+        case 111: 
+          { return toUnicode(0x1FBA);
+          }
+        case 315: break;
+        case 35: 
+          { return toUnicode(0x03c0);
+          }
+        case 316: break;
+        case 196: 
+          { return toUnicode(0x1FE7);
+          }
+        case 317: break;
+        case 238: 
+          { return toUnicode(0x1F4D);
+          }
+        case 318: break;
+        case 195: 
+          { return toUnicode(0x1FE3);
+          }
+        case 319: break;
+        case 115: 
+          { return toUnicode(0x1FB9);
+          }
+        case 320: break;
+        case 87: 
+          { return toUnicode(0x1F76);
+          }
+        case 321: break;
+        case 9: 
+          { return toUnicode(0x0314);
+          }
+        case 322: break;
+        case 228: 
+          { return toUnicode(0x1F1B);
+          }
+        case 323: break;
+        case 77: 
+          { return toUnicode(0x1F72);
+          }
+        case 324: break;
+        case 46: 
+          { return toUnicode(0x0399);
+          }
+        case 325: break;
+        case 74: 
+          { return toUnicode(0x1FB1);
+          }
+        case 326: break;
+        case 120: 
+          { return toUnicode(0x1F48);
+          }
+        case 327: break;
+        case 44: 
+          { return toUnicode(0x0395);
+          }
+        case 328: break;
+        case 185: 
+          { return toUnicode(0x1F44);
+          }
+        case 329: break;
+        case 273: 
+          { return toUnicode(0x1F9C);
+          }
+        case 330: break;
+        case 136: 
+          { return toUnicode(0x1FDB);
+          }
+        case 331: break;
+        case 43: 
+          { return toUnicode(0x0391);
+          }
+        case 332: break;
+        case 92: 
+          { return toUnicode(0x1F40);
+          }
+        case 333: break;
+        case 14: 
+          { return toUnicode(0x03b7);
+          }
+        case 334: break;
+        case 268: 
+          { return "&lt;";
+          }
+        case 335: break;
+        case 223: 
+          { return toUnicode(0x1F6E);
+          }
+        case 336: break;
+        case 283: 
+          { return toUnicode(0x1FAD);
+          }
+        case 337: break;
+        case 26: 
+          { return toUnicode(0x03b3);
+          }
+        case 338: break;
+        case 160: 
+          { return toUnicode(0x1F12);
+          }
+        case 339: break;
+        case 213: 
+          { return toUnicode(0x1F6A);
+          }
+        case 340: break;
+        case 260: 
+          { return toUnicode(0x1F97);
+          }
+        case 341: break;
+        case 89: 
+          { return toUnicode(0x1FD6);
+          }
+        case 342: break;
+        case 217: 
+          { return toUnicode(0x1F3C);
+          }
+        case 343: break;
+        case 258: 
+          { return toUnicode(0x1F93);
+          }
+        case 344: break;
+        case 181: 
+          { return toUnicode(0x1FD2);
+          }
+        case 345: break;
+        case 128: 
+          { return toUnicode(0x1F69);
+          }
+        case 346: break;
+        case 226: 
+          { return toUnicode(0x1FA8);
+          }
+        case 347: break;
+        case 220: 
+          { return toUnicode(0x1F0E);
+          }
+        case 348: break;
+        case 202: 
+          { return toUnicode(0x1F65);
+          }
+        case 349: break;
+        case 262: 
+          { return toUnicode(0x1FA4);
+          }
+        case 350: break;
+        case 147: 
+          { return toUnicode(0x1FFC);
+          }
+        case 351: break;
+        case 208: 
+          { return toUnicode(0x1F0A);
+          }
+        case 352: break;
+        case 104: 
+          { return toUnicode(0x1F61);
+          }
+        case 353: break;
+        case 288: 
+          { return ")";
+          }
+        case 354: break;
+        case 200: 
+          { return toUnicode(0x1FA0);
+          }
+        case 355: break;
+        case 180: 
+          { return toUnicode(0x1F37);
+          }
+        case 356: break;
+        case 284: 
+          { return toUnicode(0x1F8F);
+          }
+        case 357: break;
+        case 287: 
+          { return "|";
+          }
+        case 358: break;
+        case 178: 
+          { return toUnicode(0x1F33);
+          }
+        case 359: break;
+        case 278: 
+          { return toUnicode(0x1F8B);
+          }
+        case 360: break;
+        case 132: 
+          { return toUnicode(0x1FCA);
+          }
+        case 361: break;
+        case 122: 
+          { return toUnicode(0x1F09);
+          }
+        case 362: break;
+        case 207: 
+          { return toUnicode(0x1FF7);
+          }
+        case 363: break;
+        case 63: 
+          { return toUnicode(0x03a6);
+          }
+        case 364: break;
+        case 59: 
+          { return toUnicode(0x039d);
+          }
+        case 365: break;
+        case 154: 
+          { return toUnicode(0x1F05);
+          }
+        case 366: break;
+        case 239: 
+          { return toUnicode(0x1F5D);
+          }
+        case 367: break;
+        case 108: 
+          { return toUnicode(0x1FF3);
+          }
+        case 368: break;
+        case 131: 
+          { return toUnicode(0x1FC9);
+          }
+        case 369: break;
+        case 68: 
+          { return toUnicode(0x1F01);
+          }
+        case 370: break;
+        case 16: 
+          { return toUnicode(0x03bf);
+          }
+        case 371: break;
+        case 242: 
+          { return toUnicode(0x1F2F);
+          }
+        case 372: break;
+        case 251: 
+          { return toUnicode(0x1F86);
+          }
+        case 373: break;
+        case 6: 
+          { return toUnicode(0x00B7);
+          }
+        case 374: break;
+        case 31: 
+          { return toUnicode(0x03bb);
+          }
+        case 375: break;
+        case 229: 
+          { return toUnicode(0x1F2B);
+          }
+        case 376: break;
+        case 249: 
+          { return toUnicode(0x1F82);
+          }
+        case 377: break;
+        case 2: 
+          { return "h";
+          }
+        case 378: break;
+        case 189: 
+          { return toUnicode(0x1F54);
+          }
+        case 379: break;
+        case 142: 
+          { return toUnicode(0x1FEB);
+          }
+        case 380: break;
+        case 96: 
+          { return toUnicode(0x1F50);
+          }
+        case 381: break;
+        case 38: 
+          { return toUnicode(0x03c7);
+          }
+        case 382: break;
+        case 166: 
+          { return toUnicode(0x1F26);
+          }
+        case 383: break;
+        case 4: 
+          { return toUnicode(0x03c3);
+          }
+        case 384: break;
+        case 148: 
+          // lookahead expression with fixed base length
+          zzMarkedPos = zzStartRead + 1;
+          { return toUnicode(0x03c3);
+          }
+        case 385: break;
+        case 164: 
+          { return toUnicode(0x1F22);
+          }
+        case 386: break;
+        case 98: 
+          { return toUnicode(0x1F7A);
+          }
+        case 387: break;
+        case 100: 
+          { return toUnicode(0x1FE6);
+          }
+        case 388: break;
+        case 19: 
+          { return toUnicode(0x0345);
+          }
+        case 389: break;
+        case 218: 
+          { return toUnicode(0x1F4C);
+          }
+        case 390: break;
+        case 194: 
+          { return toUnicode(0x1FE2);
+          }
+        case 391: break;
+        case 95: 
+          { return toUnicode(0x1F79);
+          }
+        case 392: break;
+        case 114: 
+          { return toUnicode(0x1FB8);
+          }
+        case 393: break;
+        case 82: 
+          { return toUnicode(0x1F75);
+          }
+        case 394: break;
+        case 158: 
+          { return toUnicode(0x1FB4);
+          }
+        case 395: break;
+        case 8: 
+          { return toUnicode(0x0313);
+          }
+        case 396: break;
+        case 209: 
+          { return toUnicode(0x1F1A);
+          }
+        case 397: break;
+        case 70: 
+          { return toUnicode(0x1F71);
+          }
+        case 398: break;
+        case 40: 
+          { return "H";
+          }
+        case 399: break;
+        case 55: 
+          { return toUnicode(0x0398);
+          }
+        case 400: break;
+        case 73: 
+          { return toUnicode(0x1FB0);
+          }
+        case 401: break;
+        case 285: 
+          { return toUnicode(0x1F9F);
+          }
+        case 402: break;
+        case 53: 
+          { return toUnicode(0x0394);
+          }
+        case 403: break;
+        case 186: 
+          { return toUnicode(0x1F43);
+          }
+        case 404: break;
+        case 279: 
+          { return toUnicode(0x1F9B);
+          }
+        case 405: break;
+        case 135: 
+          { return toUnicode(0x1FDA);
+          }
+        case 406: break;
+        case 123: 
+          { return toUnicode(0x1F19);
+          }
+        case 407: break;
+        case 28: 
+          { return toUnicode(0x03b6);
+          }
+        case 408: break;
+        case 163: 
+          { return toUnicode(0x1F15);
+          }
+        case 409: break;
+        case 240: 
+          { return toUnicode(0x1F6D);
+          }
+        case 410: break;
+        case 274: 
+          { return toUnicode(0x1FAC);
+          }
+        case 411: break;
+        case 25: 
+          { return toUnicode(0x03b2);
+          }
+        case 412: break;
+        case 138: 
+          { return toUnicode(0x1FD9);
+          }
+        case 413: break;
+        case 76: 
+          { return toUnicode(0x1F11);
+          }
+        case 414: break;
+        case 243: 
+          { return toUnicode(0x1F3F);
+          }
+        case 415: break;
+        case 257: 
+          { return toUnicode(0x1F96);
+          }
+        case 416: break;
+        case 230: 
+          { return toUnicode(0x1F3B);
+          }
+        case 417: break;
+        case 255: 
+          { return toUnicode(0x1F92);
+          }
+        case 418: break;
+        case 91: 
+          { return toUnicode(0x1FD1);
+          }
+        case 419: break;
+        case 121: 
+          { return toUnicode(0x1F68);
+          }
+        case 420: break;
+        case 266: 
+          { return toUnicode(0x1FA7);
+          }
+        case 421: break;
+        case 20: 
+          { return toUnicode(0x0306);
+          }
+        case 422: break;
+        case 234: 
+          { return toUnicode(0x1F0D);
+          }
+        case 423: break;
+        case 198: 
+          { return toUnicode(0x1F64);
+          }
+        case 424: break;
+        case 264: 
+          { return toUnicode(0x1FA3);
+          }
+        case 425: break;
+        case 146: 
+          { return toUnicode(0x1FFB);
+          }
+        case 426: break;
+        case 12: 
+          { return toUnicode(0x0302);
+          }
+        case 427: break;
+        case 103: 
+          { return toUnicode(0x1F60);
+          }
+        case 428: break;
+        case 289: 
+          { return "(";
+          }
+        case 429: break;
+        case 177: 
+          { return toUnicode(0x1F36);
+          }
+        case 430: break;
+        case 275: 
+          { return toUnicode(0x1F8E);
+          }
+        case 431: break;
+        case 175: 
+          { return toUnicode(0x1F32);
+          }
+        case 432: break;
+        case 49: 
+          { return toUnicode(0x03a9);
+          }
+        case 433: break;
+        case 269: 
+          { return toUnicode(0x1F8A);
+          }
+        case 434: break;
+        case 116: 
+          { return toUnicode(0x1F08);
+          }
+        case 435: break;
+        case 107: 
+          { return toUnicode(0x1FF6);
+          }
+        case 436: break;
+        case 267: 
+          { return "&gt;";
+          }
+        case 437: break;
+        case 48: 
+          { return toUnicode(0x03a5);
+          }
+        case 438: break;
+        case 58: 
+          { return toUnicode(0x039c);
+          }
+        case 439: break;
+        case 150: 
+          { return toUnicode(0x1F04);
+          }
+        case 440: break;
+        case 205: 
+          { return toUnicode(0x1FF2);
+          }
+        case 441: break;
+        case 50: 
+          { return toUnicode(0x03a1);
+          }
+        case 442: break;
+        case 246: 
+          { return toUnicode(0x1F89);
+          }
+        case 443: break;
+        case 130: 
+          { return toUnicode(0x1FC8);
+          }
+        case 444: break;
+        case 67: 
+          { return toUnicode(0x1F00);
+          }
+        case 445: break;
+        case 34: 
+          { return toUnicode(0x03be);
+          }
+        case 446: break;
+        case 221: 
+          { return toUnicode(0x1F2E);
+          }
+        case 447: break;
+        case 253: 
+          { return toUnicode(0x1F85);
+          }
+        case 448: break;
+        case 173: 
+          { return toUnicode(0x1FC4);
+          }
+        case 449: break;
+        case 24: 
+          { return toUnicode(0x0323);
+          }
+        case 450: break;
+        case 30: 
+          { return toUnicode(0x03ba);
+          }
+        case 451: break;
+        case 210: 
+          { return toUnicode(0x1F2A);
+          }
+        case 452: break;
+        case 156: 
+          { return toUnicode(0x1F81);
+          }
+        case 453: break;
+        case 193: 
+          { return toUnicode(0x1F57);
+          }
+        case 454: break;
+        case 191: 
+          { return toUnicode(0x1F53);
+          }
+        case 455: break;
+        case 141: 
+          { return toUnicode(0x1FEA);
+          }
+        case 456: break;
+        case 124: 
+          { return toUnicode(0x1F29);
+          }
+        case 457: break;
+        case 37: 
+          { return toUnicode(0x03c6);
+          }
+        case 458: break;
+        case 169: 
+          { return toUnicode(0x1F25);
+          }
+        case 459: break;
+        case 106: 
+          { return toUnicode(0x1F7D);
+          }
+        case 460: break;
+        case 113: 
+          { return toUnicode(0x1FBC);
+          }
+        case 461: break;
+        case 66: 
+          // lookahead expression with fixed base length
+          zzMarkedPos = zzStartRead + 1;
+          { return toUnicode(0x03c2);
+          }
+        case 462: break;
+        case 144: 
+          { return toUnicode(0x1FE9);
+          }
+        case 463: break;
+        case 80: 
+          { return toUnicode(0x1F21);
+          }
+        case 464: break;
+        case 110: 
+          { return toUnicode(0x1FE5);
+          }
+        case 465: break;
+        case 1: 
+          { return yytext();
+          }
+        case 466: break;
+        case 231: 
+          { return toUnicode(0x1F4B);
+          }
+        case 467: break;
+        case 102: 
+          { return toUnicode(0x1FE1);
+          }
+        case 468: break;
+        case 94: 
+          { return toUnicode(0x1F78);
+          }
+        case 469: break;
+        case 159: 
+          { return toUnicode(0x1FB7);
+          }
+        case 470: break;
+        case 235: 
+          { return toUnicode(0x1F1D);
+          }
+        case 471: break;
+        case 81: 
+          { return toUnicode(0x1F74);
+          }
+        case 472: break;
+        case 72: 
+          { return toUnicode(0x1FB3);
+          }
+        case 473: break;
+        case 69: 
+          { return toUnicode(0x1F70);
+          }
+        case 474: break;
+        case 45: 
+          { return toUnicode(0x0397);
+          }
+        case 475: break;
+        case 276: 
+          { return toUnicode(0x1F9E);
+          }
+        case 476: break;
+        case 52: 
+          { return toUnicode(0x0393);
+          }
+        case 477: break;
+        case 184: 
+          { return toUnicode(0x1F42);
+          }
+        case 478: break;
+        case 15: 
+          { return toUnicode(0x03b9);
+          }
+        case 479: break;
+        case 270: 
+          { return toUnicode(0x1F9A);
+          }
+        case 480: break;
+        case 117: 
+          { return toUnicode(0x1F18);
+          }
+        case 481: break;
+        case 286: 
+          { return toUnicode(0x1FAF);
+          }
+        case 482: break;
+        case 13: 
+          { return toUnicode(0x03b5);
+          }
+        case 483: break;
+        case 161: 
+          { return toUnicode(0x1F14);
+          }
+        case 484: break;
+        case 219: 
+          { return toUnicode(0x1F6C);
+          }
+        case 485: break;
+        case 280: 
+          { return toUnicode(0x1FAB);
+          }
+        case 486: break;
+        case 7: 
+          { return toUnicode(0x03b1);
+          }
+        case 487: break;
+        case 247: 
+          { return toUnicode(0x1F99);
+          }
+        case 488: break;
+        case 137: 
+          { return toUnicode(0x1FD8);
+          }
+        case 489: break;
+        case 75: 
+          { return toUnicode(0x1F10);
+          }
+        case 490: break;
+        case 222: 
+          { return toUnicode(0x1F3E);
+          }
+        case 491: break;
+        case 259: 
+          { return toUnicode(0x1F95);
+          }
+        case 492: break;
+        case 211: 
+          { return toUnicode(0x1F3A);
+          }
+        case 493: break;
+        case 171: 
+          { return toUnicode(0x1F91);
+          }
+        case 494: break;
+        case 90: 
+          { return toUnicode(0x1FD0);
+          }
+        case 495: break;
+        case 203: 
+          { return toUnicode(0x1F67);
+          }
+        case 496: break;
+        case 263: 
+          { return toUnicode(0x1FA6);
+          }
+        case 497: break;
+        case 214: 
+          { return toUnicode(0x1F0C);
+          }
+        case 498: break;
+        case 201: 
+          { return toUnicode(0x1F63);
+          }
+        case 499: break;
+        case 261: 
+          { return toUnicode(0x1FA2);
+          }
+        case 500: break;
+        case 145: 
+          { return toUnicode(0x1FFA);
+          }
+        case 501: break;
+        case 125: 
+          { return toUnicode(0x1F39);
+          }
+        case 502: break;
+        case 11: 
+          { return toUnicode(0x0301);
+          }
+        case 503: break;
+        case 290: 
+          { return "'";
+          }
+        case 504: break;
+        case 179: 
+          { return toUnicode(0x1F35);
+          }
+        case 505: break;
+        case 281: 
+          { return toUnicode(0x1F8D);
+          }
+        case 506: break;
+        case 134: 
+          { return toUnicode(0x1FCC);
+          }
+        case 507: break;
+        case 140: 
+          { return toUnicode(0x1FF9);
+          }
+        case 508: break;
+        case 86: 
+          { return toUnicode(0x1F31);
+          }
+        case 509: break;
+        case 65: 
+          { return toUnicode(0x03a8);
+          }
+        case 510: break;
+        case 47: 
+          { return toUnicode(0x039f);
+          }
+        case 511: break;
+        case 155: 
+          { return toUnicode(0x1F07);
+          }
+        case 512: break;
+        case 244: 
+          { return toUnicode(0x1F5F);
+          }
+        case 513: break;
+        case 62: 
+          { return toUnicode(0x03a4);
+          }
+        case 514: break;
+        case 57: 
+          { return toUnicode(0x039b);
+          }
+        case 515: break;
+        case 153: 
+          { return toUnicode(0x1F03);
+          }
+        case 516: break;
+        case 232: 
+          { return toUnicode(0x1F5B);
+          }
+        case 517: break;
+        case 61: 
+          { return toUnicode(0x03a0);
+          }
+        case 518: break;
+        case 224: 
+          { return toUnicode(0x1F88);
+          }
+        case 519: break;
+        case 174: 
+          { return toUnicode(0x1FC7);
+          }
+        case 520: break;
+        case 33: 
+          { return toUnicode(0x03bd);
+          }
+        case 521: break;
+        case 236: 
+          { return toUnicode(0x1F2D);
+          }
+        case 522: break;
+        case 250: 
+          { return toUnicode(0x1F84);
+          }
+        case 523: break;
+        case 84: 
+          { return toUnicode(0x1FC3);
+          }
+        case 524: break;
+        case 152: 
+          { return toUnicode(0x1F80);
+          }
+        case 525: break;
+        case 3: 
+          { return "f";
+          }
+        case 526: break;
+        case 190: 
+          { return toUnicode(0x1F56);
+          }
+        case 527: break;
+        case 188: 
+          { return toUnicode(0x1F52);
+          }
+        case 528: break;
+        case 18: 
+          { return toUnicode(0x03c9);
+          }
+        case 529: break;
+        case 118: 
+          { return toUnicode(0x1F28);
+          }
+        case 530: break;
+        case 17: 
+          { return toUnicode(0x03c5);
+          }
+        case 531: break;
+        case 165: 
+          { return toUnicode(0x1F24);
+          }
+        case 532: break;
+        case 105: 
+          { return toUnicode(0x1F7C);
+          }
+        case 533: break;
+        case 112: 
+          { return toUnicode(0x1FBB);
+          }
+        case 534: break;
+        case 23: 
+          { return toUnicode(0x03c1);
+          }
+        case 535: break;
+        case 143: 
+          { return toUnicode(0x1FE8);
+          }
+        case 536: break;
+        case 79: 
+          { return toUnicode(0x1F20);
+          }
+        case 537: break;
+        case 109: 
+          { return toUnicode(0x1FE4);
+          }
+        case 538: break;
+        case 212: 
+          { return toUnicode(0x1F4A);
+          }
+        case 539: break;
+        case 101: 
+          { return toUnicode(0x1FE0);
+          }
+        case 540: break;
+        case 88: 
+          { return toUnicode(0x1F77);
+          }
+        case 541: break;
+        case 71: 
+          { return toUnicode(0x1FB6);
+          }
+        case 542: break;
+        case 215: 
+          { return toUnicode(0x1F1C);
+          }
+        case 543: break;
+        case 78: 
+          { return toUnicode(0x1F73);
+          }
+        case 544: break;
+        case 157: 
+          { return toUnicode(0x1FB2);
+          }
+        case 545: break;
+        case 126: 
+          { return toUnicode(0x1F49);
+          }
+        case 546: break;
+        case 41: 
+          { return "F";
+          }
+        case 547: break;
+        case 54: 
+          { return toUnicode(0x0396);
+          }
+        case 548: break;
+        case 187: 
+          { return toUnicode(0x1F45);
+          }
+        case 549: break;
+        case 282: 
+          { return toUnicode(0x1F9D);
+          }
+        case 550: break;
+        case 51: 
+          { return toUnicode(0x0392);
+          }
+        case 551: break;
+        case 93: 
+          { return toUnicode(0x1F41);
+          }
+        case 552: break;
+        case 29: 
+          { return toUnicode(0x03b8);
+          }
+        case 553: break;
+        case 245: 
+          { return toUnicode(0x1F6F);
+          }
+        case 554: break;
+        case 277: 
+          { return toUnicode(0x1FAE);
+          }
+        case 555: break;
+        case 27: 
+          { return toUnicode(0x03b4);
+          }
+        case 556: break;
+        case 162: 
+          { return toUnicode(0x1F13);
+          }
+        case 557: break;
+        case 233: 
+          { return toUnicode(0x1F6B);
+          }
+        case 558: break;
+        case 271: 
+          { return toUnicode(0x1FAA);
+          }
+        case 559: break;
+        case 225: 
+          { return toUnicode(0x1F98);
+          }
+        case 560: break;
+        case 183: 
+          { return toUnicode(0x1FD7);
+          }
+        case 561: break;
+        case 237: 
+          { return toUnicode(0x1F3D);
+          }
+        case 562: break;
+        case 256: 
+          { return toUnicode(0x1F94);
+          }
+        case 563: break;
+        case 182: 
+          { return toUnicode(0x1FD3);
+          }
+        case 564: break;
+        case 248: 
+          { return toUnicode(0x1FA9);
+          }
+        case 565: break;
+        case 22: 
+          { return toUnicode(0x0308);
+          }
+        case 566: break;
+        case 167: 
+          { return toUnicode(0x1F90);
+          }
+        case 567: break;
+        case 241: 
+          { return toUnicode(0x1F0F);
+          }
+        case 568: break;
+        case 199: 
+          { return toUnicode(0x1F66);
+          }
+        case 569: break;
+        case 5: 
+          { return ".";
+          }
+        case 570: break;
+        case 265: 
+          { return toUnicode(0x1FA5);
+          }
+        case 571: break;
+        case 21: 
+          { return toUnicode(0x0304);
+          }
+        case 572: break;
+        case 227: 
+          { return toUnicode(0x1F0B);
+          }
+        case 573: break;
+        case 197: 
+          { return toUnicode(0x1F62);
+          }
+        case 574: break;
+        case 204: 
+          { return toUnicode(0x1FA1);
+          }
+        case 575: break;
+        case 119: 
+          { return toUnicode(0x1F38);
+          }
+        case 576: break;
+        case 10: 
+          { return toUnicode(0x0300);
+          }
+        case 577: break;
+        case 176: 
+          { return toUnicode(0x1F34);
+          }
+        case 578: break;
+        case 272: 
+          { return toUnicode(0x1F8C);
+          }
+        case 579: break;
+        case 133: 
+          { return toUnicode(0x1FCB);
+          }
+        case 580: break;
+        default: 
+          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+            zzAtEOF = true;
+            return null;
+          } 
+          else {
+            zzScanError(ZZ_NO_MATCH);
+          }
+      }
+    }
+  }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Buckwalter2Unicode.lex	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,121 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.general;
+
+%%
+%{
+    /*
+     * Buckwalter to Unicode conversion
+     */
+
+%}
+
+%class Buckwalter2UnicodeLex
+%public
+%type java.lang.String
+%unicode
+%%
+
+
+"<"[^>]+">" { return yytext(); }
+
+"'"   { return "\u0621"; }  /* Hamza  */
+"|"   { return "\u0622"; }  /* ALEF WITH MADDA ABOVE  from AraMorph */
+">"   { return "\u0623"; }  /* Hamza  */
+"&"   { return "\u0624"; }  /* Hamza  */
+"<"   { return "\u0625"; }  /* Alif + HamzaBelow  */
+"}"   { return "\u0626"; }  /* Ya + HamzaAbove  */
+"A"   { return "\u0627"; }  /* Alif  */
+"b"   { return "\u0628"; }  /* Ba  */
+"p"   { return "\u0629"; }  /* TaMarbuta  */
+"t"   { return "\u062A"; }  /* Ta  */
+"v"   { return "\u062B"; }  /* Tha  */
+"j"   { return "\u062C"; }  /* Jeem  */
+"H"   { return "\u062D"; }  /* HHa  */
+"x"   { return "\u062E"; }  /* Kha  */
+"d"   { return "\u062F"; }  /* Dal  */
+"*"   { return "\u0630"; }  /* Thal  */
+"r"   { return "\u0631"; }  /* Ra  */
+"z"   { return "\u0632"; }  /* Zain  */
+"s"   { return "\u0633"; }  /* Seen  */
+"$"   { return "\u0634"; }  /* Sheen  */
+"S"   { return "\u0635"; }  /* Sad  */
+"D"   { return "\u0636"; }  /* DDad  */
+"T"   { return "\u0637"; }  /* TTa  */
+"Z"   { return "\u0638"; }  /* DTha  */
+"E"   { return "\u0639"; }  /* Ain  */
+"g"   { return "\u063A"; }  /* Ghain  */
+
+"_"   { return "\u0640"; }  /* Tatweel  */
+"f"   { return "\u0641"; }  /* Fa  */
+"q"   { return "\u0642"; }  /* Qaf  */
+"k"   { return "\u0643"; }  /* Kaf  */
+"l"   { return "\u0644"; }  /* Lam  */
+"m"   { return "\u0645"; }  /* Meem  */
+"n"   { return "\u0646"; }  /* Noon  */
+"h"   { return "\u0647"; }  /* Ha  */
+"w"   { return "\u0648"; }  /* Waw  */
+"Y"   { return "\u0649"; }  /* AlifMaksura  */
+"y"   { return "\u064A"; }  /* Ya  */
+"F"   { return "\u064B"; }  /* Fathatan  */
+"N"   { return "\u064C"; }  /* Dammatan  */
+"K"   { return "\u064D"; }  /* Kasratan  */
+"a"   { return "\u064E"; }  /* Fatha  */
+"u"   { return "\u064F"; }  /* Damma  */
+"i"   { return "\u0650"; }  /* Kasra  */
+"~"   { return "\u0651"; }  /* Shadda  */
+"o"   { return "\u0652"; }  /* Sukun  */
+"^"   { return "\u0653"; }  /* Maddah  */
+"#"   { return "\u0654"; }  /* HamzaAbove  */
+
+"`"   { return "\u0670"; }  /* AlifKhanjareeya  */
+"{"   { return "\u0671"; }  /* Alif + HamzatWasl  */
+
+"P"   { return "\u067E"; }  /* PEH  from AraMorph   */
+"J"   { return "\u0686"; }  /* TCHEH  from AraMorph */
+"V"   { return "\u06A4"; }  /* VEH  from AraMorph */
+"G"   { return "\u06AF"; }  /* GAF  from AraMorph */
+"R"   { return "\u0698"; }  /* JEH  from AraMorph */
+"?"   { return "\u061F"; }  /* QUESTION MARK  from AraMorph */
+
+":"   { return "\u06DC"; }  /* SmallHighSeen  */
+"@"   { return "\u06DF"; }  /* SmallHighRoundedZero  */
+
+"["   { return "\u06E2"; }  /* SmallHighMeemIsolatedForm  */
+";"   { return "\u06E3"; }  /* SmallLowSeen  */
+","   { return "\u06E5"; }  /* SmallWaw  */
+"."   { return "\u06E6"; }  /* SmallYa  */
+"!"   { return "\u06E8"; }  /* SmallHighNoon  */
+"-"   { return "\u06EA"; }  /* EmptyCentreLowStop  */
+"+"   { return "\u06EB"; }  /* EmptyCentreHighStop  */
+"%"   { return "\u06EC"; }  /* RoundedHighStopWithFilledCentre  */
+"]"   { return "\u06ED"; }  /* SmallLowMeem  */
+
+[\&_]"vert;"   { return "|"; }
+[\&_]"lpar;"   { return "("; }
+[\&_]"rpar;"   { return ")"; }
+[\_\&]"lt;"    { return "&lt;"; }
+[\_\&]"gt;"    { return "&gt;"; }
+"&#039;"       { return "'"; } 
+
+"&"[a-zA-Z]+";"  { return yytext(); }
+
+.       { return yytext(); }
+\n      { return yytext(); }
+
+/* this rule causes problems   */
+/* "\\""   { return "\u06E0"; }  SmallHighUprightRectangularZero  */ 
+
+
+/* duplicate entries (already mapped above)    */
+/*  ","   { return "\u060C"; }  COMMA  from AraMorph */
+/*  ";"   { return "\u061B"; }  SEMICOLON  from AraMorph */
+
+/* not contained in the Buckwalter scheme   */
+/* \u0679 : ARABIC LETTER TTEH   */
+/* \u0688 : ARABIC LETTER DDAL   */
+/* \u06A9 : ARABIC LETTER KEHEH  */
+/* \u0691 : ARABIC LETTER RREH   */
+/* \u06BA : ARABIC LETTER NOON GHUNNA  */
+/* \u06BE : ARABIC LETTER HEH DOACHASHMEE  */
+/* \u06C1 : ARABIC LETTER HEH GOAL  */
+/* \u06D2 : ARABIC LETTER YEH BARREE  */
+
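A minimal usage sketch of the scanner generated from this specification (the demo class and the sample input are illustrative assumptions, not part of the changeset): each yylex() call returns the Unicode string for the next matched Buckwalter symbol and null once the input is exhausted, so a caller simply concatenates the tokens.

import java.io.IOException;
import java.io.StringReader;

public class Buckwalter2UnicodeDemo {
  public static void main(String[] args) throws IOException {
    // "kitAbN" is a sample Buckwalter transliteration: kaf, kasra, ta, alif, ba, dammatan
    Buckwalter2UnicodeLex lex = new Buckwalter2UnicodeLex(new StringReader("kitAbN"));
    StringBuilder unicode = new StringBuilder();
    String token;
    while ((token = lex.yylex()) != null) {   // null signals end of input
      unicode.append(token);
    }
    System.out.println(unicode);              // the word rendered in Arabic script
  }
}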
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Buckwalter2UnicodeLex.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,909 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.general;
+
+
+/**
+ * This class is a scanner generated by 
+ * <a href="http://www.jflex.de/">JFlex</a> 1.4.3
+ * on 20.11.09 17:57 from the specification file
+ * <tt>/Users/jwillenborg/java/existDevMai2009/mpdl/extensions/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Buckwalter2Unicode.lex</tt>
+ */
+public class Buckwalter2UnicodeLex {
+
+  /** This character denotes the end of file */
+  public static final int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+  private static final int ZZ_BUFFERSIZE = 16384;
+
+  /** lexical states */
+  public static final int YYINITIAL = 0;
+
+  /**
+   * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+   * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+   *                  at the beginning of a line
+   * l is of the form l = 2*k, k a non negative integer
+   */
+  private static final int ZZ_LEXSTATE[] = { 
+     0, 0
+  };
+
+  /** 
+   * Translates characters to character classes
+   */
+  private static final String ZZ_CMAP_PACKED = 
+    "\12\0\1\0\26\0\1\76\1\0\1\57\1\24\1\101\1\5\1\3"+
+    "\2\0\1\20\1\100\1\74\1\77\1\75\1\0\1\104\2\0\1\105"+
+    "\5\0\1\106\1\70\1\73\1\1\1\0\1\2\1\67\1\71\1\7"+
+    "\2\107\1\26\1\31\1\46\1\65\1\15\1\107\1\63\1\50\2\107"+
+    "\1\47\1\107\1\62\1\107\1\66\1\25\1\27\1\107\1\64\2\107"+
+    "\1\44\1\30\1\72\1\0\1\102\1\56\1\33\1\60\1\51\1\10"+
+    "\1\107\1\17\1\103\1\34\1\32\1\42\1\53\1\14\1\36\1\37"+
+    "\1\40\1\41\1\55\1\11\1\35\1\21\1\23\1\12\1\52\1\13"+
+    "\1\43\1\16\1\45\1\22\1\61\1\4\1\6\1\54\uff81\0";
+
+  /** 
+   * Translates characters to character classes
+   */
+  private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+  /** 
+   * Translates DFA states to action switch labels.
+   */
+  private static final int [] ZZ_ACTION = zzUnpackAction();
+
+  private static final String ZZ_ACTION_PACKED_0 =
+    "\1\0\1\1\1\2\1\3\1\4\1\5\1\6\1\7"+
+    "\1\10\1\11\1\12\1\13\1\14\1\15\1\16\1\17"+
+    "\1\20\1\21\1\22\1\23\1\24\1\25\1\26\1\27"+
+    "\1\30\1\31\1\32\1\33\1\34\1\35\1\36\1\37"+
+    "\1\40\1\41\1\42\1\43\1\44\1\45\1\46\1\47"+
+    "\1\50\1\51\1\52\1\53\1\54\1\55\1\56\1\57"+
+    "\1\60\1\61\1\62\1\63\1\64\1\65\1\66\1\67"+
+    "\1\70\1\71\1\72\1\73\1\74\1\75\1\76\1\77"+
+    "\1\100\1\101\1\102\1\103\30\0\1\104\1\0\1\105"+
+    "\13\0\1\106\1\107\1\110\1\111";
+
+  private static int [] zzUnpackAction() {
+    int [] result = new int[110];
+    int offset = 0;
+    offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackAction(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
+
+
+  /** 
+   * Translates a state to a row index in the transition table
+   */
+  private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
+
+  private static final String ZZ_ROWMAP_PACKED_0 =
+    "\0\0\0\110\0\220\0\110\0\110\0\110\0\330\0\110"+
+    "\0\110\0\110\0\110\0\110\0\110\0\110\0\110\0\110"+
+    "\0\110\0\110\0\110\0\110\0\110\0\110\0\110\0\110"+
+    "\0\110\0\110\0\110\0\110\0\u0120\0\110\0\110\0\110"+
+    "\0\110\0\110\0\110\0\110\0\110\0\110\0\110\0\110"+
+    "\0\110\0\110\0\110\0\110\0\110\0\110\0\110\0\110"+
+    "\0\110\0\110\0\110\0\110\0\110\0\110\0\110\0\110"+
+    "\0\110\0\110\0\110\0\110\0\110\0\110\0\110\0\110"+
+    "\0\110\0\110\0\110\0\110\0\u0168\0\u01b0\0\u01f8\0\u0240"+
+    "\0\u0288\0\u02d0\0\u0318\0\u0360\0\u03a8\0\u03f0\0\u0438\0\u0480"+
+    "\0\u04c8\0\u0510\0\u0558\0\u05a0\0\u05e8\0\u0630\0\u0678\0\u06c0"+
+    "\0\u0708\0\u0750\0\u0798\0\u07e0\0\110\0\u0828\0\110\0\u0870"+
+    "\0\u08b8\0\u0900\0\u0948\0\u0990\0\u09d8\0\u0a20\0\u0a68\0\u0ab0"+
+    "\0\u0af8\0\u0b40\0\110\0\110\0\110\0\110";
+
+  private static int [] zzUnpackRowMap() {
+    int [] result = new int[110];
+    int offset = 0;
+    offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackRowMap(String packed, int offset, int [] result) {
+    int i = 0;  /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int high = packed.charAt(i++) << 16;
+      result[j++] = high | packed.charAt(i++);
+    }
+    return j;
+  }
+
+  /** 
+   * The transition table of the DFA
+   */
+  private static final int [] ZZ_TRANS = zzUnpackTrans();
+
+  private static final String ZZ_TRANS_PACKED_0 =
+    "\1\2\1\3\1\4\1\5\1\6\1\7\1\10\1\11"+
+    "\1\12\1\13\1\14\1\15\1\16\1\17\1\20\1\21"+
+    "\1\22\1\23\1\24\1\25\1\26\1\27\1\30\1\31"+
+    "\1\32\1\33\1\34\1\35\1\36\1\37\1\40\1\41"+
+    "\1\42\1\43\1\44\1\45\1\46\1\47\1\50\1\51"+
+    "\1\52\1\53\1\54\1\55\1\56\1\57\1\60\1\61"+
+    "\1\62\1\63\1\64\1\65\1\66\1\67\1\70\1\71"+
+    "\1\72\1\73\1\74\1\75\1\76\1\77\1\100\1\101"+
+    "\1\102\1\103\1\104\5\2\110\0\2\105\1\0\105\105"+
+    "\7\0\4\106\1\107\4\106\1\0\1\110\2\106\1\0"+
+    "\5\106\1\111\1\0\3\106\1\112\14\106\1\0\1\106"+
+    "\1\0\1\113\2\0\5\106\14\0\1\106\3\0\1\106"+
+    "\13\0\1\114\5\0\1\115\10\0\1\116\4\0\1\117"+
+    "\50\0\2\105\1\2\105\105\7\0\11\106\1\0\3\106"+
+    "\1\0\6\106\1\0\20\106\1\0\1\106\4\0\5\106"+
+    "\4\0\1\2\7\0\1\106\3\0\1\106\7\0\11\106"+
+    "\1\0\3\106\1\0\6\106\1\0\20\106\1\0\1\106"+
+    "\4\0\5\106\4\0\1\2\7\0\1\120\3\0\1\106"+
+    "\7\0\2\106\1\121\6\106\1\0\3\106\1\0\6\106"+
+    "\1\0\20\106\1\0\1\106\4\0\5\106\4\0\1\2"+
+    "\7\0\1\106\3\0\1\106\7\0\3\106\1\122\5\106"+
+    "\1\0\3\106\1\0\6\106\1\0\20\106\1\0\1\106"+
+    "\4\0\5\106\4\0\1\2\7\0\1\106\3\0\1\106"+
+    "\7\0\2\106\1\123\1\124\5\106\1\0\3\106\1\0"+
+    "\6\106\1\0\20\106\1\0\1\106\4\0\5\106\4\0"+
+    "\1\2\7\0\1\106\3\0\1\106\104\0\1\125\106\0"+
+    "\1\126\15\0\1\127\110\0\1\130\106\0\1\131\1\132"+
+    "\104\0\11\106\1\0\1\133\2\106\1\0\6\106\1\0"+
+    "\20\106\1\0\1\106\4\0\5\106\4\0\1\2\7\0"+
+    "\1\106\3\0\1\106\7\0\11\106\1\0\3\106\1\0"+
+    "\6\106\1\0\15\106\1\134\2\106\1\0\1\106\4\0"+
+    "\5\106\4\0\1\2\7\0\1\106\3\0\1\106\7\0"+
+    "\11\106\1\0\3\106\1\0\6\106\1\0\20\106\1\0"+
+    "\1\106\4\0\5\106\4\0\1\135\7\0\1\106\3\0"+
+    "\1\106\7\0\11\106\1\0\3\106\1\0\6\106\1\0"+
+    "\15\106\1\136\2\106\1\0\1\106\4\0\5\106\4\0"+
+    "\1\2\7\0\1\106\3\0\1\106\7\0\11\106\1\0"+
+    "\3\106\1\0\6\106\1\0\20\106\1\0\1\106\4\0"+
+    "\5\106\4\0\1\137\7\0\1\106\3\0\1\106\105\0"+
+    "\1\140\23\0\1\141\137\0\1\142\131\0\1\135\65\0"+
+    "\1\143\131\0\1\137\23\0\3\106\1\144\5\106\1\0"+
+    "\3\106\1\0\6\106\1\0\20\106\1\0\1\106\4\0"+
+    "\5\106\4\0\1\2\7\0\1\106\3\0\1\106\7\0"+
+    "\11\106\1\0\1\145\2\106\1\0\6\106\1\0\20\106"+
+    "\1\0\1\106\4\0\5\106\4\0\1\2\7\0\1\106"+
+    "\3\0\1\106\7\0\11\106\1\0\1\146\2\106\1\0"+
+    "\6\106\1\0\20\106\1\0\1\106\4\0\5\106\4\0"+
+    "\1\2\7\0\1\106\3\0\1\106\106\0\1\147\13\0"+
+    "\1\150\116\0\1\151\107\0\1\152\75\0\11\106\1\0"+
+    "\3\106\1\0\6\106\1\0\20\106\1\0\1\106\4\0"+
+    "\5\106\4\0\1\153\7\0\1\106\3\0\1\106\7\0"+
+    "\11\106\1\0\3\106\1\0\6\106\1\0\20\106\1\0"+
+    "\1\106\4\0\5\106\4\0\1\154\7\0\1\106\3\0"+
+    "\1\106\7\0\11\106\1\0\3\106\1\0\6\106\1\0"+
+    "\20\106\1\0\1\106\4\0\5\106\4\0\1\155\7\0"+
+    "\1\106\3\0\1\106\73\0\1\156\107\0\1\153\107\0"+
+    "\1\154\107\0\1\155\14\0";
+
+  private static int [] zzUnpackTrans() {
+    int [] result = new int[2952];
+    int offset = 0;
+    offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackTrans(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      value--;
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
+
+
+  /* error codes */
+  private static final int ZZ_UNKNOWN_ERROR = 0;
+  private static final int ZZ_NO_MATCH = 1;
+  private static final int ZZ_PUSHBACK_2BIG = 2;
+
+  /* error messages for the codes above */
+  private static final String ZZ_ERROR_MSG[] = {
+    "Unkown internal scanner error",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+  /**
+   * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+   */
+  private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+  private static final String ZZ_ATTRIBUTE_PACKED_0 =
+    "\1\0\1\11\1\1\3\11\1\1\25\11\1\1\47\11"+
+    "\30\0\1\11\1\0\1\11\13\0\4\11";
+
+  private static int [] zzUnpackAttribute() {
+    int [] result = new int[110];
+    int offset = 0;
+    offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+    return result;
+  }
+
+  private static int zzUnpackAttribute(String packed, int offset, int [] result) {
+    int i = 0;       /* index in packed string  */
+    int j = offset;  /* index in unpacked array */
+    int l = packed.length();
+    while (i < l) {
+      int count = packed.charAt(i++);
+      int value = packed.charAt(i++);
+      do result[j++] = value; while (--count > 0);
+    }
+    return j;
+  }
+
+  /** the input device */
+  private java.io.Reader zzReader;
+
+  /** the current state of the DFA */
+  private int zzState;
+
+  /** the current lexical state */
+  private int zzLexicalState = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int zzMarkedPos;
+
+  /** the current text position in the buffer */
+  private int zzCurrentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int zzStartRead;
+
+  /** endRead marks the last character in the buffer that has been read
+      from input */
+  private int zzEndRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn;
+
+  /** 
+   * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+   */
+  private boolean zzAtBOL = true;
+
+  /** zzAtEOF == true <=> the scanner is at the EOF */
+  private boolean zzAtEOF;
+
+  /** denotes if the user-EOF-code has already been executed */
+  private boolean zzEOFDone;
+
+  /* user code: */
+    /*
+     * Buckwalter to Unicode conversion
+     */
+
+
+
+  /**
+   * Creates a new scanner
+   * There is also a java.io.InputStream version of this constructor.
+   *
+   * @param   in  the java.io.Reader to read input from.
+   */
+  public Buckwalter2UnicodeLex(java.io.Reader in) {
+    this.zzReader = in;
+  }
+
+  /**
+   * Creates a new scanner.
+   * There is also a java.io.Reader version of this constructor.
+   *
+   * @param   in  the java.io.InputStream to read input from.
+   */
+  public Buckwalter2UnicodeLex(java.io.InputStream in) {
+    this(new java.io.InputStreamReader(in));
+  }
+
+  /** 
+   * Unpacks the compressed character translation table.
+   *
+   * @param packed   the packed character translation table
+   * @return         the unpacked character translation table
+   */
+  private static char [] zzUnpackCMap(String packed) {
+    char [] map = new char[0x10000];
+    int i = 0;  /* index in packed string  */
+    int j = 0;  /* index in unpacked array */
+    while (i < 178) {
+      int  count = packed.charAt(i++);
+      char value = packed.charAt(i++);
+      do map[j++] = value; while (--count > 0);
+    }
+    return map;
+  }
+
+
+  /**
+   * Refills the input buffer.
+   *
+   * @return      <code>false</code>, iff there was new input.
+   * 
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  private boolean zzRefill() throws java.io.IOException {
+
+    /* first: make room (if you can) */
+    if (zzStartRead > 0) {
+      System.arraycopy(zzBuffer, zzStartRead,
+                       zzBuffer, 0,
+                       zzEndRead-zzStartRead);
+
+      /* translate stored positions */
+      zzEndRead-= zzStartRead;
+      zzCurrentPos-= zzStartRead;
+      zzMarkedPos-= zzStartRead;
+      zzStartRead = 0;
+    }
+
+    /* is the buffer big enough? */
+    if (zzCurrentPos >= zzBuffer.length) {
+      /* if not: blow it up */
+      char newBuffer[] = new char[zzCurrentPos*2];
+      System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
+      zzBuffer = newBuffer;
+    }
+
+    /* finally: fill the buffer with new input */
+    int numRead = zzReader.read(zzBuffer, zzEndRead,
+                                            zzBuffer.length-zzEndRead);
+
+    if (numRead > 0) {
+      zzEndRead+= numRead;
+      return false;
+    }
+    // unlikely but not impossible: read 0 characters, but not at end of stream    
+    if (numRead == 0) {
+      int c = zzReader.read();
+      if (c == -1) {
+        return true;
+      } else {
+        zzBuffer[zzEndRead++] = (char) c;
+        return false;
+      }     
+    }
+
+	// numRead < 0
+    return true;
+  }
+
+    
+  /**
+   * Closes the input stream.
+   */
+  public final void yyclose() throws java.io.IOException {
+    zzAtEOF = true;            /* indicate end of file */
+    zzEndRead = zzStartRead;  /* invalidate buffer    */
+
+    if (zzReader != null)
+      zzReader.close();
+  }
+
+
+  /**
+   * Resets the scanner to read from a new input stream.
+   * Does not close the old reader.
+   *
+   * All internal variables are reset, the old input stream 
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>ZZ_INITIAL</tt>.
+   *
+   * @param reader   the new input stream 
+   */
+  public final void yyreset(java.io.Reader reader) {
+    zzReader = reader;
+    zzAtBOL  = true;
+    zzAtEOF  = false;
+    zzEOFDone = false;
+    zzEndRead = zzStartRead = 0;
+    zzCurrentPos = zzMarkedPos = 0;
+    yyline = yychar = yycolumn = 0;
+    zzLexicalState = YYINITIAL;
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   */
+  public final int yystate() {
+    return zzLexicalState;
+  }
+
+
+  /**
+   * Enters a new lexical state
+   *
+   * @param newState the new lexical state
+   */
+  public final void yybegin(int newState) {
+    zzLexicalState = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   */
+  public final String yytext() {
+    return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the 
+   * matched text. 
+   * 
+   * It is equivalent to yytext().charAt(pos), but faster
+   *
+   * @param pos the position of the character to fetch. 
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  public final char yycharat(int pos) {
+    return zzBuffer[zzStartRead+pos];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   */
+  public final int yylength() {
+    return zzMarkedPos-zzStartRead;
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a well-formed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the error message to display
+   */
+  private void zzScanError(int errorCode) {
+    String message;
+    try {
+      message = ZZ_ERROR_MSG[errorCode];
+    }
+    catch (ArrayIndexOutOfBoundsException e) {
+      message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+    }
+
+    throw new Error(message);
+  } 
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+  public void yypushback(int number)  {
+    if ( number > yylength() )
+      zzScanError(ZZ_PUSHBACK_2BIG);
+
+    zzMarkedPos -= number;
+  }
+
+
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   java.io.IOException  if any I/O-Error occurs
+   */
+  public java.lang.String yylex() throws java.io.IOException {
+    int zzInput;
+    int zzAction;
+
+    // cached fields:
+    int zzCurrentPosL;
+    int zzMarkedPosL;
+    int zzEndReadL = zzEndRead;
+    char [] zzBufferL = zzBuffer;
+    char [] zzCMapL = ZZ_CMAP;
+
+    int [] zzTransL = ZZ_TRANS;
+    int [] zzRowMapL = ZZ_ROWMAP;
+    int [] zzAttrL = ZZ_ATTRIBUTE;
+
+    while (true) {
+      zzMarkedPosL = zzMarkedPos;
+
+      zzAction = -1;
+
+      zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+  
+      zzState = ZZ_LEXSTATE[zzLexicalState];
+
+
+      zzForAction: {
+        while (true) {
+    
+          if (zzCurrentPosL < zzEndReadL)
+            zzInput = zzBufferL[zzCurrentPosL++];
+          else if (zzAtEOF) {
+            zzInput = YYEOF;
+            break zzForAction;
+          }
+          else {
+            // store back cached positions
+            zzCurrentPos  = zzCurrentPosL;
+            zzMarkedPos   = zzMarkedPosL;
+            boolean eof = zzRefill();
+            // get translated positions and possibly new buffer
+            zzCurrentPosL  = zzCurrentPos;
+            zzMarkedPosL   = zzMarkedPos;
+            zzBufferL      = zzBuffer;
+            zzEndReadL     = zzEndRead;
+            if (eof) {
+              zzInput = YYEOF;
+              break zzForAction;
+            }
+            else {
+              zzInput = zzBufferL[zzCurrentPosL++];
+            }
+          }
+          int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
+          if (zzNext == -1) break zzForAction;
+          zzState = zzNext;
+
+          int zzAttributes = zzAttrL[zzState];
+          if ( (zzAttributes & 1) == 1 ) {
+            zzAction = zzState;
+            zzMarkedPosL = zzCurrentPosL;
+            if ( (zzAttributes & 8) == 8 ) break zzForAction;
+          }
+
+        }
+      }
+
+      // store back cached position
+      zzMarkedPos = zzMarkedPosL;
+
+      switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+        case 54: 
+          { return "\u06AF";
+          }
+        case 74: break;
+        case 10: 
+          { return "\u0629";
+          }
+        case 75: break;
+        case 26: 
+          { return "\u0639";
+          }
+        case 76: break;
+        case 9: 
+          { return "\u0628";
+          }
+        case 77: break;
+        case 37: 
+          { return "\u0649";
+          }
+        case 78: break;
+        case 25: 
+          { return "\u0638";
+          }
+        case 79: break;
+        case 8: 
+          { return "\u0627";
+          }
+        case 80: break;
+        case 58: 
+          { return "\u06DF";
+          }
+        case 81: break;
+        case 36: 
+          { return "\u0648";
+          }
+        case 82: break;
+        case 68: 
+          { return "&gt;";
+          }
+        case 83: break;
+        case 24: 
+          { return "\u0637";
+          }
+        case 84: break;
+        case 7: 
+          { return "\u0626";
+          }
+        case 85: break;
+        case 35: 
+          { return "\u0647";
+          }
+        case 86: break;
+        case 23: 
+          { return "\u0636";
+          }
+        case 87: break;
+        case 2: 
+          { return "\u0625";
+          }
+        case 88: break;
+        case 69: 
+          { return "&lt;";
+          }
+        case 89: break;
+        case 34: 
+          { return "\u0646";
+          }
+        case 90: break;
+        case 67: 
+          { return "\u06ED";
+          }
+        case 91: break;
+        case 22: 
+          { return "\u0635";
+          }
+        case 92: break;
+        case 6: 
+          { return "\u0624";
+          }
+        case 93: break;
+        case 57: 
+          { return "\u06DC";
+          }
+        case 94: break;
+        case 33: 
+          { return "\u0645";
+          }
+        case 95: break;
+        case 66: 
+          { return "\u06EC";
+          }
+        case 96: break;
+        case 21: 
+          { return "\u0634";
+          }
+        case 97: break;
+        case 3: 
+          { return "\u0623";
+          }
+        case 98: break;
+        case 32: 
+          { return "\u0644";
+          }
+        case 99: break;
+        case 70: 
+          { return "|";
+          }
+        case 100: break;
+        case 65: 
+          { return "\u06EB";
+          }
+        case 101: break;
+        case 20: 
+          { return "\u0633";
+          }
+        case 102: break;
+        case 55: 
+          { return "\u0698";
+          }
+        case 103: break;
+        case 5: 
+          { return "\u0622";
+          }
+        case 104: break;
+        case 48: 
+          { return "\u0654";
+          }
+        case 105: break;
+        case 31: 
+          { return "\u0643";
+          }
+        case 106: break;
+        case 19: 
+          { return "\u0632";
+          }
+        case 107: break;
+        case 64: 
+          { return "\u06EA";
+          }
+        case 108: break;
+        case 4: 
+          { return "\u0621";
+          }
+        case 109: break;
+        case 52: 
+          { return "\u0686";
+          }
+        case 110: break;
+        case 47: 
+          { return "\u0653";
+          }
+        case 111: break;
+        case 30: 
+          { return "\u0642";
+          }
+        case 112: break;
+        case 18: 
+          { return "\u0631";
+          }
+        case 113: break;
+        case 46: 
+          { return "\u0652";
+          }
+        case 114: break;
+        case 29: 
+          { return "\u0641";
+          }
+        case 115: break;
+        case 17: 
+          { return "\u0630";
+          }
+        case 116: break;
+        case 45: 
+          { return "\u0651";
+          }
+        case 117: break;
+        case 28: 
+          { return "\u0640";
+          }
+        case 118: break;
+        case 44: 
+          { return "\u0650";
+          }
+        case 119: break;
+        case 1: 
+          { return yytext();
+          }
+        case 120: break;
+        case 50: 
+          { return "\u0671";
+          }
+        case 121: break;
+        case 49: 
+          { return "\u0670";
+          }
+        case 122: break;
+        case 63: 
+          { return "\u06E8";
+          }
+        case 123: break;
+        case 53: 
+          { return "\u06A4";
+          }
+        case 124: break;
+        case 56: 
+          { return "\u061F";
+          }
+        case 125: break;
+        case 16: 
+          { return "\u062F";
+          }
+        case 126: break;
+        case 62: 
+          { return "\u06E6";
+          }
+        case 127: break;
+        case 15: 
+          { return "\u062E";
+          }
+        case 128: break;
+        case 61: 
+          { return "\u06E5";
+          }
+        case 129: break;
+        case 43: 
+          { return "\u064F";
+          }
+        case 130: break;
+        case 14: 
+          { return "\u062D";
+          }
+        case 131: break;
+        case 42: 
+          { return "\u064E";
+          }
+        case 132: break;
+        case 60: 
+          { return "\u06E3";
+          }
+        case 133: break;
+        case 13: 
+          { return "\u062C";
+          }
+        case 134: break;
+        case 41: 
+          { return "\u064D";
+          }
+        case 135: break;
+        case 59: 
+          { return "\u06E2";
+          }
+        case 136: break;
+        case 12: 
+          { return "\u062B";
+          }
+        case 137: break;
+        case 40: 
+          { return "\u064C";
+          }
+        case 138: break;
+        case 11: 
+          { return "\u062A";
+          }
+        case 139: break;
+        case 51: 
+          { return "\u067E";
+          }
+        case 140: break;
+        case 39: 
+          { return "\u064B";
+          }
+        case 141: break;
+        case 27: 
+          { return "\u063A";
+          }
+        case 142: break;
+        case 38: 
+          { return "\u064A";
+          }
+        case 143: break;
+        case 71: 
+          { return ")";
+          }
+        case 144: break;
+        case 72: 
+          { return "(";
+          }
+        case 145: break;
+        case 73: 
+          { return "'";
+          }
+        case 146: break;
+        default: 
+          if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+            zzAtEOF = true;
+            return null;
+          } 
+          else {
+            zzScanError(ZZ_NO_MATCH);
+          }
+      }
+    }
+  }
+
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Language.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,38 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.general;
+
+import java.util.HashMap;
+
+public class Language {
+  private static Language instance;
+  private static HashMap<String, String> languageIds = new HashMap<String, String>();
+  
+  public static Language getInstance() {
+    if (instance == null) {
+      instance = new Language();
+      instance.init();
+    }
+    return instance;
+  }
+
+  private void init() {
+    languageIds.put("ar", "ar");
+    languageIds.put("de", "de");
+    languageIds.put("el", "el");
+    languageIds.put("grc", "el");
+    languageIds.put("en", "en");
+    languageIds.put("fr", "fr");
+    languageIds.put("it", "it");
+    languageIds.put("la", "la");
+    languageIds.put("lat", "la");
+    languageIds.put("nl", "nl");
+    languageIds.put("zh", "zh");
+  }
+  
+  public String getLanguageId(String language) {
+    if (language == null)
+      return null;
+    String retLanguageId = null;
+    retLanguageId = languageIds.get(language);
+    return retLanguageId;
+  }
+}
\ No newline at end of file
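A short sketch of how the Language singleton is meant to be used (illustrative, not part of the changeset): it folds the language codes accepted by the modules into the internal two-letter IDs and returns null for codes that are not registered.

public class LanguageDemo {
  public static void main(String[] args) {
    Language language = Language.getInstance();
    System.out.println(language.getLanguageId("grc"));  // "el"  - ancient Greek maps to Greek
    System.out.println(language.getLanguageId("lat"));  // "la"
    System.out.println(language.getLanguageId("sux"));  // null - Sumerian is not registered
  }
}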
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/general/Transcoder.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,163 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.general;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import edu.unc.epidoc.transcoder.TransCoder;
+
+public class Transcoder {
+  private static Transcoder instance;
+  private TransCoder betaCodeTranscoder;
+  
+  public static Transcoder getInstance() {
+    if (instance == null) {
+      instance = new Transcoder();
+    }
+    return instance;
+  }
+
+  public String transcodeFromBetaCode2UnicodeEpidoc(String inputStr) throws ApplicationException {
+    String encodedUnicodeStr = null;
+    try {
+      if (betaCodeTranscoder == null) {
+        betaCodeTranscoder = new TransCoder();
+        betaCodeTranscoder.setParser("BetaCode");
+        betaCodeTranscoder.setConverter("UnicodeC");
+      }
+      encodedUnicodeStr = betaCodeTranscoder.getString(inputStr);
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return encodedUnicodeStr;
+  }
+  
+  public String transcodeFromBetaCode2Unicode(String inputStr) throws ApplicationException {
+    StringReader strReader = new StringReader(inputStr);
+    Betacode2UnicodeLex betacode2UnicodeLex = new Betacode2UnicodeLex(strReader);
+    String retStr = "";
+    String token = "";
+    while (token != null) {
+      try {
+        token = betacode2UnicodeLex.yylex();
+        if (token != null)
+          retStr += token;
+      } catch (IOException e ) {
+        throw new ApplicationException(e);
+      }
+    }
+    return retStr;
+    /* 
+    // alternative to JFlex 
+    String encodedUnicodeStr = null;
+    if (inputStr.matches("^a)"))
+      encodedUnicodeStr = inputStr.replaceFirst("^a)", "\u1F00");
+    else if (inputStr.matches("^a("))
+      encodedUnicodeStr = inputStr.replaceFirst("^a(", "\u1F01");
+    else if (inputStr.matches("^a)\\"))
+      encodedUnicodeStr = inputStr.replaceFirst("^a)\\", "\u1F02");
+      
+    // the longest regular expressions first
+    
+    return encodedUnicodeStr;
+    */
+  }
+  
+  public String transcodeFromBuckwalter2Unicode(String inputStr) throws ApplicationException {
+    StringReader strReader = new StringReader(inputStr);
+    Buckwalter2UnicodeLex buckwalter2UnicodeLex = new Buckwalter2UnicodeLex(strReader);
+    String retStr = "";
+    String token = "";
+    while (token != null) {
+      try {
+        token = buckwalter2UnicodeLex.yylex();
+        if (token != null)
+          retStr += token;
+      } catch (IOException e ) {
+        throw new ApplicationException(e);
+      }
+    }
+    return retStr;
+  }
+
+
+  
+  public String transcodeFromBuckwalter2UnicodeAraMorph(String inputStr) {
+    String encodedUnicodeStr = arabizeWord(inputStr);
+    return encodedUnicodeStr;
+  }
+
+  /*
+   * copied from http://www.nongnu.org/aramorph/english/download.html
+   * Class: AraMorph
+   */
+  private String arabizeWord(String translitered) {
+    String tmp_word = translitered;
+    // convert to transliteration
+    tmp_word = tmp_word.replaceAll("'", "\u0621"); //\u0621 : ARABIC LETTER HAMZA
+    tmp_word = tmp_word.replaceAll("\\|", "\u0622"); //\u0622 : ARABIC LETTER ALEF WITH MADDA ABOVE
+    tmp_word = tmp_word.replaceAll(">", "\u0623"); //\u0623 : ARABIC LETTER ALEF WITH HAMZA ABOVE
+    tmp_word = tmp_word.replaceAll("&", "\u0624"); //\u0624 : ARABIC LETTER WAW WITH HAMZA ABOVE
+    tmp_word = tmp_word.replaceAll("<", "\u0625"); //\u0625 : ARABIC LETTER ALEF WITH HAMZA BELOW
+    tmp_word = tmp_word.replaceAll("}", "\u0626"); //\u0626 : ARABIC LETTER YEH WITH HAMZA ABOVE
+    tmp_word = tmp_word.replaceAll("A", "\u0627"); //\u0627 : ARABIC LETTER ALEF
+    tmp_word = tmp_word.replaceAll("b", "\u0628"); //\u0628 : ARABIC LETTER BEH
+    tmp_word = tmp_word.replaceAll("p", "\u0629"); //\u0629 : ARABIC LETTER TEH MARBUTA
+    tmp_word = tmp_word.replaceAll("t", "\u062A"); //\u062A : ARABIC LETTER TEH
+    tmp_word = tmp_word.replaceAll("v", "\u062B"); //\u062B : ARABIC LETTER THEH
+    tmp_word = tmp_word.replaceAll("j", "\u062C"); //\u062C : ARABIC LETTER JEEM
+    tmp_word = tmp_word.replaceAll("H", "\u062D"); //\u062D : ARABIC LETTER HAH
+    tmp_word = tmp_word.replaceAll("x", "\u062E"); //\u062E : ARABIC LETTER KHAH
+    tmp_word = tmp_word.replaceAll("d", "\u062F"); //\u062F : ARABIC LETTER DAL
+    tmp_word = tmp_word.replaceAll("\\*", "\u0630"); //\u0630 : ARABIC LETTER THAL
+    tmp_word = tmp_word.replaceAll("r", "\u0631"); //\u0631 : ARABIC LETTER REH
+    tmp_word = tmp_word.replaceAll("z", "\u0632"); //\u0632 : ARABIC LETTER ZAIN
+    tmp_word = tmp_word.replaceAll("s", "\u0633" ); //\u0633 : ARABIC LETTER SEEN
+    tmp_word = tmp_word.replaceAll("\\$", "\u0634"); //\u0634 : ARABIC LETTER SHEEN
+    tmp_word = tmp_word.replaceAll("S", "\u0635"); //\u0635 : ARABIC LETTER SAD
+    tmp_word = tmp_word.replaceAll("D", "\u0636"); //\u0636 : ARABIC LETTER DAD
+    tmp_word = tmp_word.replaceAll("T", "\u0637"); //\u0637 : ARABIC LETTER TAH
+    tmp_word = tmp_word.replaceAll("Z", "\u0638"); //\u0638 : ARABIC LETTER ZAH
+    tmp_word = tmp_word.replaceAll("E", "\u0639"); //\u0639 : ARABIC LETTER AIN
+    tmp_word = tmp_word.replaceAll("g", "\u063A"); //\u063A : ARABIC LETTER GHAIN
+    tmp_word = tmp_word.replaceAll("_", "\u0640"); //\u0640 : ARABIC TATWEEL
+    tmp_word = tmp_word.replaceAll("f", "\u0641"); //\u0641 : ARABIC LETTER FEH
+    tmp_word = tmp_word.replaceAll("q", "\u0642"); //\u0642 : ARABIC LETTER QAF
+    tmp_word = tmp_word.replaceAll("k", "\u0643"); //\u0643 : ARABIC LETTER KAF
+    tmp_word = tmp_word.replaceAll("l", "\u0644"); //\u0644 : ARABIC LETTER LAM
+    tmp_word = tmp_word.replaceAll("m", "\u0645"); //\u0645 : ARABIC LETTER MEEM
+    tmp_word = tmp_word.replaceAll("n", "\u0646"); //\u0646 : ARABIC LETTER NOON
+    tmp_word = tmp_word.replaceAll("h", "\u0647"); //\u0647 : ARABIC LETTER HEH
+    tmp_word = tmp_word.replaceAll("w", "\u0648"); //\u0648 : ARABIC LETTER WAW
+    tmp_word = tmp_word.replaceAll("Y", "\u0649"); //\u0649 : ARABIC LETTER ALEF MAKSURA
+    tmp_word = tmp_word.replaceAll("y", "\u064A"); //\u064A : ARABIC LETTER YEH
+    tmp_word = tmp_word.replaceAll("F", "\u064B"); //\u064B : ARABIC FATHATAN
+    tmp_word = tmp_word.replaceAll("N", "\u064C"); //\u064C : ARABIC DAMMATAN
+    tmp_word = tmp_word.replaceAll("K", "\u064D"); //\u064D : ARABIC KASRATAN
+    tmp_word = tmp_word.replaceAll("a", "\u064E"); //\u064E : ARABIC FATHA
+    tmp_word = tmp_word.replaceAll("u", "\u064F"); //\u064F : ARABIC DAMMA
+    tmp_word = tmp_word.replaceAll("i", "\u0650"); //\u0650 : ARABIC KASRA
+    tmp_word = tmp_word.replaceAll("~", "\u0651"); //\u0651 : ARABIC SHADDA
+    tmp_word = tmp_word.replaceAll("o", "\u0652"); //\u0652 : ARABIC SUKUN
+    tmp_word = tmp_word.replaceAll("`", "\u0670"); //\u0670 : ARABIC LETTER SUPERSCRIPT ALEF
+    tmp_word = tmp_word.replaceAll("\\{", "\u0671"); //\u0671 : ARABIC LETTER ALEF WASLA
+    tmp_word = tmp_word.replaceAll("P", "\u067E"); //\u067E : ARABIC LETTER PEH
+    tmp_word = tmp_word.replaceAll("J", "\u0686"); //\u0686 : ARABIC LETTER TCHEH
+    tmp_word = tmp_word.replaceAll("V", "\u06A4"); //\u06A4 : ARABIC LETTER VEH
+    tmp_word = tmp_word.replaceAll("G", "\u06AF"); //\u06AF : ARABIC LETTER GAF
+    tmp_word = tmp_word.replaceAll("R", "\u0698"); //\u0698 : ARABIC LETTER JEH (no more in Buckwalter system)
+    //Not in Buckwalter system \u0679 : ARABIC LETTER TTEH
+    //Not in Buckwalter system \u0688 : ARABIC LETTER DDAL
+    //Not in Buckwalter system \u06A9 : ARABIC LETTER KEHEH
+    //Not in Buckwalter system \u0691 : ARABIC LETTER RREH
+    //Not in Buckwalter system \u06BA : ARABIC LETTER NOON GHUNNA
+    //Not in Buckwalter system \u06BE : ARABIC LETTER HEH DOACHASHMEE
+    //Not in Buckwalter system \u06C1 : ARABIC LETTER HEH GOAL
+    //Not in Buckwalter system \u06D2 : ARABIC LETTER YEH BARREE
+    tmp_word = tmp_word.replaceAll(",", "\u060C" ); //\u060C : ARABIC COMMA
+    tmp_word = tmp_word.replaceAll(";", "\u061B"); //\u061B : ARABIC SEMICOLON
+    tmp_word = tmp_word.replaceAll("\\?", "\u061F"); //\u061F : ARABIC QUESTION MARK
+    return tmp_word;
+  }
+  
+}
\ No newline at end of file
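A hedged usage sketch for the Transcoder singleton (the demo class and input strings are assumptions, not part of the changeset): Betacode and Buckwalter input is tokenized by the generated JFlex scanners and the returned Unicode fragments are concatenated; scanner I/O errors surface as ApplicationException.

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;

public class TranscoderDemo {
  public static void main(String[] args) throws ApplicationException {
    Transcoder transcoder = Transcoder.getInstance();
    // Betacode input for a Greek word; the exact output depends on the Betacode2UnicodeLex rules
    String greek  = transcoder.transcodeFromBetaCode2Unicode("lo/gos");
    // Buckwalter transliteration of an Arabic word
    String arabic = transcoder.transcodeFromBuckwalter2Unicode("kitAbN");
    System.out.println(greek);
    System.out.println(arabic);
  }
}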
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/app/Lexica.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,157 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.app;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Set;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
+
+/*
+florio: 70091 records (6 of them are not xml valid)
+bonitz: 14648 records (46 of them are not xml valid)
+webster: 111733 records (3 of them are not xml valid)
+ls: 53500 records (14 of them are not xml valid)
+autenrieth: 10158 records (468 of them are not xml valid)
+cooper: 33124 records (116 of them are not xml valid)
+baretti: 53555 records (0 of them are not xml valid)
+salmone: 6360 records (11 of them are not xml valid)
+lsj: 112631 records (26922 of them are not xml valid)
+ */
+public class Lexica {
+  private static Lexica instance;
+  private static HashMap<String, Lexicon> lexica = new HashMap<String, Lexicon>();
+  
+  public static Lexica getInstance() {
+    if (instance == null) {
+      instance = new Lexica();
+      instance.init();
+    }
+    return instance;
+  }
+
+  private void init() {
+    Lexicon autenrieth = new Lexicon("autenrieth", "el");
+    autenrieth.setDescription("Autenrieth, a Homeric lexicon");
+    Lexicon baretti = new Lexicon("baretti", "it");
+    baretti.setDescription("Baretti, a dictionary of the English and Italian languages");
+    Lexicon bonitz = new Lexicon("bonitz", "el");
+    bonitz.setDescription("Bonitz, index Aristotelicus");
+    Lexicon cooper = new Lexicon("cooper", "la");
+    cooper.setDescription("Cooper, Thesaurus Linguae Romanae et Brittanicae");
+    Lexicon florio = new Lexicon("florio", "it");
+    florio.setDescription("Florio, a worlde of wordes, or most copious, dictionarie in Italian and English");
+    Lexicon ls = new Lexicon("ls", "la");
+    ls.setDescription("Lewis and Short, Latin dictionary");
+    Lexicon lsj = new Lexicon("lsj", "el");
+    lsj.setDescription("Liddell-Scott-Jones, a Greek-English lexicon");
+    Lexicon salmone = new Lexicon("salmone", "ar");
+    salmone.setDescription("Salmoné, an advanced learner's Arabic-English dictionary");
+    Lexicon salmoneUnicode = new Lexicon("salmoneUnicode", "ar");
+    salmoneUnicode.setDescription("Salmoné, an advanced learner's Arabic-English dictionary");
+    Lexicon webster = new Lexicon("webster", "en");
+    webster.setDescription("Webster's revised unabridged dictionary (1913)");
+    lexica.put("autenrieth", autenrieth);
+    lexica.put("baretti", baretti);
+    lexica.put("bonitz", bonitz);
+    lexica.put("cooper", cooper);
+    lexica.put("florio", florio);
+    lexica.put("ls", ls);
+    lexica.put("lsj", lsj);
+    lexica.put("salmone", salmone);
+    lexica.put("webster", webster);
+  }
+  
+  public ArrayList<Lexicon> getLexicons(String lang) {
+    String language = Language.getInstance().getLanguageId(lang);
+    ArrayList<Lexicon> retLexicons = null;
+    Set<String> keys = lexica.keySet();
+    Iterator<String> it = keys.iterator();
+    while (it.hasNext()) {
+      String lexName = it.next();
+      Lexicon lexicon = lexica.get(lexName);
+      String sourceLanguage = lexicon.getSourceLanguage();
+      if (sourceLanguage != null && sourceLanguage.equals(language)) {
+        if (retLexicons == null)
+          retLexicons = new ArrayList<Lexicon>();
+        retLexicons.add(lexicon);
+      }
+    }
+    return retLexicons;
+  }
+
+  public ArrayList<Lexicon> getLexicons() {
+    ArrayList<Lexicon> retLexicons = null;
+    Set<String> keys = lexica.keySet();
+    Iterator<String> it = keys.iterator();
+    while (it.hasNext()) {
+      String lexName = it.next();
+      Lexicon lexicon = lexica.get(lexName);
+      if (retLexicons == null)
+        retLexicons = new ArrayList<Lexicon>();
+      retLexicons.add(lexicon);
+    }
+    return retLexicons;
+  }
+
+  public ArrayList<Lexicon> getBetacodeLexicons() {
+    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
+    retLexicons.add(lexica.get("autenrieth"));
+    retLexicons.add(lexica.get("bonitz"));
+    retLexicons.add(lexica.get("lsj"));
+    return retLexicons;
+  }
+  
+  public ArrayList<Lexicon> getBuckwalterLexicons() {
+    ArrayList<Lexicon> retLexicons = new ArrayList<Lexicon>();
+    retLexicons.add(lexica.get("salmone"));
+    return retLexicons;
+  }
+  
+}
+
+/*  TODO
+<option value="dwds">Das Digitale Wörterbuch der deutschen Sprache</option>
+<option value="grimm">Deutsches Wörterbuch von J. und W. Grimm (experimental)</option>
+<option value="artfl">Dictionnaire de l'Académie francaise, 4e éd. (1762)</option>
+<option value="epsd">Pennsylvania Sumerian Dictionary</option>
+
+  else if (dictname == "dwds") lang="de";   
+  else if (dictname == "grimm") lang="de";  
+  else if (dictname == "artfl") lang="fr"; 
+  else if (dictname == "epsd") lang="sux";
+
+DWDS:
+  
+Link: http://www.dwds.de/?woerterbuch=1&qu=auto   
+Logo: http://www.dwds.de/images/dwds_logo.gif  
+Copyright: Copyright &copy; by Berlin-Brandenburgische Akademie der Wissenschaften, Wörterbuch der deutschen Gegenwartssprache, all rights reserved.
+
+Grimm:
+
+Link: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/report_lemma?wb=G&word=auto  
+View: http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/selectarticles?lemid=  
+Output:
+<html>
+<head>
+<title>Deutsches Wörterbuch von Jacob und Wilhelm Grimm</title>
+<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_wbb" type="text/css"></link>
+<link rel="stylesheet" href="http://germa63.uni-trier.de:8080/Projects/WBB/woerterbuecher/dwb/styles_add" type="text/css"></link>
+<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=utf-8"></META>
+</head>
+<body>
+
+ARTFL:
+
+Name: Dictionnaire de l'Académie francaise, 4e éd.
+Preprocessing of the word yourWord: $word =~ s/%([0-9A-F]{2})/pack("H2", $1)/ge;
+Link: http://colet.uchicago.edu/cgi-bin/dico1look.pl?dicoid=ACAD1762&strippedhw=yourWord
+
+EPSD:
+
+Name: ePSD (Pennsylvania Sumerian Dictionary)
+Link: http://psd.museum.upenn.edu/cgi-bin/epsd.plx?x=epsd&q=yourWord
+
+
+
+ */
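A small sketch of the Lexica registry in use (illustrative, not part of the changeset): lexica are selected by source language via Language.getLanguageId, so both "el" and "grc" return the Greek dictionaries, and null is returned when nothing matches.

import java.util.ArrayList;

public class LexicaDemo {
  public static void main(String[] args) {
    ArrayList<Lexicon> greekLexica = Lexica.getInstance().getLexicons("grc");
    if (greekLexica != null) {   // null when no lexicon is registered for the language
      for (Lexicon lexicon : greekLexica) {
        // expected: autenrieth, bonitz and lsj with their descriptions
        System.out.println(lexicon.getName() + ": " + lexicon.getDescription());
      }
    }
  }
}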
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/app/Lexicon.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,96 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.app;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.Hashtable;
+
+public class Lexicon {
+  private String name;
+  private String sourceLang;
+  private String description;
+  private Hashtable<String, LexiconEntry> entries;
+
+  public Lexicon(String name, String sourceLanguage) {
+    this.name = name;
+    this.sourceLang = sourceLanguage;
+    this.entries = new Hashtable<String, LexiconEntry>();
+  }
+  
+  public String getName() {
+    return name;
+  }
+  
+  public void setName(String name) {
+    this.name = name;  
+  }
+  
+  public String getSourceLanguage() {
+    return sourceLang;
+  }
+  
+  public String getDescription() {
+    return description;
+  }
+  
+  public void setDescription(String description) {
+    this.description = description;  
+  }
+  
+  public boolean isBetacodeLexicon() {
+    boolean isBetacode = false;
+    if (name.equals("autenrieth") || name.equals("bonitz") || name.equals("lsj"))
+      isBetacode = true;
+    return isBetacode;
+  }
+  
+  public boolean isBuckwalterLexicon() {
+    boolean isBuckwalter = false;
+    if (name.equals("salmone"))
+      isBuckwalter = true;
+    return isBuckwalter;
+  }
+  
+  public ArrayList<LexiconEntry> getEntries() {
+    ArrayList<LexiconEntry> result = new ArrayList<LexiconEntry>();
+    if (entries != null) {
+      Enumeration<String> entryKeys = entries.keys();
+      while(entryKeys.hasMoreElements()) {
+        String entryKey = entryKeys.nextElement();
+        LexiconEntry le = entries.get(entryKey);
+        result.add(le);
+      }
+    }
+    Collections.sort(result);
+    if (result.isEmpty())
+      return null;
+    else 
+      return result;
+  }
+
+  public boolean isEmpty() {
+    if (entries == null || entries.isEmpty())
+      return true;
+    else
+      return false;
+  }
+  
+  public void addEntry(LexiconEntry newEntry) {
+    if (entries == null)
+      this.entries = new Hashtable<String, LexiconEntry>();  
+    entries.put(newEntry.getFormName(), newEntry);
+  }
+  
+  /*
+   * clones the lexicon metadata without its entries (non-Javadoc)
+   * @see java.lang.Object#clone()
+   */
+  public Lexicon clone() {
+    Lexicon lex = new Lexicon(name, sourceLang);
+    lex.description = description;
+    lex.entries = new Hashtable<String, LexiconEntry>();
+    return lex;
+  }
+  
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/app/LexiconEntry.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,121 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.app;
+
+public class LexiconEntry implements Comparable<LexiconEntry> {
+  private String lexiconName;
+  private String formName;
+  private String content;
+  private boolean xmlValid = false;
+  private boolean xmlMadeValid = false;
+  private String validationCode;
+  private String validationFailElementName;
+
+  public LexiconEntry(String lexiconName, String formName, String content) {
+    this.lexiconName = lexiconName;
+    this.formName = formName;
+    this.content = content;
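+    // the stored content carries its own validity marker (written by DBLexWriter):
+    // extract the embedded <xml-valid> flag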
+    if (content != null) {
+      int begin = content.indexOf("<xml-valid>");
+      int end = content.indexOf("</xml-valid>");
+      if (begin != -1 && end != -1) {
+        String xmlValid = content.substring(begin + 11, end);
+        if (xmlValid != null) {
+          if (xmlValid.equals("true"))
+            this.xmlValid = true;
+          else if (xmlValid.equals("false"))
+            this.xmlValid = false;
+        }
+      }
+    }
+  }
+  
+  public String getLexiconName() {
+    return lexiconName;
+  }
+
+  public String getFormName() {
+    return formName;
+  }
+  
+  public void setFormName(String formName) {
+    this.formName = formName;  
+  }
+  
+  public String getContent() {
+    return content;
+  }
+
+  public void setContent(String content) {
+    this.content = content;  
+  }
+  
+  public boolean isXmlValid() {
+    return xmlValid;
+  }
+  
+  public void setXmlValid(boolean xmlValid) {
+    this.xmlValid = xmlValid;
+  }
+
+  public String getValidationCode() {
+    return validationCode;
+  }
+
+  public void setValidationCode(String validationCode) {
+    this.validationCode = validationCode;
+  }
+
+  public String getValidationFailElementName() {
+    return validationFailElementName;
+  }
+
+  public void setValidationFailElementName(String validationFailElementName) {
+    this.validationFailElementName = validationFailElementName;
+  }
+
+  public boolean isXmlMadeValid() {
+    return xmlMadeValid;
+  }
+
+  public void setXmlMadeValid(boolean xmlMadeValid) {
+    this.xmlMadeValid = xmlMadeValid;
+  }
+
+  public String getRepairedEntry() {
+    String retStr = null;
+    if (content != null) {
+      int begin = content.indexOf("<repaired-entry>");
+      int end = content.indexOf("</repaired-entry>");
+      if (begin != -1 && end != -1) {
+        retStr = content.substring(begin, end) + "</repaired-entry>";
+      }
+    }
+    return retStr;
+  }
+
+  public String getOriginalEntry() {
+    String retStr = null;
+    if (content != null) {
+      int begin = content.indexOf("<original-entry>");
+      int end = content.indexOf("</original-entry>");
+      if (begin != -1 && end != -1) {
+        retStr = content.substring(begin, end) + "</original-entry>";
+      }
+    }
+    return retStr;
+  }
+
+  public int compareTo(LexiconEntry l) {
+    if (l.formName == null && this.formName == null) {
+      return 0;
+    }
+    if (this.formName == null) {
+      return 1;
+    }
+    if (l.formName == null) {
+      return -1;
+    }
+    return this.formName.compareTo(l.formName);
+  }
+
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/DBLexWriter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,630 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.db;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sleepycat.je.Cursor;
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseEntry;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.LockMode;
+import com.sleepycat.je.OperationStatus;
+import com.sleepycat.je.util.DbLoad;
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexica;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexicon;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.LexiconEntry;
+
+public class DBLexWriter {
+  private static DBLexWriter instance;
+  private static String MPDL_DATA_DIR = MpdlConstants.MPDL_DATA_DIR;
+  private static String DATA_FILES_DIR_LEXICA = MPDL_DATA_DIR + "/dataFiles/pollux";
+  private static String DB_DIR_LEXICA = MPDL_DATA_DIR + "/dataBerkeleyDB/pollux";
+  private DbEnvLex dbEnvLexica;
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
+  public static DBLexWriter getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new DBLexWriter();
+    }
+    return instance;
+  }
+
+  public static void main(String[] args) throws ApplicationException {
+    getInstance();
+    instance.beginOperation();
+    System.out.print("Start ...");
+    // instance.initReadOnly();
+    instance.initReadWrite();
+    // instance.readSampleData();
+    // instance.testTranscoder();
+    // instance.printSizeOfAllLexicons();
+    instance.writeLexiconsToFiles();
+    // instance.loadPolluxDbDumpsToDb();
+    // instance.copyAndRepairAndTranscodeDumps();
+    instance.end();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    System.out.println("End.");
+    System.out.println("Needed time: " + elapsedTime + " seconds");
+  }
+
+  private void initReadWrite() throws ApplicationException {
+    dbEnvLexica = new DbEnvLex();
+    dbEnvLexica.setDataDir(DB_DIR_LEXICA);
+    dbEnvLexica.initReadWrite();
+  }
+  
+  private void initReadOnly() throws ApplicationException {
+    dbEnvLexica = new DbEnvLex();
+    dbEnvLexica.setDataDir(DB_DIR_LEXICA);
+    dbEnvLexica.initReadOnly();
+    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+    for (int i=0; i<lexicons.size(); i++) {
+      Lexicon lexicon = lexicons.get(i);
+      String lexiconName = lexicon.getName();
+      dbEnvLexica.openDatabase(lexiconName);
+    }
+  }
+  
+  private void loadPolluxDbDumpsToDb() throws ApplicationException {
+    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+    for (int i=0; i<lexicons.size(); i++) {
+      Lexicon lexicon = lexicons.get(i);
+      String lexiconName = lexicon.getName();
+      loadDbDumpToDb(lexiconName);
+    }
+  }
+  
+  private void loadDbDumpToDb(String lexiconName) throws ApplicationException {
+    String dumpFileName = DATA_FILES_DIR_LEXICA + "/" + lexiconName + ".dump";
+    String dbName = lexiconName + "Dump.db";
+    try {
+      BufferedReader bufferedReader = new BufferedReader(new FileReader(dumpFileName));
+      DbLoad loader = new DbLoad();
+      loader.setEnv(dbEnvLexica.getEnv());
+      loader.setDbName(dbName);
+      loader.setInputReader(bufferedReader);
+      loader.setIgnoreUnknownConfig(true);
+      loader.load();
+      bufferedReader.close();
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void readSampleData() throws ApplicationException {
+    try {
+      List<String> dbNames = dbEnvLexica.getEnv().getDatabaseNames();
+      String l1 = readEntry("autenrieth", "au)to/s");
+      String l2 = readEntry("ls", "laudabilis");
+      String l3 = readEntry("lsjUnicode", "ἄδρεπτος");
+      String l4 = readEntry("salmoneUnicode", "ءرش");
+      System.out.println("Autenrieth: autos: " + l1);
+      System.out.println("Lewis & Short: Laudabilis: " + l2);
+      System.out.println("LSJ: ἄδρεπτος: " + l3);
+      System.out.println("Salmone: طب: " + l4);
+      printSampleEntries("salmoneUnicode", 10);
+      printSampleEntries("lsjUnicode", 1000);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void end() throws ApplicationException {
+    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+    for (int i=0; i<lexicons.size(); i++) {
+      Lexicon lexicon = lexicons.get(i);
+      String lexiconName = lexicon.getName();
+      dbEnvLexica.closeDatabase(lexiconName);
+      dbEnvLexica.closeDatabase(lexiconName + "Dump");
+    }
+    dbEnvLexica.close();
+  }
+
+  private String readEntry(String lexiconName, String formName) throws ApplicationException {
+    String retString = null;
+    try {
+      String keyStr = formName;
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
+      Cursor cursor = lexDB.openCursor(null, null);
+      DatabaseEntry foundValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT);
+      if (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundValueBytes = foundValue.getData();
+        retString = new String(foundValueBytes, "utf-8");
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retString;
+  }
+  
+  private void printSizeOfAllLexiconsTemp() throws ApplicationException {
+    String lexiconName = "lsj";
+    int[] sizes = getSizes(lexiconName);
+    System.out.println(lexiconName + ": " + sizes[0] + " records (" + sizes[1] + " of them are not xml valid)");
+  }
+  
+  private void printSizeOfAllLexicons() throws ApplicationException {
+    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+    for (int i=0; i<lexicons.size(); i++) {
+      Lexicon lexicon = lexicons.get(i);
+      String lexiconName = lexicon.getName();
+      int[] sizes = getSizes(lexiconName);
+      System.out.println(lexiconName + ": " + sizes[0] + " records (" + sizes[1] + " of them are not xml valid)");
+    }
+  }
+  
+  private int[] getSizes(String lexiconName) throws ApplicationException {
+    int size = 0;
+    int sizeXmlNotValidEntries = 0;
+    try {
+      dbEnvLexica.openDatabase(lexiconName);
+      Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
+      Cursor cursor = lexDB.openCursor(null, null);
+      DatabaseEntry dbEntryKey = new DatabaseEntry();
+      DatabaseEntry dbEntryValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getFirst(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        byte[] dbEntryKeyBytes = dbEntryKey.getData();
+        String dbEntryKeyStr = new String(dbEntryKeyBytes, "utf-8");
+        byte[] dbEntryValueBytes = dbEntryValue.getData();
+        String dbEntryValueStr = new String(dbEntryValueBytes, "utf-8");
+        int begin = dbEntryValueStr.indexOf("<repaired-entry>");
+        int end = dbEntryValueStr.indexOf("</repaired-entry>");
+        dbEntryValueStr = dbEntryValueStr.substring(begin, end) + "</repaired-entry>";
+        LexiconEntry dbLexEntry = new LexiconEntry(lexiconName, dbEntryKeyStr, dbEntryValueStr);
+        LexiconEntry xmlLexiconEntry = xmlParse(dbLexEntry);
+        if (! xmlLexiconEntry.isXmlValid()) {
+          sizeXmlNotValidEntries ++;
+        }
+        size++;
+        // advance the cursor only after the current record has been processed
+        operationStatus = cursor.getNext(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    int[] sizes = new int[2];
+    sizes[0] = size;
+    sizes[1] = sizeXmlNotValidEntries;
+    return sizes;
+  }
+  
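+  // Reads each raw dump database ("<lexiconName>Dump"), repairs known markup defects (per-lexicon hacks plus general fixes),
+  // transcodes Betacode/Buckwalter keys and entries to Unicode, and stores the result wrapped in
+  // <content><xml-valid/><original-entry/><repaired-entry/></content> in the target lexicon database.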
+  private void copyAndRepairAndTranscodeDumps() throws ApplicationException {
+    try {
+      ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+      for (int i=0; i<lexicons.size(); i++) {
+        Lexicon lexicon = lexicons.get(i);
+        String lexiconName = lexicon.getName();
+        HashMap<String, DatabaseEntry> lexDumpHashMap = getWholeLexiconHashMap(lexiconName + "Dump");
+        dbEnvLexica.openDatabase(lexiconName);
+        Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
+        Iterator<String> lexDumpIter = lexDumpHashMap.keySet().iterator();
+        while (lexDumpIter.hasNext()) {
+          String lexDumpKeyStr = lexDumpIter.next();
+          DatabaseEntry lexDumpValue = lexDumpHashMap.get(lexDumpKeyStr);
+          byte[] lexDumpValueBytes = lexDumpValue.getData();
+          String lexDumpValueStr = new String(lexDumpValueBytes, "utf-8");
+          String newLexValueStr = new String(lexDumpValueBytes, "utf-8");
+          // repair lsj
+          if (lexiconName.equals("lsj")) {
+            newLexValueStr = newLexValueStr.replaceAll("<br>", "<br/>");
+            newLexValueStr = newLexValueStr.replaceAll("<p>", "<p/>");
+            String elementNameGreek = "G";
+            newLexValueStr = deleteNestedTags(elementNameGreek, newLexValueStr); // delete tags <G> and </G> inside <G> 
+            newLexValueStr = newLexValueStr.replaceAll("lang=greek", "lang=\"greek\"");
+            boolean senseContained = newLexValueStr.matches(".*<sense.*>.*");
+            boolean endSenseContained = newLexValueStr.matches(".*</sense>.*");
+            if (senseContained && ! endSenseContained)
+              newLexValueStr = newLexValueStr.replaceAll("<sense .*?>", ""); 
+            else if (!senseContained && endSenseContained)
+              newLexValueStr = newLexValueStr.replaceAll("</sense>", ""); 
+            boolean refContained = newLexValueStr.matches(".*<ref.*>.*");
+            boolean endRefContained = newLexValueStr.matches(".*</ref>.*");
+            if (refContained && ! endRefContained)
+              newLexValueStr = newLexValueStr.replaceAll("<ref .*?>", ""); 
+            else if (!refContained && endRefContained)
+              newLexValueStr = newLexValueStr.replaceAll("</ref>", ""); 
+            /*
+            boolean itypeContained = newLexValueStr.matches(".*<itype.*>.*");
+            boolean endItypeContained = newLexValueStr.matches(".*</itype>.*");
+            if (itypeContained && ! endItypeContained)
+              newLexValueStr = newLexValueStr.replaceAll("<itype .*?>", ""); 
+            else if (!itypeContained && endItypeContained)
+              newLexValueStr = newLexValueStr.replaceAll("</itype>", "");
+            */ 
+          }
+          // repair cooper
+          if (lexiconName.equals("cooper")) {
+            newLexValueStr = newLexValueStr.replaceAll("<PB>", "");   // TODO hack
+            newLexValueStr = newLexValueStr.replaceAll("<p>", "<p/>");   // TODO hack
+          }
+          // repair baretti
+          if (lexiconName.equals("baretti")) {
+            newLexValueStr = newLexValueStr.replaceAll("<li>", "<li/>");   // TODO hack
+          }
+          // repair for all lexicons
+          newLexValueStr = newLexValueStr.replaceAll("type=style", "type=\"style\"");
+          newLexValueStr = newLexValueStr.replaceAll("type=dom", "type=\"dom\"");
+          newLexValueStr = newLexValueStr.replaceAll("<\\*>", ""); 
+          newLexValueStr = newLexValueStr.replaceAll("<p />", "<p/>");
+          LexiconEntry newLexEntryTemp = new LexiconEntry(lexiconName, lexDumpKeyStr, newLexValueStr);  // lexDumpKeyStr is not transcoded yet, but it is not used any further in this form
+          LexiconEntry newLexEntry = xmlParseAndRepair(newLexEntryTemp);
+          String xmlValidString = "<xml-valid>true</xml-valid>";
+          if (! newLexEntry.isXmlValid()) {
+            xmlValidString = "<xml-valid>false</xml-valid>";
+          }
+          newLexValueStr = newLexEntry.getContent();
+          // transcode the Betacode lexicon entries to Unicode (key and value)
+          if (lexicon.isBetacodeLexicon()) {
+            Transcoder transcoder = Transcoder.getInstance();
+            lexDumpKeyStr = transcoder.transcodeFromBetaCode2Unicode(lexDumpKeyStr);
+            String elementName = "G";
+            if (newLexEntry.isXmlValid()) {
+              newLexValueStr = transcodeByElementName("fromBetacode2Unicode", elementName, newLexValueStr);
+            }
+          }
+          // transcode the Buckwalter entries to Unicode (key and value)
+          if (lexicon.isBuckwalterLexicon()) {
+            Transcoder transcoder = Transcoder.getInstance();
+            lexDumpKeyStr = transcoder.transcodeFromBuckwalter2Unicode(lexDumpKeyStr);
+            String elementName = "AR";
+            if (newLexEntry.isXmlValid()) {
+              newLexValueStr = transcodeByElementName("fromBuckwalter2Unicode", elementName, newLexValueStr);
+            }
+          }
+          // put the entry into database 
+          newLexValueStr = "<content>" + xmlValidString + "<original-entry>" + lexDumpValueStr + "</original-entry>" + "<repaired-entry>" + newLexValueStr + "</repaired-entry>" + "</content>";
+          DatabaseEntry newLexDumpKey = new DatabaseEntry(lexDumpKeyStr.getBytes("utf-8"));
+          DatabaseEntry newLexValue = new DatabaseEntry(newLexValueStr.getBytes("utf-8"));
+          lexDB.put(null, newLexDumpKey, newLexValue);
+        }
+      }
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void printSampleEntries(String lexiconName, int count) throws ApplicationException {
+    try {
+      int counter = 0;
+      dbEnvLexica.openDatabase(lexiconName);
+      Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
+      Cursor cursor = lexDB.openCursor(null, null);
+      DatabaseEntry dbEntryKey = new DatabaseEntry();
+      DatabaseEntry dbEntryValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getFirst(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS  && counter < count) {
+        int size = dbEntryKey.getSize();
+        if (size > 0) {
+          byte[] dbEntryKeyBytes = dbEntryKey.getData();
+          String dbEntryKeyStr = new String(dbEntryKeyBytes, "utf-8");
+          System.out.println(lexiconName + ": key: " + dbEntryKeyStr + " value size: " +  dbEntryValue.getSize());
+        }
+        operationStatus = cursor.getNext(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+        counter++;
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  private void testTranscoder() throws ApplicationException {
+    String testStr = "<G>hfhf fdfd<G>ei)mi/</G> (<tr>sum</tr>), Aeol. <G>e)/mmi</G> hfhfh </G><author>Sapph.</author>2.15, <author>Theoc.</author>20.32; Cret. <G>h)mi/</G> <title>GDI</title> 4959a; <per>2</per><number>sg.</number> <G>ei)=</G>, Ep. and Ion. <cit><G>ei)s</G> <author>Od.</author>17.388</cit>, al., Aeol. <G>e)/ssi</G>, Ep. and Dor. <cit><G>e)ssi/</G> <author>Il.</author>1.176</cit>, <author>Pi.</author>";
+    String testStr2 = "aaaaa <G>1111a <G>2222a</G> <G>3333a</G> 1111a</G> aaaaa bbbbb <G>1111b <G>2222b</G> <G>3333b</G> 1111b</G> bbbbb ";
+    String testStr3 = "<G>e)pano/rqwsin e)/xein</G>, opp <G>a)ni/aton ei)=nai *hi</G>3. 1165 b18. --<G>e)panorqw/seis kai boh/qeiai *rb</G>5. 1383 a20.";
+    String testStr4 = "<G>suni^hmi</G> <author>Ar.</author><title>Av.</title>946 (s. v.l.), <author>Strato Com.</author>1.3: with variation of quantity, <G>plei=ston ou)=lon i(/ei <G>[i^]</G>, i)/oulon i(/ei [i_</G>] <title>Carm.Pop.</title> 1.]:&#x2014" +
+                         ";<br><tr>release, let go</tr>, <cit><G>h(=ka ..po/das kai\\ xei=re fe/resqai</G> <author>Od.</author>12.442</cit>; <G>h(=ke fe/resqai</G> <tr>let</tr> him float" + 
+                         "off, <author>Il.</author>21.120; <tr>let fall</tr>, <G>ka\\d de\\ ka/rhtos h(=ke ko/mas</G> <tr>made</tr> his locks <tr>flow</tr> down from his head, <author>Od.<" +
+                         "/author>6.231; [<cit><G>e)qei/ras] i(/ei lo/fon a)mfi/</G> .... ggg";
+    String testStr5 = "plei=ston ou)=lon i(/ei ";
+    String testStr6 = "*a as< as as: *)a *s ss ";
+    Transcoder t = Transcoder.getInstance();
+    String transcoded = t.transcodeFromBetaCode2Unicode(testStr4);
+    transcoded = t.transcodeFromBetaCode2Unicode(testStr5);
+    transcoded = t.transcodeFromBetaCode2Unicode(testStr6);
+    
+    String arabTestStr1 = "^nutaf";
+    String arabTestStr2 = "min";
+    String arabTestStr3 = "Aal-Hiyal (^qAla ^&gt;arisTwTAlys) yataEaj~aba Aal-nAs minhA &lt;im~A fy Aal-&gt;a$yA' Aal~aty taEriDu TabEAF fa-mim~A lA yuElamu Eil~atuhu wa-&lt;im~A fy Aal-&gt;a$yA' Aal-muxAlifap li-l-TabE fa-mim~A yuEmalu bi-Aal-SinAEap li-manfaEap Aal-nAs li-&gt;an~a Aal-TabyEap tulzimu &gt;abadAF jihap wAHidap wa-&gt;am~A manAfiE Aal-nAs fa-&lt;in~ahA taxtalifu &lt;ixtilAfAF kavyrAF.";
+    transcoded = t.transcodeFromBuckwalter2Unicode(arabTestStr1);
+    transcoded = t.transcodeFromBuckwalter2Unicode(arabTestStr2);
+    transcoded = t.transcodeFromBuckwalter2Unicode(arabTestStr3);
+    
+    // String deletedNestedTags = deleteNestedTags("G", testStr4);
+    // String regExpr = "(<G>.*?)<G>(.*?)</G>(.*?)<G>(.*?)</G>(.*?</G>)";
+    String regExpr = "(<G>.*?)<G>(.*)(</G>){1,}(.*?</G>)";
+    // String regExpr = "(<G>.*?)<G>(.*?)</G>(.*?)<G>(.*?)</G>(.*?</G>)";
+    String replaceStr = testStr2.replaceAll(regExpr, "$1$2$4");
+    // String replaceStr2 = testStr2.replaceAll("<G>(.*)<G>(.*)</G>(.*)<G>(.*)</G>(.*)</G>", "<G>$2$3$4$5</G>");
+    regExpr = "<G>.*?(<G>.*?</G>){1,}.*?</G>";
+    regExpr = "(<G>.*?)<G>(.*?)</G>(.*?){1,}(.*?</G>)";
+    // String regExpr = "[a-zA-Z0-9]+?\\[.+?\\]/" + "|" + "[a-zA-Z0-9]+?/" + "|" + "[a-zA-Z0-9]+?\\[.+\\]$" + "|" + "[a-zA-Z0-9]+?$"; // pathName example: "/archimedes[@xmlns:xlink eq "http://www.w3.org/1999/xlink"]/text/body/chap/p[@type eq "main"]/s/foreign[@lang eq "en"]"
+    Pattern p = Pattern.compile(regExpr, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); // both flags enabled
+    Matcher m = p.matcher(testStr2);
+    while (m.find()) {
+      int msBeginPos = m.start();
+      int msEndPos = m.end();
+      String matchStr = testStr2.substring(msBeginPos, msEndPos);
+      String bla = "";
+    }
+
+    String retStr = transcodeByElementName("fromBetacode2Unicode", "G", testStr);
+    retStr = transcodeByElementName("fromBetacode2Unicode", "G", "bla");
+    retStr = transcodeByElementName("fromBetacode2Unicode", "G", "");
+  }
+  
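+  // Transcodes only the text inside <elementName>...</elementName> segments of inputStr;
+  // everything outside those segments is copied through unchanged.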
+  private String transcodeByElementName(String transcodeDirection, String elementName, String inputStr) throws ApplicationException {
+    if (inputStr == null || elementName == null)
+      return null;
+    String elemBeginTag = "<" + elementName + ">";
+    String elemEndTag = "</" + elementName + ">";
+    Transcoder transcoder = Transcoder.getInstance();
+    String outputStr = "";
+    int begin = inputStr.indexOf(elemBeginTag);
+    int end = inputStr.indexOf(elemEndTag);
+    while (begin != -1 && end != -1 && begin < end) {
+      String before = inputStr.substring(0, begin);
+      String origStr = inputStr.substring(begin + elemBeginTag.length(), end);
+      origStr = StringUtilEscapeChars.deleteSpecialXmlEntities(origStr);
+      String transcodedStr = origStr;
+      if (transcodeDirection.equals("fromBetacode2Unicode"))
+        transcodedStr = transcoder.transcodeFromBetaCode2Unicode(origStr);
+      else if (transcodeDirection.equals("fromBuckwalter2Unicode"))
+        transcodedStr = transcoder.transcodeFromBuckwalter2Unicode(origStr);
+      outputStr = outputStr + before + new String(elemBeginTag);
+      outputStr = outputStr + transcodedStr;
+      outputStr = outputStr + new String(elemEndTag);
+      inputStr = inputStr.substring(end + elemEndTag.length());
+      begin = inputStr.indexOf(elemBeginTag);
+      end = inputStr.indexOf(elemEndTag);
+    }
+    outputStr = outputStr + inputStr;
+    return outputStr;
+  }
+  
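+  // Flattens nested <elementName> occurrences: inner begin/end tags inside an outer
+  // <elementName>...</elementName> span are removed (used to repair the <G> markup of the lsj dump).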
+  private String deleteNestedTags(String elementName, String inputStr) {
+    String inputStrTmp = new String(inputStr);
+    String elemBeginTag = "<" + elementName + ">";
+    String elemEndTag = "</" + elementName + ">";
+    String outputStr = "";
+    int begin = inputStrTmp.indexOf(elemBeginTag);
+    int end = inputStrTmp.indexOf(elemEndTag);
+    while (begin != -1 && end != -1) {
+      end = getIndexClosedTag(begin, elementName, inputStrTmp);
+      String before = inputStrTmp.substring(0, begin);
+      String origStr = null;
+      if (end == -1) // if no end tag could be found
+        origStr = inputStrTmp.substring(begin + elemBeginTag.length(), inputStrTmp.length());
+      else
+        origStr = inputStrTmp.substring(begin + elemBeginTag.length(), end);
+      origStr = origStr.replaceAll(elemBeginTag, "");
+      origStr = origStr.replaceAll(elemEndTag, "");
+      outputStr = outputStr + before + new String(elemBeginTag);
+      outputStr = outputStr + origStr;
+      outputStr = outputStr + new String(elemEndTag);
+      inputStrTmp = inputStrTmp.substring(end + elemEndTag.length());
+      begin = inputStrTmp.indexOf(elemBeginTag);
+    }
+    outputStr = outputStr + inputStrTmp;
+    return outputStr;
+  }
+  
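+  // Returns the index of the end tag that closes the element opened at position 'begin',
+  // skipping end tags that belong to nested begin tags; returns -1 if no matching end tag is found.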
+  private int getIndexClosedTag(int begin, String elementName, String inputStr) {
+    int beginTmp = begin;
+    int retIndex = -1;
+    String elemBeginTag = "<" + elementName + ">";
+    String elemEndTag = "</" + elementName + ">";
+    int indexEndTag = inputStr.indexOf(elemEndTag);
+    while (indexEndTag != -1) {
+      String betweenTmpStr = inputStr.substring(beginTmp + elemBeginTag.length(), indexEndTag);
+      int indexBeginTag = betweenTmpStr.indexOf(elemBeginTag);
+      if (indexBeginTag != -1) {
+        beginTmp = indexEndTag;
+      } else {
+        return indexEndTag;
+      }
+      indexEndTag = inputStr.indexOf(elemEndTag, indexEndTag + elemEndTag.length());
+    }
+    return retIndex;
+  }
+  
+  private HashMap<String, DatabaseEntry> getWholeLexiconHashMap(String lexiconName) throws ApplicationException {
+    HashMap<String, DatabaseEntry> lexHashMap = new HashMap<String, DatabaseEntry>();
+    try {
+      dbEnvLexica.openDatabase(lexiconName);
+      Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
+      Cursor cursor = lexDB.openCursor(null, null);
+      DatabaseEntry dbEntryKey = new DatabaseEntry();
+      DatabaseEntry dbEntryValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getFirst(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        int size = dbEntryKey.getSize();
+        if (size > 0) {
+          byte[] dbEntryKeyBytes = dbEntryKey.getData();
+          String dbEntryKeyStr = new String(dbEntryKeyBytes, "utf-8");
+          DatabaseEntry newDbEntryValue = new DatabaseEntry(dbEntryValue.getData());
+          lexHashMap.put(dbEntryKeyStr, newDbEntryValue);
+        }
+        operationStatus = cursor.getNext(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return lexHashMap;
+  }
+  
+  private LexiconEntry xmlParseAndRepair(LexiconEntry lexEntry) throws ApplicationException {
+    String origLexEntryContent = lexEntry.getContent();
+    String lexEntryContent = new String(origLexEntryContent);
+    lexEntry.setContent(lexEntryContent);
+    // parse and repair: try to repair it 3 times through parsing
+    LexiconEntry retLexiconEntry = xmParseAndRepairLocal(lexEntry);
+    retLexiconEntry = xmParseAndRepairLocal(retLexiconEntry);
+    retLexiconEntry = xmParseAndRepairLocal(retLexiconEntry);
+    // if it could not be repaired, the original (not XML valid) content is delivered unchanged
+    if (! retLexiconEntry.isXmlValid())
+      retLexiconEntry.setContent(origLexEntryContent);
+    return retLexiconEntry;
+  }
+
+  private LexiconEntry xmParseAndRepairLocal(LexiconEntry lexEntry) throws ApplicationException {
+    if (! lexEntry.isXmlValid()) {
+      lexEntry = xmlParse(lexEntry);
+    }
+    if (! lexEntry.isXmlValid() && lexEntry.getValidationCode() != null && lexEntry.getValidationCode().equals("elementNotClosed")) {
+      String elementName = lexEntry.getValidationFailElementName();
+      String lexiconEntryContent = lexEntry.getContent();
+      lexiconEntryContent = lexiconEntryContent.replaceAll("<" + elementName + " .*?>", "");
+      lexiconEntryContent = lexiconEntryContent.replaceAll("</" + elementName + ">", "");
+      lexEntry.setContent(lexiconEntryContent);
+      lexEntry.setXmlMadeValid(true);    
+    }
+    return lexEntry;
+  }
+  
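+  // Wraps the entry content in a <content> root element and parses it with a plain SAX parser;
+  // on an "element type ... must be terminated" error the offending element name is recorded
+  // so that xmParseAndRepairLocal() can strip that element's tags.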
+  private LexiconEntry xmlParse(LexiconEntry lexEntry) throws ApplicationException {
+    String lexEntryContent = "<content>" + lexEntry.getContent() + "</content>";
+    LexEntryContentHandler lexEntryContentHandler = new LexEntryContentHandler();
+    XMLReader xmlParser = new SAXParser();
+    xmlParser.setContentHandler(lexEntryContentHandler);
+    LexEntryErrorHandler lexEntryErrorHandler = new LexEntryErrorHandler();
+    xmlParser.setErrorHandler(lexEntryErrorHandler);
+    try {
+      Reader reader = new StringReader(lexEntryContent);
+      InputSource input = new InputSource(reader);
+      xmlParser.parse(input);
+      lexEntry.setXmlValid(true);
+    } catch (SAXException e) {
+      // no rethrow: mark the entry as invalid and record which element failed to close
+      lexEntry.setXmlValid(false);
+      String exceptionMessage = e.getMessage();
+      if (exceptionMessage.matches("The element type .* must be terminated by the matching end-tag .*")) {
+        int begin = exceptionMessage.indexOf("\"");
+        if (begin != -1) {
+          String subStr = exceptionMessage.substring(begin + 1);
+          int end = subStr.indexOf("\"");
+          if (end != -1) {
+            String elementName = exceptionMessage.substring(begin + 1, begin + 1 + end);
+            lexEntry.setValidationCode("elementNotClosed");
+            lexEntry.setValidationFailElementName(elementName);
+          }
+        }
+      }
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return lexEntry;
+  }
+
+  private void writeLexiconsToFiles() throws ApplicationException {
+    BufferedReader in = null;
+    BufferedOutputStream out = null;
+    try {
+      ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+      for (int i=0; i<lexicons.size(); i++) {
+        Lexicon lexicon = lexicons.get(i);
+        String lexiconName = lexicon.getName();
+        HashMap<String, DatabaseEntry> lexHashMap = getWholeLexiconHashMap(lexiconName);
+        Iterator<String> lexDumpIter = lexHashMap.keySet().iterator();
+        File outputFile = new File(DATA_FILES_DIR_LEXICA + "/" + lexiconName + ".xml");
+        out = new BufferedOutputStream(new FileOutputStream(outputFile));
+        write("<lexicon>\n", out);
+        write("<name>" + lexiconName + "</name>\n", out);
+        write("<description>" + lexicon.getDescription() + "</description>\n", out);
+        write("<entries>\n", out);
+        while (lexDumpIter.hasNext()) {
+          write("<entry>\n", out);
+          String lexKeyStr = lexDumpIter.next();
+          write("<form>" + lexKeyStr + "</form>\n", out);
+          DatabaseEntry lexValue = lexHashMap.get(lexKeyStr);
+          byte[] lexValueBytes = lexValue.getData();
+          write(lexValueBytes, out);
+          write("</entry>\n", out);
+        }
+        write("</entries>\n", out);
+        write("</lexicon>\n", out);
+      }
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+      if (out != null) try { out.close(); } catch (Exception e) { }
+    }
+  }
+  
+  private void write(byte[] inputBytes, BufferedOutputStream out) throws ApplicationException {
+    try {
+      out.write(inputBytes, 0, inputBytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  private void write(String outStr, BufferedOutputStream out) throws ApplicationException {
+    try {
+      byte[] bytes = outStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/DbEnvLex.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,101 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.db;
+
+import java.io.File;
+import java.util.HashMap;
+
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseConfig;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.Environment;
+import com.sleepycat.je.EnvironmentConfig;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class DbEnvLex {
+  private String dataDir;
+  private File envPath;
+  private Environment env;
+  private EnvironmentConfig envConfig;
+  private DatabaseConfig dbConfig;
+  private HashMap<String, Database> lexiconDBs = new HashMap<String, Database>();
+
+  public DbEnvLex() {
+  }
+
+  public void setDataDir(String dataDir) {
+    this.dataDir = dataDir;
+  }
+  
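+  // Opens the environment with the default configuration (no allowCreate, non-transactional),
+  // so the environment and its databases must already exist.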
+  public void initReadOnly() throws ApplicationException {
+    try {
+      envConfig = new EnvironmentConfig();
+      dbConfig = new DatabaseConfig();
+      envPath = new File(dataDir);
+      env = new Environment(envPath, envConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  public void initReadWrite() throws ApplicationException {
+    try {
+      envConfig = new EnvironmentConfig();
+      dbConfig = new DatabaseConfig();
+      envConfig.setReadOnly(false);
+      dbConfig.setReadOnly(false);
+      envConfig.setAllowCreate(true);
+      dbConfig.setAllowCreate(true);
+      envConfig.setTransactional(true);
+      dbConfig.setTransactional(true);
+      envPath = new File(dataDir);
+      env = new Environment(envPath, envConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  public void openDatabase(String lexiconName) throws ApplicationException {
+    try {
+      Database lexDB = lexiconDBs.get(lexiconName);
+      if (lexDB == null) {
+        Database lexiconDB = env.openDatabase(null, lexiconName + ".db", dbConfig);
+        lexiconDBs.put(lexiconName, lexiconDB);
+      }
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public void closeDatabase(String lexiconName) throws ApplicationException {
+    try {
+      if (lexiconDBs != null) {
+        Database lexiconDB = lexiconDBs.get(lexiconName);
+        if (lexiconDB != null)
+          lexiconDB.close();
+      }
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public Environment getEnv() {
+    return env;
+  }
+
+  public Database getLexiconDB(String lexiconName) {
+    Database lexiconDB = lexiconDBs.get(lexiconName);
+    return lexiconDB;
+  }
+
+  public void close() throws ApplicationException {
+    if (env != null) {
+      try {
+        env.close();
+      } catch (DatabaseException e) {
+        throw new ApplicationException(e);
+      }
+    }
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/LexEntryContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,43 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.db;
+
+import org.xml.sax.*;
+
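+// Intentionally empty: this handler is only registered so that the SAX parser in
+// DBLexWriter.xmlParse() performs a plain well-formedness check of a lexicon entry.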
+public class LexEntryContentHandler implements ContentHandler {
+  
+  public LexEntryContentHandler() {
+  }
+  
+  public void startDocument() throws SAXException {
+  }
+
+  public void endDocument() throws SAXException {
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(org.xml.sax.Locator arg1) {
+  }
+
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+  }
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/LexEntryErrorHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,12 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.db;
+
+import org.xml.sax.*;
+
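+// Intentionally empty: warnings and recoverable errors are ignored; DBLexWriter.xmlParse()
+// detects malformed entries via the SAXException thrown during parsing.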
+public class LexEntryErrorHandler implements ErrorHandler {
+  public void warning(SAXParseException exception) throws SAXException {
+  }
+  public void error(SAXParseException exception) throws SAXException {
+  }
+  public void fatalError(SAXParseException exception) throws SAXException {
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/lex/db/LexHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,175 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.lex.db;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Date;
+
+import com.sleepycat.je.Cursor;
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseEntry;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.LockMode;
+import com.sleepycat.je.OperationStatus;
+
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexica;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexicon;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.LexiconEntry;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+public class LexHandler {
+  private static LexHandler instance;
+  private static String MPDL_DATA_DIR = MpdlConstants.MPDL_EXIST_DATA_DIR;
+  private static String DB_DIR_LEXICA = MPDL_DATA_DIR + "/dataBerkeleyDB/pollux";
+  private DbEnvLex dbEnvLexica;
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
+  public static LexHandler getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new LexHandler();
+      instance.initReadOnly();
+    }
+    return instance;
+  }
+
+  /**
+   * Delivers lexicon entry keys with the help of the morphology component:
+   * the word form itself and the lemmas of the (optionally normalized) form
+   * are looked up in the lexicons of the given language.
+   * @param formName word form to look up
+   * @param language language code of the form
+   * @param normalize if true, the form is normalized before the lemma lookup
+   * @return list of lexicon entry keys, or null if none were found
+   * @throws ApplicationException
+   */
+  public ArrayList<String> getLexEntryKeys(String formName, String language, boolean normalize) throws ApplicationException {
+    ArrayList<String> lexEntryKeys = new ArrayList<String>();
+    MorphologyCache morphologyCache = MorphologyCache.getInstance();
+    ArrayList<Lemma> formLemmas = morphologyCache.getLemmasByFormName(language, formName, normalize);
+    boolean hasLexEntry = false;
+    hasLexEntry = hasLexEntryKey(formName, language);
+    if (hasLexEntry)
+      lexEntryKeys.add(formName);
+    if (formLemmas != null) {
+      for (int j=0; j<formLemmas.size(); j++) {
+        Lemma l = formLemmas.get(j);
+        String lName = l.getLemmaName();
+        if (! hasLexEntry) {
+          hasLexEntry = hasLexEntryKey(lName, language);
+        }
+        if (! lName.equals(formName) && hasLexEntry) {
+          lexEntryKeys.add(lName);
+        }
+      }
+    }
+    if(lexEntryKeys.isEmpty())
+      return null;
+    else
+      return lexEntryKeys;
+  }
+  
+  public boolean hasLexEntryKey(String formName, String language) throws ApplicationException {
+    boolean hasLexEntry = false;
+    ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLexicons(language);
+    if (statLexicons != null) {
+      for (int i=0; i<statLexicons.size(); i++) {
+        Lexicon lexicon = statLexicons.get(i).clone(); // clone without lexicon entries
+        LexiconEntry lexEntry = readEntry(lexicon.getName(), formName);
+        if (lexEntry != null) {
+          return true;
+        }
+      }
+    }
+    return hasLexEntry;
+  }
+  
+  public LexiconEntry readEntry(String lexiconName, String formName) throws ApplicationException {
+    LexiconEntry retLexEntry = null;
+    try {
+      String dbFoundValueStr = null;
+      String keyStr = formName;
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      Database lexDB = dbEnvLexica.getLexiconDB(lexiconName);
+      Cursor cursor = lexDB.openCursor(null, null);
+      DatabaseEntry foundValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT);
+      if (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundValueBytes = foundValue.getData();
+        dbFoundValueStr = new String(foundValueBytes, "utf-8");
+      }
+      cursor.close();
+      if (dbFoundValueStr != null) {
+        retLexEntry = new LexiconEntry(lexiconName, formName, dbFoundValueStr);
+      }
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retLexEntry;
+  }
+  
+  public String transcode(String fromEncoding, String toEncoding, String inputStr) throws ApplicationException {
+    String encodedStr = null;
+    Transcoder transcoder = Transcoder.getInstance();
+    if (fromEncoding.equals("buckwalter") && toEncoding.equals("unicode")) {
+      encodedStr = transcoder.transcodeFromBuckwalter2Unicode(inputStr);
+    } else if (fromEncoding.equals("betacode") && toEncoding.equals("unicode")) {
+      encodedStr = transcoder.transcodeFromBetaCode2Unicode(inputStr);
+    }
+    return encodedStr;
+  }
+  
+  public static void main(String[] args) throws ApplicationException {
+    getInstance();
+    instance.beginOperation();
+    System.out.print("Start ...");
+    instance.readSampleData();
+    instance.end();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    System.out.println("End.");
+    System.out.println("Needed time: " + elapsedTime + " seconds");
+  }
+
+  private void initReadOnly() throws ApplicationException {
+    dbEnvLexica = new DbEnvLex();
+    dbEnvLexica.setDataDir(DB_DIR_LEXICA);
+    dbEnvLexica.initReadOnly();
+    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+    for (int i=0; i<lexicons.size(); i++) {
+      Lexicon lexicon = lexicons.get(i);
+      String lexiconName = lexicon.getName();
+      dbEnvLexica.openDatabase(lexiconName);
+    }
+  }
+  
+  private void readSampleData() throws ApplicationException {
+    // List<String> dbNames = dbEnvLexica.getEnv().getDatabaseNames();
+    String l1 = readEntry("autenrieth", "au)to/s").getContent(); // greek: see also bonitz and lsj
+    String l2 = readEntry("ls", "laudabilis").getContent();  // latin
+    System.out.println("Autenrieth: autos: " + l1);
+    System.out.println("Lewis & Short: Laudabilis: " + l2);
+  }
+  
+  private void end() throws ApplicationException {
+    ArrayList<Lexicon> lexicons = Lexica.getInstance().getLexicons();
+    for (int i=0; i<lexicons.size(); i++) {
+      Lexicon lexicon = lexicons.get(i);
+      String lexiconName = lexicon.getName();
+      dbEnvLexica.closeDatabase(lexiconName);
+    }
+    dbEnvLexica.close();
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Form.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,333 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.app;
+
+public class Form {
+  private String provider;
+  private String language;
+  private String formName;
+  private String lemmaName;
+  private String pos;
+  private String tense;
+  private String voice;
+  private String casus;
+  private String number;
+  private String mood;
+  private String person;
+  private String gender;
+  private String definite;
+  
+  public Form() {
+  }
+  
+  public Form(String provider, String language, String formName) {
+    this.provider = provider;
+    this.language = language;
+    this.formName = formName;
+  }
+  
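+  // Normalizes form and lemma for indexing: lower-casing, XML escaping, removal of
+  // homograph markers, hyphens, blanks and plus signs.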
+  public void normalize() {
+    // lower case of form and lemma
+    formName = formName.toLowerCase();
+    lemmaName = lemmaName.toLowerCase();
+    // XML: special symbols
+    formName = formName.replaceAll("&", "&amp;");
+    formName = formName.replaceAll("'", "&apos;");
+    formName = formName.replaceAll("<", "&lt;");
+    formName = formName.replaceAll(">", "&gt;");
+    formName = formName.replaceAll("\"", "&quot;");
+    lemmaName = lemmaName.replaceAll("&", "&amp;");
+    lemmaName = lemmaName.replaceAll("'", "&apos;");
+    lemmaName = lemmaName.replaceAll("<", "&lt;");
+    lemmaName = lemmaName.replaceAll(">", "&gt;");
+    lemmaName = lemmaName.replaceAll("\"", "&quot;");
+    // unification of lemma names (homographs)  TODO do not unify the homographs
+    lemmaName = lemmaName.replaceAll("#[0-9]", "");
+    if (isArabic()) {
+      if (lemmaName != null) {
+        int length = lemmaName.length();
+        char lastChar = lemmaName.charAt(length - 1);
+        boolean isDigit = Character.isDigit(lastChar);
+        if (isDigit)
+          lemmaName = lemmaName.substring(0, length - 1);
+      }
+    }
+    // unification of forms and lemmas with hyphens: remove the hyphen
+    formName = formName.replaceAll("-", "");
+    lemmaName = lemmaName.replaceAll("-", "");
+    // unification of forms and lemmas with blanks (sequence of words): remove the blanks
+    formName = formName.replaceAll(" ", "");
+    lemmaName = lemmaName.replaceAll(" ", "");
+    // unification of forms and lemmas with plus symbols: remove the plus symbol
+    formName = formName.replaceAll("\\+", "");
+    lemmaName = lemmaName.replaceAll("\\+", "");
+    // TODO call MpdlMorphDataNormalizer (handle umlauts in German, accents in French, character classes (long s, ...), ...)
+    
+  }
+
+  public boolean isOk() {
+    boolean ret = true;
+    if (formName == null || lemmaName == null)
+      ret = false;
+    else if (formName.length() == 0 || lemmaName.length() == 0 || formName.length() == 1 || lemmaName.length() == 1)
+      ret = false;
+    return ret;
+  }
+  
+  public boolean isGreek() {
+    boolean ret = false;
+    if (language != null && language.equals("el"))
+      ret = true;
+    return ret;
+  }
+  
+  public boolean isArabic() {
+    boolean ret = false;
+    if (language != null && language.equals("ar"))
+      ret = true;
+    return ret;
+  }
+  
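+  // A form counts as richer if it carries more morphological detail than otherForm;
+  // currently only validity and the presence of a pos value are compared (see TODO below).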
+  public boolean isRicherThan(Form otherForm) {
+    boolean richer = false;
+    if (! isOk())
+      return false;
+    else if (! otherForm.isOk())
+      return true;
+    String otherFormPos = otherForm.getPos();
+    if (pos != null && pos.length() > 0 && (otherFormPos == null || otherFormPos.length() == 0))
+      return true;
+    // TODO all other cases
+    return richer;
+  }
+  
+  public String getXmlString() {
+    String xmlString = "<form>\n";
+    if (provider != null)
+      xmlString += "  <provider>" + provider + "</provider>\n";
+    if (language != null)
+      xmlString += "  <language>" + language + "</language>\n";
+    if (formName != null)
+      xmlString += "  <form-name>" + formName + "</form-name>\n";
+    if (lemmaName != null)
+      xmlString += "  <lemma-name>" + lemmaName + "</lemma-name>\n";
+    if (pos != null)
+      xmlString += "  <pos>" + pos + "</pos>\n";
+    if (tense != null)
+      xmlString += "  <tense>" + tense + "</tense>\n";
+    if (voice != null)
+      xmlString += "  <voice>" + voice + "</voice>\n";
+    if (casus != null)
+      xmlString += "  <casus>" + casus + "</casus>\n";
+    if (number != null)
+      xmlString += "  <number>" + number + "</number>\n";
+    if (mood != null)
+      xmlString += "  <mood>" + mood + "</mood>\n";
+    if (person != null)
+      xmlString += "  <person>" + person + "</person>\n";
+    if (gender != null)
+      xmlString += "  <gender>" + gender + "</gender>\n";
+    if (definite != null)
+      xmlString += "  <definite>" + definite + "</definite>\n";
+    xmlString += "</form>\n";
+    return xmlString;
+  }
+
+  public String toString() {
+    return getXmlString();
+  }
+
+  public String getTense() {
+    return tense;
+  }
+
+  public void setTense(String tense) {
+    this.tense = tense;
+  }
+
+  public void addTense(String newTense) {
+    if (tense == null)
+      this.tense = newTense;  
+    else
+      tense += newTense;
+  }
+  
+  public String getVoice() {
+    return voice;
+  }
+
+  public void setVoice(String voice) {
+    this.voice = voice;
+  }
+
+  public void addVoice(String newVoice) {
+    if (voice == null)
+      this.voice = newVoice;  
+    else
+      voice += newVoice;
+  }
+  
+  public String getCasus() {
+    return casus;
+  }
+
+  public void setCasus(String casus) {
+    this.casus = casus;
+  }
+
+  public void addCasus(String newCasus) {
+    if (casus == null)
+      this.casus = newCasus;  
+    else
+      casus += newCasus;
+  }
+  
+  public String getNumber() {
+    return number;
+  }
+
+  public void setNumber(String number) {
+    this.number = number;
+  }
+
+  public void addNumber(String newNumber) {
+    if (number == null)
+      this.number = newNumber;  
+    else
+      number += newNumber;
+  }
+  
+  public String getMood() {
+    return mood;
+  }
+
+  public void setMood(String mood) {
+    this.mood = mood;
+  }
+
+  public void addMood(String newMood) {
+    if (mood == null)
+      this.mood = newMood;  
+    else
+      mood += newMood;
+  }
+  
+  public String getPerson() {
+    return person;
+  }
+
+  public void setPerson(String person) {
+    this.person = person;
+  }
+
+  public void addPerson(String newPerson) {
+    if (person == null)
+      this.person = newPerson;  
+    else
+      person += newPerson;
+  }
+  
+  public String getGender() {
+    return gender;
+  }
+
+  public void setGender(String gender) {
+    this.gender = gender;
+  }
+
+  public void addGender(String newGender) {
+    if (gender == null)
+      this.gender = newGender;  
+    else
+      gender += newGender;
+  }
+  
+  public String getDefinite() {
+    return definite;
+  }
+
+  public void setDefinite(String definite) {
+    this.definite = definite;
+  }
+
+  public void addDefinite(String newDefinite) {
+    if (definite == null)
+      this.definite = newDefinite;  
+    else
+      definite += newDefinite;
+  }
+  
+  public String getLemmaName() {
+    return lemmaName;
+  }
+
+  public String getPos() {
+    return pos;
+  }
+
+  public String getProvider() {
+    return provider;
+  }
+  
+  public void setProvider(String provider) {
+    this.provider = provider;  
+  }
+  
+  public void addProvider(String newProvider) {
+    if (provider == null)
+      this.provider = newProvider;  
+    else
+      provider += newProvider;
+  }
+  
+  public String getLanguage() {
+    return language;
+  }
+
+  public void setLanguage(String language) {
+    this.language = language;  
+  }
+  
+  public void addLanguage(String newLanguage) {
+    if (language == null)
+      this.language = newLanguage;  
+    else
+      language += newLanguage;
+  }
+  
+  public String getFormName() {
+    return formName;
+  }
+
+  public void setFormName(String formName) {
+    this.formName = formName;  
+  }
+  
+  public void addFormName(String newFormName) {
+    if (formName == null)
+      this.formName = newFormName;  
+    else
+      formName += newFormName;
+  }
+  
+  public void setLemmaName(String lemmaName) {
+    this.lemmaName = lemmaName;  
+  }
+  
+  public void addLemmaName(String newLemmaName) {
+    if (lemmaName == null)
+      this.lemmaName = newLemmaName;  
+    else
+      lemmaName += newLemmaName;
+  }
+  
+  public void setPos(String pos) {
+    this.pos = pos;  
+  }
+  
+  public void addPos(String newPos) {
+    if (pos == null)
+      this.pos = newPos;  
+    else
+      pos += newPos;
+  }
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/Lemma.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,152 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.app;
+
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.Hashtable;
+
+
+public class Lemma implements Comparable<Lemma> {
+  private String provider;
+  private String language;
+  private String lemmaName;
+  private Hashtable<String, Form> forms;
+
+  public Lemma() {  
+  }
+  
+  public Lemma(String provider, String language, String lemmaName) {
+    this.provider = provider;
+    this.language = language;
+    this.lemmaName = lemmaName;
+    this.forms = new Hashtable<String, Form>();
+    // a lemma always contains at least the form whose name equals the lemma name
+    Form form = new Form(provider, language, lemmaName);
+    addForm(form);
+  }
+  
+  public String getProvider() {
+    return provider;
+  }
+  
+  public void setProvider(String provider) {
+    this.provider = provider;  
+  }
+  
+  public void addProvider(String newProvider) {
+    if (provider == null)
+      this.provider = newProvider;  
+    else
+      provider += newProvider;
+  }
+  
+  public String getLanguage() {
+    return language;
+  }
+
+  public void setLanguage(String language) {
+    this.language = language;  
+  }
+  
+  public void addLanguage(String newLanguage) {
+    if (language == null)
+      this.language = newLanguage;  
+    else
+      language += newLanguage;
+  }
+  
+  public String getLemmaName() {
+    return lemmaName;
+  }
+
+  public void setLemmaName(String lemmaName) {
+    this.lemmaName = lemmaName;  
+  }
+  
+  public void addLemmaName(String newLemmaName) {
+    if (lemmaName == null)
+      this.lemmaName = newLemmaName;  
+    else
+      lemmaName += newLemmaName;
+  }
+  
+  public Hashtable<String, Form> getForms() {
+    return forms;
+  }
+  
+  public ArrayList<Form> getForms(String provider) {
+    ArrayList<Form> result = new ArrayList<Form>();
+    Enumeration<String> keys = forms.keys();
+    while (keys.hasMoreElements()) {
+      String key = keys.nextElement();
+      Form form = forms.get(key);
+      String prov = form.getProvider();
+      if (prov.equals(provider))
+        result.add(form);
+    }
+    return result;
+  }
+  
+  public ArrayList<Form> getFormsList() {
+    ArrayList<Form> result = new ArrayList<Form>();
+    if(forms != null) {
+      Enumeration<String> keys = forms.keys();
+      while (keys.hasMoreElements()) {
+        String key = keys.nextElement();
+        Form form = forms.get(key);
+        result.add(form);
+      }
+    }
+    return result;
+  }
+  
+  public void setForms(ArrayList<Form> forms) {
+    for (int i=0; i<forms.size(); i++) {
+      Form f = forms.get(i);
+      addForm(f);
+    }
+  }
+  
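+  // Adds a form under the key language + "###" + formName. If a form with the same key is
+  // already present, the new one only replaces it when Form.isRicherThan() judges it to carry
+  // more information (a richer morphological description; see Form for the exact criterion).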
+  public void addForm(Form newForm) {
+    String formKey = newForm.getLanguage() + "###" + newForm.getFormName();
+    if (forms == null)
+      forms = new Hashtable<String, Form>(); 
+    Form f = forms.get(formKey);
+    if (f == null) {
+      forms.put(formKey, newForm);
+    } else {
+      if(newForm.isRicherThan(f))
+        forms.put(formKey, newForm);
+    }
+  }
+  
+  public Form getForm(String formKey) {
+    return forms.get(formKey);
+  }
+  
+  public String getXmlString() {
+    String xmlString = "<lemma>\n";
+    xmlString += "  <provider>" + provider + "</provider>\n";
+    xmlString += "  <language>" + language + "</language>\n";
+    xmlString += "  <lemma-name>" + lemmaName + "</lemma-name>\n";
+    xmlString += "</lemma>";
+    return xmlString;
+  }
+
+  public String toString() {
+    return getXmlString();
+  }
+  
+  public int compareTo(Lemma l) {
+    if (l.getLemmaName() == null && this.getLemmaName() == null) {
+      return 0;
+    }
+    if (this.getLemmaName() == null) {
+      return 1;
+    }
+    if (l.getLemmaName() == null) {
+      return -1;
+    }
+    return this.getLemmaName().compareTo(l.getLemmaName());
+  }
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphFileReaderContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,127 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.app;
+
+import java.util.Hashtable;
+
+import org.xml.sax.*;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+
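+// Reads a morphology file (a <forms> document with <form> children) and fills the supplied
+// hashtables: forms are keyed by language + "###" + formName, lemmas by language + "###" + lemmaName.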
+public class MorphFileReaderContentHandler implements ContentHandler {
+  private Hashtable<String, Form> forms;
+  private Hashtable<String, Lemma> lemmas;
+  private Element currentElement;
+  private Form currentForm;
+  
+  public MorphFileReaderContentHandler(Hashtable<String, Form> forms, Hashtable<String, Lemma> lemmas) {
+    this.forms = forms;
+    this.lemmas = lemmas;
+  }
+  
+  public void startDocument() throws SAXException {
+  }
+
+  public void endDocument() throws SAXException {
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    if (currentElement != null) {
+      String elemName = currentElement.name;
+      if (currentForm != null) {
+        char[] cCopy = new char[length];
+        System.arraycopy(c, start, cCopy, 0, length);
+        String charactersStr = String.valueOf(cCopy);
+        if (charactersStr != null && ! (charactersStr.trim().equals(""))) {
+          if (elemName.equals("provider")) {
+            currentForm.setProvider(charactersStr);
+          } else if (elemName.equals("language")) {
+            currentForm.setLanguage(charactersStr);
+          } else if (elemName.equals("form-name")) {
+            currentForm.setFormName(charactersStr);
+          } else if (elemName.equals("lemma-name")) {
+            currentForm.setLemmaName(charactersStr);
+          } else if (elemName.equals("pos")) {
+            currentForm.setPos(charactersStr);
+          } else if (elemName.equals("tense")) {
+            currentForm.setTense(charactersStr);
+          } else if (elemName.equals("voice")) {
+            currentForm.setVoice(charactersStr);
+          } else if (elemName.equals("casus")) {
+            currentForm.setCasus(charactersStr);
+          } else if (elemName.equals("number")) {
+            currentForm.setNumber(charactersStr);
+          } else if (elemName.equals("mood")) {
+            currentForm.setMood(charactersStr);
+          } else if (elemName.equals("person")) {
+            currentForm.setPerson(charactersStr);
+          } else if (elemName.equals("gender")) {
+            currentForm.setGender(charactersStr);
+          } else if (elemName.equals("definite")) {
+            currentForm.setDefinite(charactersStr);
+          }
+        }
+      }
+    } 
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(org.xml.sax.Locator arg1) {
+  }
+
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    currentElement = null;
+    if (name.equals("form")) {
+      String provider = currentForm.getProvider();
+      String language = currentForm.getLanguage();
+      String formName = currentForm.getFormName();
+      String lemmaName = currentForm.getLemmaName();
+      String formKey = language + "###" + formName;
+      forms.put(formKey, currentForm);
+      String lemmaKey = language + "###" + lemmaName;
+      Lemma lemma = lemmas.get(lemmaKey);
+      if(lemma == null) {
+        Lemma l = new Lemma(provider, language, lemmaName);
+        l.addForm(currentForm);
+        lemmas.put(lemmaKey, l);
+      } else {
+        lemma.addForm(currentForm);
+      }
+      currentForm = null;
+    }
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    currentElement = new Element(name);
+    if (name.equals("form")) {
+      currentForm = new Form();
+    }
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+  }
+  
+  private class Element {
+    String name;
+    String value;
+    
+    Element(String name) {
+      this.name = name;
+    }
+
+    Element(String name, String value) {
+      this.name = name;
+      this.value = value;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/MorphologyCache.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,402 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.app;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.Enumeration;
+import java.util.Hashtable;
+
+import org.apache.log4j.Logger;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlNormalizer;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.db.DBMorphHandler;
+import de.mpg.mpiwg.berlin.mpdl.lucene.LuceneUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.FileUtil;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+
+public class MorphologyCache {
+  private static MorphologyCache instance;
+  private static Logger LOGGER = Logger.getLogger(MorphologyCache.class); // Logs to EXIST_HOME/webapp/WEB-INF/logs/exist.log
+  private static String MPDL_DATA_DIR = MpdlConstants.MPDL_EXIST_DATA_DIR;
+  private static String DB_DIR_DONATUS = MPDL_DATA_DIR + "/dataBerkeleyDB/donatus";
+  private static String DB_DIR_DYNAMIC = MPDL_DATA_DIR + "/dataBerkeleyDB/dynamic"; 
+  private static String DATA_FILES_DIR = MPDL_DATA_DIR + "/dataFiles"; 
+  private static String DATA_FILE_DYNAMIC_FORMS = DATA_FILES_DIR + "/snowball-all-forms.xml";
+  public static int QUERY_MODE = 0;
+  public static int DOCUMENT_MODE = 1;
+  private static int MAX_HASHTABLE_SIZE = MpdlConstants.MORPHOLOGY_CACHE_SIZE;
+  protected int mode = QUERY_MODE;
+  private Hashtable<String, Hashtable<String, Lemma>> forms = new Hashtable<String, Hashtable<String, Lemma>>();  // cache of forms: key is language + "###" + formName
+  private Hashtable<String, Lemma> lemmas = new Hashtable<String, Lemma>();  // cache of lemmas: key is language + "###" + lemmaName
+  private DBMorphHandler dbMorphHandlerStatic;  // handles static morph data (BerkeleyDB)
+  private DBMorphHandler dbMorphHandlerDynamic; // handles dynamic morph data (BerkeleyDB)
+  private OutputStream outputStreamDynamicForms;  // backup file for all dynamic forms
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
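+  // Minimal usage sketch (illustrative only; assumes the BerkeleyDB and data-file directories exist):
+  //   MorphologyCache cache = MorphologyCache.getInstance();
+  //   ArrayList<Lemma> lemmas = cache.getLemmasByFormName("la", "amat", true);
+  //   cache.end();  // closes the BerkeleyDB handles and finalizes the dynamic forms backup file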
+  public static MorphologyCache getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new MorphologyCache();
+      instance.init();
+    }
+    return instance;
+  }
+
+  private void init() throws ApplicationException {
+    LOGGER.info("Mpdl: Init morphology cache ...");
+    instance.beginOperation();
+    dbMorphHandlerStatic = new DBMorphHandler(DB_DIR_DONATUS);
+    dbMorphHandlerStatic.start();
+    dbMorphHandlerStatic.openDatabases();
+    dbMorphHandlerDynamic = new DBMorphHandler(DB_DIR_DYNAMIC);
+    dbMorphHandlerDynamic.start();
+    dbMorphHandlerDynamic.openDatabases();
+    openDynamicFormsDataFile();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    LOGGER.info(" Needed time: " + elapsedTime + " seconds.");
+  }
+  
+  public int getMode() {
+    return mode;
+  }
+  
+  public void setMode(int newMode) {
+    this.mode = newMode;
+  }
+  
+  public void end() throws ApplicationException {
+    dbMorphHandlerStatic.closeDatabases();
+    dbMorphHandlerDynamic.closeDatabases();
+    closeDynamicFormsDataFile();
+  }
+
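+  // Returns all lemmas known for the given form. The in-memory cache is keyed by
+  // language + "###" + formName; on a cache miss the static (Donatus) and the dynamic
+  // BerkeleyDB store are queried, the lemmas (with all their forms) are cached, and the
+  // whole cache is cleared once it holds MORPHOLOGY_CACHE_SIZE entries or more.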
+  public ArrayList<Lemma> getLemmasByFormName(String lang, String formNameArg, boolean normalize) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    ArrayList<Lemma> retFormLemmas = null;
+    String formName = formNameArg;
+    if (normalize) {
+      MpdlNormalizer normalizer = new MpdlNormalizer(language);
+      formName = normalizer.normalize(formNameArg);
+    }
+    // first look in local cache
+    String key = language + "###" + formName;
+    Hashtable<String, Lemma> formLemmasHashtable = forms.get(key);
+    if (formLemmasHashtable == null) {
+      ArrayList<Lemma> dbFormLemmas = readLemmasByFormName(language, formName);
+      // put lemmas into local cache
+      int localHashTableSize = forms.size();
+      if (localHashTableSize >= MAX_HASHTABLE_SIZE) {
+        clearCache();
+      }
+      if (dbFormLemmas != null && ! dbFormLemmas.isEmpty()) {
+        formLemmasHashtable = new Hashtable<String, Lemma>();
+        for (int i=0; i<dbFormLemmas.size(); i++) {
+          Lemma lemma = dbFormLemmas.get(i);
+          String lemmaName = lemma.getLemmaName();
+          String lemmaKey = language + "###" + lemmaName;
+          Lemma localLemma = lemmas.get(lemmaKey);
+          if (localLemma == null) {
+            ArrayList<Form> lemmaForms = readFormsByLemmaName(language, lemmaName);
+            lemma.setForms(lemmaForms);
+            lemmas.put(lemmaKey, lemma);
+          } else {
+            lemma = localLemma;
+          }
+          formLemmasHashtable.put(lemmaKey, lemma);
+        }
+        forms.put(key, formLemmasHashtable);
+      }
+    } 
+    retFormLemmas = new ArrayList<Lemma>();
+    if (formLemmasHashtable != null) {
+      Enumeration<String> formLemmasKeys = formLemmasHashtable.keys();
+      while(formLemmasKeys.hasMoreElements()) {
+        String lemmaKey = formLemmasKeys.nextElement();
+        Lemma l = formLemmasHashtable.get(lemmaKey);
+        retFormLemmas.add(l);
+      }
+    }
+    Collections.sort(retFormLemmas);
+    return retFormLemmas;
+  }
+  
+  public Lemma getLemma(String lang, String lemmaNameArg, boolean normalize) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    String lemmaName = lemmaNameArg;
+    if (normalize) {
+      MpdlNormalizer normalizer = new MpdlNormalizer(language);
+      lemmaName = normalizer.normalize(lemmaNameArg);
+    }
+    // first look in local cache
+    String key = language + "###" + lemmaName;
+    Lemma lemma = lemmas.get(key);
+    if (lemma == null) {
+      ArrayList<Form> dbLemmaForms = readFormsByLemmaName(language, lemmaName);
+      if (dbLemmaForms != null && dbLemmaForms.size() > 0) {
+        lemma = new Lemma();
+        lemma.setLemmaName(lemmaName);
+        lemma.setLanguage(language);
+        lemma.setProvider(dbLemmaForms.get(0).getProvider());
+        lemma.setForms(dbLemmaForms);
+        lemmas.put(key, lemma);  // cache under the language-qualified key (language + "###" + lemmaName)
+      }
+    }
+    return lemma;
+  }
+  
+  public void insertFormDynamic(Form newFlatForm) throws ApplicationException {
+    if (! newFlatForm.isOk())
+      return;
+    String provider = newFlatForm.getProvider();
+    String lang = newFlatForm.getLanguage();
+    String language = Language.getInstance().getLanguageId(lang);
+    String lemmaName = newFlatForm.getLemmaName();
+    Lemma newFlatLemma = new Lemma(provider, language, lemmaName);
+    newFlatLemma.addForm(newFlatForm);
+    // write to the BerkeleyDB; there is no check whether the form is already contained (this has to be done beforehand)
+    writeFormLemmaDynamic(newFlatForm, newFlatLemma);
+    // write to backup file
+    String formsXmlStr = newFlatForm.getXmlString();
+    writeToDynamicFile(formsXmlStr);
+    // add the new form to the local cache unless the cache has grown too large
+    int localHashTableSize = forms.size();
+    if (localHashTableSize >= MAX_HASHTABLE_SIZE) {
+      clearCache();
+    }
+    String lemmaKey = language + "###" + lemmaName;
+    Lemma localLemma = lemmas.get(lemmaKey);
+    if (localLemma == null) {
+      lemmas.put(lemmaKey, newFlatLemma);
+    } else {
+      localLemma.addForm(newFlatForm);
+      String formName = newFlatForm.getFormName();
+      String formKey = language + "###" + formName;
+      Hashtable<String, Lemma> formLemmas = forms.get(formKey);
+      if (formLemmas == null) {
+        formLemmas = new Hashtable<String, Lemma>();
+        formLemmas.put(lemmaKey, localLemma);
+        forms.put(formKey, formLemmas);
+      } else {
+        formLemmas.put(lemmaKey, localLemma);  // the inner hashtable is keyed by lemmaKey (see above)
+      }
+    }
+  }
+  
+  public ArrayList<Form> getFormsByLuceneQuery(String lang, String luceneQueryString, boolean normalize) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    ArrayList<Form> result = new ArrayList<Form>();
+    luceneQueryString = luceneQueryString.toLowerCase();
+    ArrayList<String> formsFromQuery = getVariantsFromLuceneQuery(luceneQueryString);
+    if (! (formsFromQuery == null || formsFromQuery.isEmpty())) {
+      for (int i=0; i<formsFromQuery.size(); i++) {
+        String formStr = formsFromQuery.get(i);
+        if (normalize) {
+          MpdlNormalizer normalizer = new MpdlNormalizer(language);
+          formStr = normalizer.normalize(formStr);
+        }
+        ArrayList<Lemma> formLemmas = null;
+        // lemma mode: if the form name starts with the prefix "lemmalemma", the lemma itself is fetched
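+        // e.g. (illustrative) the query token "lemmalemmaamare" is resolved directly to the
+        // lemma "amare" instead of being looked up as a form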
+        if (formStr.startsWith("lemmalemma")) {
+          formLemmas = new ArrayList<Lemma>();
+          String lemmaName = formStr.substring(10);
+          Lemma lemma = getLemma(language, lemmaName, false);
+          formLemmas.add(lemma);
+        } else {
+          formLemmas = getLemmasByFormName(language, formStr, false);
+        }
+        if (formLemmas != null && ! formLemmas.isEmpty()) {
+          for (int j=0; j<formLemmas.size(); j++) {
+            Lemma l = formLemmas.get(j);
+            ArrayList<Form> lemmaForms = l.getFormsList();
+            result.addAll(lemmaForms);
+          }
+        }
+      }
+    }
+    return result;
+  }
+
+  public ArrayList<Lemma> getLemmasByLuceneQuery(String lang, String luceneQueryString, boolean normalize) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    Hashtable<String, Lemma> lemmas = new Hashtable<String, Lemma>();
+    luceneQueryString = luceneQueryString.toLowerCase();
+    ArrayList<String> formsFromQuery = getVariantsFromLuceneQuery(luceneQueryString);
+    if (! (formsFromQuery == null || formsFromQuery.isEmpty())) {
+      for (int i=0; i<formsFromQuery.size(); i++) {
+        String formStr = formsFromQuery.get(i);
+        if (normalize) {
+          MpdlNormalizer normalizer = new MpdlNormalizer(language);
+          formStr = normalizer.normalize(formStr);
+        }
+        ArrayList<Lemma> formLemmas = null;
+        // lemma mode: if the form name starts with the prefix "lemmalemma", the lemma itself is fetched (see the example in getFormsByLuceneQuery)
+        if (formStr.startsWith("lemmalemma")) {
+          formLemmas = new ArrayList<Lemma>();
+          String lemmaName = formStr.substring(10);
+          Lemma lemma = getLemma(language, lemmaName, false);
+          formLemmas.add(lemma);
+        } else {
+          formLemmas = getLemmasByFormName(language, formStr, false);
+        }
+        if (formLemmas != null) {
+          for (int j=0; j<formLemmas.size(); j++) {
+            Lemma lemma = formLemmas.get(j);
+            lemmas.put(lemma.getLemmaName(), lemma);
+          }
+        }
+      }
+    }
+    ArrayList<Lemma> result = new ArrayList<Lemma>();
+    if (lemmas != null) {
+      Enumeration<String> formLemmasKeys = lemmas.keys();
+      while(formLemmasKeys.hasMoreElements()) {
+        String lemmaKey = formLemmasKeys.nextElement();
+        Lemma l = lemmas.get(lemmaKey);
+        result.add(l);
+      }
+    }
+    Collections.sort(result);
+    if (result.isEmpty())
+      return null;
+    else 
+      return result;
+  }
+  
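+  // For every given lemma name the collected index keys are the lemma name itself plus, for each
+  // of the lemma's forms that is ambiguous (i.e. resolves to more than one lemma), a combined key
+  // of the shape "+++lemmaA+++lemmaB" listing all candidate lemmas of that form.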
+  public ArrayList<String> getIndexKeysByLemmaNames(String lang, ArrayList<String> lemmaNames) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    Hashtable<String, String> indexKeys = new Hashtable<String, String>();
+    for (int j=0; j<lemmaNames.size(); j++) {
+      String lemmaName = lemmaNames.get(j);
+      Lemma lemma = getLemma(language, lemmaName, false);
+      indexKeys.put(lemmaName, lemmaName);
+      if (lemma != null) {
+        ArrayList<Form> lemmaForms = lemma.getFormsList();
+        for (int k=0; k<lemmaForms.size(); k++) {
+          Form form = lemmaForms.get(k);
+          ArrayList<Lemma> fLemmas = getLemmasByFormName(language, form.getFormName(), false);
+          if (fLemmas != null) {
+            String indexKey = "";
+            if (fLemmas.size() == 1) {
+              indexKey = fLemmas.get(0).getLemmaName();
+            } else {
+              for (int l=0; l<fLemmas.size(); l++) {
+                Lemma lem = fLemmas.get(l);
+                indexKey = indexKey + "+++" + lem.getLemmaName(); 
+              }
+              indexKeys.put(indexKey, indexKey);
+            }
+          }
+        }
+      }
+    }
+    ArrayList<String> result = new ArrayList<String>();
+    if (indexKeys != null) {
+      Enumeration<String> indexKeysKeys = indexKeys.keys();
+      while(indexKeysKeys.hasMoreElements()) {
+        String indexKey = indexKeysKeys.nextElement();
+        result.add(indexKey);
+      }
+    }
+    Collections.sort(result);
+    if (result.isEmpty())
+      return null;
+    else 
+      return result;
+  }
+  
+  private void clearCache() {
+    forms = new Hashtable<String, Hashtable<String, Lemma>>();
+    lemmas = new Hashtable<String, Lemma>(); 
+  }
+
+  private ArrayList<Lemma> readLemmasByFormName(String lang, String formName) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    ArrayList<Lemma> lemmasStatic = dbMorphHandlerStatic.readLemmas(language, formName);
+    ArrayList<Lemma> lemmasDynamic = dbMorphHandlerDynamic.readLemmas(language, formName);
+    lemmasStatic.addAll(lemmasDynamic);
+    return lemmasStatic;
+  }
+
+  private ArrayList<Form> readFormsByLemmaName(String lang, String lemmaName) throws ApplicationException {
+    String language = Language.getInstance().getLanguageId(lang);
+    ArrayList<Form> formsStatic = dbMorphHandlerStatic.readForms(language, lemmaName);
+    ArrayList<Form> formsDynamic = dbMorphHandlerDynamic.readForms(language, lemmaName);
+    formsStatic.addAll(formsDynamic);
+    return formsStatic;
+  }
+  
+  private void writeFormLemmaDynamic(Form newFlatForm, Lemma newFlatLemma) throws ApplicationException {
+    dbMorphHandlerDynamic.writeFormLemma(newFlatForm, newFlatLemma);
+    dbMorphHandlerDynamic.writeLemmaForm(newFlatLemma, newFlatForm);
+  }
+  
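+  // The dynamic forms are mirrored in an XML backup file: on startup a working copy (".tmp") is
+  // created, its trailing "</forms>\n" (9 bytes) is stripped so new <form> entries can simply be
+  // appended, and closeDynamicFormsDataFile() writes the closing tag again and swaps the working
+  // copy back into place.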
+  private void openDynamicFormsDataFile() throws ApplicationException {
+    try {
+      File dataFileDynamicForms = new File(DATA_FILE_DYNAMIC_FORMS);
+      if (! dataFileDynamicForms.exists()) {
+        FileUtil.getInstance().copyFile(DATA_FILE_DYNAMIC_FORMS + ".empty", DATA_FILE_DYNAMIC_FORMS);
+      }
+      File dataFileDynamicFormsTmp = new File(DATA_FILE_DYNAMIC_FORMS + ".tmp");
+      dataFileDynamicFormsTmp.delete();
+      FileUtil.getInstance().copyFile(DATA_FILE_DYNAMIC_FORMS, DATA_FILE_DYNAMIC_FORMS + ".tmp");
+      FileUtil.getInstance().deleteLastNBytes(dataFileDynamicFormsTmp, 9);  // without last "</forms>" entry
+      FileOutputStream dataFileOutputStreamDynamicForms = new FileOutputStream(dataFileDynamicFormsTmp, true);
+      outputStreamDynamicForms = new BufferedOutputStream(dataFileOutputStreamDynamicForms);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void closeDynamicFormsDataFile() throws ApplicationException {
+    try {
+      writeToDynamicFile("</forms>\n");
+      if (outputStreamDynamicForms != null)
+        outputStreamDynamicForms.close();
+      File dataFileDynamicForms = new File(DATA_FILE_DYNAMIC_FORMS);
+      File dataFileDynamicFormsTmp = new File(DATA_FILE_DYNAMIC_FORMS + ".tmp");
+      dataFileDynamicForms.delete();
+      dataFileDynamicFormsTmp.renameTo(new File(DATA_FILE_DYNAMIC_FORMS));
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void writeToDynamicFile(String outStr) throws ApplicationException {
+    try {
+      if (outputStreamDynamicForms != null) {
+        byte[] bytes = outStr.getBytes("utf-8");
+        outputStreamDynamicForms.write(bytes, 0, bytes.length);
+        outputStreamDynamicForms.flush();
+      }
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private ArrayList<String> getVariantsFromLuceneQuery(String queryString) {
+    LuceneUtil luceneUtil = LuceneUtil.getInstance();
+    ArrayList<String> variants = luceneUtil.getVariantsFromLuceneQuery(queryString);
+    return variants;
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/app/SimpleMorphContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,119 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.app;
+
+import org.xml.sax.*;
+
+
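+// Maps a single <form> or <lemma> XML fragment onto a Form or Lemma object; only elements with
+// the names handled in characters() below are evaluated.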
+public class SimpleMorphContentHandler implements ContentHandler {
+  private Element currentElement;
+  private Lemma lemma;
+  private Form form;
+  
+  public SimpleMorphContentHandler() {
+  }
+  
+  public Form getForm() {
+    return form;
+  }
+  
+  public Lemma getLemma() {
+    return lemma;
+  }
+  
+  public void startDocument() throws SAXException {
+  }
+
+  public void endDocument() throws SAXException {
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    if (currentElement != null) {
+      String elemName = currentElement.name;
+      if (form != null) {
+        char[] cCopy = new char[length];
+        System.arraycopy(c, start, cCopy, 0, length);
+        String charactersStr = String.valueOf(cCopy);
+        if (elemName.equals("provider"))
+          form.setProvider(charactersStr);
+        else if (elemName.equals("language"))
+          form.setLanguage(charactersStr);
+        else if (elemName.equals("form-name"))
+          form.setFormName(charactersStr);
+        else if (elemName.equals("lemma-name"))
+          form.setLemmaName(charactersStr);
+        else if (elemName.equals("pos"))
+          form.setPos(charactersStr);
+        else if (elemName.equals("tense"))
+          form.setTense(charactersStr);
+        else if (elemName.equals("voice"))
+          form.setVoice(charactersStr);
+        else if (elemName.equals("casus"))
+          form.setCasus(charactersStr);
+        else if (elemName.equals("number"))
+          form.setNumber(charactersStr);
+        else if (elemName.equals("mood"))
+          form.setMood(charactersStr);
+        else if (elemName.equals("person"))
+          form.setPerson(charactersStr);
+        else if (elemName.equals("gender"))
+          form.setGender(charactersStr);
+        else if (elemName.equals("definite"))
+          form.setDefinite(charactersStr);
+      } else if (lemma != null) {
+        char[] cCopy = new char[length];
+        System.arraycopy(c, start, cCopy, 0, length);
+        String charactersStr = String.valueOf(cCopy);
+        if (elemName.equals("provider"))
+          lemma.setProvider(charactersStr);
+        else if (elemName.equals("language"))
+          lemma.setLanguage(charactersStr);
+        else if (elemName.equals("lemma-name"))
+          lemma.setLemmaName(charactersStr);
+      }
+    }
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(org.xml.sax.Locator arg1) {
+  }
+
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    currentElement = null;
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    currentElement = new Element(name);
+    if (name.equals("form")) {
+      form = new Form();
+    } else if (name.equals("lemma")) {
+      lemma = new Lemma();
+    }
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+  }
+  
+  private class Element {
+    String name;
+    String value;
+    
+    Element(String name) {
+      this.name = name;
+    }
+
+    Element(String name, String value) {
+      this.name = name;
+      this.value = value;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/Converter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,491 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.converter;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+import java.util.Hashtable;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+
+public class Converter {
+  private static Converter instance;
+  private static String MPDL_DATA_DIR = MpdlConstants.MPDL_DATA_DIR;
+  private static String ORIG_PERSEUS_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/perseus";
+  private static String ORIG_CELEX_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/celex";
+  private static String ORIG_FRENCH_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/french";
+  private static String ORIG_ITALIAN_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/italian";
+  private static String ORIG_DONATUS_SUB_DATA_DIR = MPDL_DATA_DIR + "/dataFilesOrig/donatus-sup";
+  private static String OUT_DATA_DIR = MPDL_DATA_DIR + "/dataFiles";
+  private PerseusContentHandler perseusContentHandler;
+  private Hashtable<String, Hashtable<String, Form>> forms = new Hashtable<String, Hashtable<String, Form>>();
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
+  public static Converter getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new Converter();
+    }
+    return instance;
+  }
+
+  /**
+   * Converts the original morphology resources (Perseus, CELEX, lexique, Donatus) into the
+   * MPDL XML form files in OUT_DATA_DIR; currently only the Donatus Italian conversion is
+   * active, the other conversion calls are commented out.
+   */
+  public static void main(String[] args) throws ApplicationException {
+    getInstance();
+    instance.beginOperation();
+    System.out.print("Start ...");
+    /*
+    // Latin
+    String inputFileNameLatin = ORIG_PERSEUS_DATA_DIR + "/" + "latin.morph.xml";
+    String outputFileNameLatin = OUT_DATA_DIR + "/" + "perseus-latin-forms.xml";
+    instance.perseusConvert("perseus", "la", inputFileNameLatin, outputFileNameLatin);
+    String inputFileNameDonatusLatinSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-la-forms.csv";
+    String outputFileNameDonatusLatinSup = OUT_DATA_DIR + "/" + "donatus-sup-la-forms.xml";
+    instance.donatusSupplementsConvert("donatus-sup", "la", inputFileNameDonatusLatinSup, outputFileNameDonatusLatinSup);
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+    // Greek
+    String inputFileNameGreek = ORIG_PERSEUS_DATA_DIR + "/" + "greek.morph.xml";
+    String outputFileNameGreek = OUT_DATA_DIR + "/" + "perseus-greek-forms.xml";
+    instance.perseusConvert("perseus", "el", inputFileNameGreek, outputFileNameGreek);
+    String inputFileNameDonatusGreekSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-el-forms.csv";
+    String outputFileNameDonatusGreekSup = OUT_DATA_DIR + "/" + "donatus-sup-el-forms.xml";
+    instance.donatusSupplementsConvert("donatus-sup", "el", inputFileNameDonatusGreekSup, outputFileNameDonatusGreekSup);
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+    // Arabic
+    String inputFileNameArabic = ORIG_PERSEUS_DATA_DIR + "/" + "arabic.morph.xml";
+    String outputFileNameArabic = OUT_DATA_DIR + "/" + "perseus-arabic-forms.xml";
+    instance.perseusConvert("perseus", "ar", inputFileNameArabic, outputFileNameArabic);
+    String inputFileNameDonatusArabicSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-ar-forms.csv";
+    String outputFileNameDonatusArabicSup = OUT_DATA_DIR + "/" + "donatus-sup-ar-forms.xml";
+    instance.donatusSupplementsConvert("donatus-sup", "ar", inputFileNameDonatusArabicSup, outputFileNameDonatusArabicSup);
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+    // Dutch
+    String inputFileNameDutchWords = ORIG_CELEX_DATA_DIR + "/" + "dmw.cd";
+    String inputFileNameDutchLemmas = ORIG_CELEX_DATA_DIR + "/" + "dml.cd";
+    String outputFileNameDutch = OUT_DATA_DIR + "/" + "celex-dutch-forms.xml";
+    instance.celexConvert("celex", "nl", inputFileNameDutchWords, inputFileNameDutchLemmas, outputFileNameDutch);
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+    // German
+    String inputFileNameGermanWords = ORIG_CELEX_DATA_DIR + "/" + "gmw.cd";
+    String inputFileNameGermanLemmas = ORIG_CELEX_DATA_DIR + "/" + "gml.cd";
+    String outputFileNameGerman = OUT_DATA_DIR + "/" + "celex-german-forms.xml";
+    instance.celexConvert("celex", "de", inputFileNameGermanWords, inputFileNameGermanLemmas, outputFileNameGerman);
+    String inputFileNameDonatusGermanSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-de-forms.csv";
+    String outputFileNameDonatusGermanSup = OUT_DATA_DIR + "/" + "donatus-sup-de-forms.xml";
+    instance.donatusSupplementsConvert("donatus-sup", "de", inputFileNameDonatusGermanSup, outputFileNameDonatusGermanSup);
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+    // English
+    String inputFileNameEnglishWords = ORIG_CELEX_DATA_DIR + "/" + "emw.cd";
+    String inputFileNameEnglishLemmas = ORIG_CELEX_DATA_DIR + "/" + "eml.cd";
+    String outputFileNameEnglish = OUT_DATA_DIR + "/" + "celex-english-forms.xml";
+    instance.celexConvert("celex", "en", inputFileNameEnglishWords, inputFileNameEnglishLemmas, outputFileNameEnglish);
+    String inputFileNameDonatusEnglishSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-en-forms.csv";
+    String outputFileNameDonatusEnglishSup = OUT_DATA_DIR + "/" + "donatus-sup-en-forms.xml";
+    instance.donatusSupplementsConvert("donatus-sup", "en", inputFileNameDonatusEnglishSup, outputFileNameDonatusEnglishSup);
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+    // French
+    String inputFileNameFrench = ORIG_FRENCH_DATA_DIR + "/" + "lexique";
+    String outputFileNameFrench = OUT_DATA_DIR + "/" + "lexique-french-forms.xml";
+    instance.lexiqueConvert("lexique", "fr", inputFileNameFrench, outputFileNameFrench);
+    String inputFileNameDonatusFrenchSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-fr-forms.csv";
+    String outputFileNameDonatusFrenchSup = OUT_DATA_DIR + "/" + "donatus-sup-fr-forms.xml";
+    instance.donatusSupplementsConvert("donatus-sup", "fr", inputFileNameDonatusFrenchSup, outputFileNameDonatusFrenchSup);
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+    */
+    // Italian
+    String inputFileNameItalian = ORIG_ITALIAN_DATA_DIR + "/" + "ital.hash";
+    String outputFileNameItalian = OUT_DATA_DIR + "/" + "donatus-italian-forms.xml";
+    instance.donatusItalianConvert("donatus", "it", inputFileNameItalian, outputFileNameItalian);
+    /*
+    String inputFileNameDonatusItalianSup = ORIG_DONATUS_SUB_DATA_DIR + "/" + "donatus-sup-it-forms.csv";
+    String outputFileNameDonatusItalianSup = OUT_DATA_DIR + "/" + "donatus-sup-it-forms.xml";
+    instance.donatusSupplementsConvert("donatus-sup", "it", inputFileNameDonatusItalianSup, outputFileNameDonatusItalianSup);
+    */
+    instance.forms = new Hashtable<String, Hashtable<String, Form>>();
+
+    instance.end();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    System.out.println("End.");
+    System.out.println("Needed time: " + elapsedTime + " seconds");
+  }
+
+  private void perseusConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException {
+    File inputFile = new File(inputFileName);
+    perseusContentHandler = new PerseusContentHandler(provider, language, outputFileName);
+    try {
+      XMLReader xmlParser = new SAXParser();
+      xmlParser.setContentHandler(perseusContentHandler);
+      InputStream inputStream = new FileInputStream(inputFile);
+      BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
+      InputSource input = new InputSource(bufferedInputStream);
+      xmlParser.parse(input);
+      bufferedInputStream.close();
+      forms = perseusContentHandler.getForms();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void celexConvert(String provider, String language, String inputFileNameWords, String inputFileNameLemmas, String outputFileName) throws ApplicationException {
+    File inputFileLemmas = new File(inputFileNameLemmas);
+    Hashtable<Integer, String> lemmas = loadLemmas(inputFileLemmas);
+    File inputFileWords = new File(inputFileNameWords);
+    File outputFile = new File(outputFileName);
+    writeCelexForms(provider, language, lemmas, inputFileWords, outputFile);
+  }
+
+  private void lexiqueConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException {
+    File inputFile = new File(inputFileName);
+    File outputFile = new File(outputFileName);
+    writeLexiqueForms(provider, language, inputFile, outputFile);
+  }
+
+  private void donatusItalianConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException {
+    File inputFile = new File(inputFileName);
+    File outputFile = new File(outputFileName);
+    writeDonatusItalianForms(provider, language, inputFile, outputFile);
+  }
+
+  private void donatusSupplementsConvert(String provider, String language, String inputFileName, String outputFileName) throws ApplicationException {
+    File inputFile = new File(inputFileName);
+    File outputFile = new File(outputFileName);
+    writeDonatusSupplementsForms(provider, language, inputFile, outputFile);
+  }
+
+  private Hashtable<Integer, String> loadLemmas(File inputFile) {
+    Hashtable<Integer, String> retLemmas = new Hashtable<Integer, String>();
+    BufferedReader in = null;
+    try {
+      in = new BufferedReader(new FileReader(inputFile));
+      String line = null;
+      while((line = in.readLine()) != null) {
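+        // Each CELEX lemma file line is assumed to start with "<id>\<lemma>\..." (illustrative:
+        // "40\Haus\..."), so the numeric id and the lemma name are taken from the first two
+        // backslash-separated fields.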
+        int from = line.indexOf("\\");
+        int to = line.indexOf("\\", from + 1);
+        String idStr = line.substring(0, from);
+        Integer idInt = new Integer(idStr);
+        String lemma = line.substring(from + 1, to);
+        retLemmas.put(idInt, lemma);
+      }
+    } catch (FileNotFoundException e) {
+      e.printStackTrace();
+    } catch (IOException e) {
+      e.printStackTrace();
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+    }
+    return retLemmas;
+  }
+  
+  private void writeCelexForms(String provider, String language, Hashtable<Integer, String> lemmas, File inputFileWords, File outputFile) throws ApplicationException {
+    BufferedReader in = null;
+    BufferedOutputStream out = null;
+    forms = new Hashtable<String, Hashtable<String, Form>>();
+    try {
+      in = new BufferedReader(new FileReader(inputFileWords));
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      write("<forms>\n", out);
+      String line = null;
+      while((line = in.readLine()) != null) {
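+        // A CELEX word file (*.cd) line is a backslash-separated record; only two fields are
+        // used here: the word form between the 1st and 2nd backslash and the lemma id between
+        // the 3rd and 4th. Illustrative line (not taken from the real CELEX data):
+        //   42\Haus\7\40\...
+        // would yield formName = "Haus" and lemmaIdStr = "40".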
+        int delim1 = line.indexOf("\\");
+        int delim2 = line.indexOf("\\", delim1 + 1);
+        int delim3 = line.indexOf("\\", delim2 + 1);
+        int delim4 = line.indexOf("\\", delim3 + 1);
+        String formName = line.substring(delim1 + 1, delim2);
+        String lemmaIdStr = line.substring(delim3 + 1, delim4);
+        Integer lemmaIdInt = null;
+        try {
+          lemmaIdInt = new Integer(lemmaIdStr);
+        } catch (NumberFormatException e) {
+          System.out.println("Warning: Lemma id: " + lemmaIdStr + " is not correct");
+        }
+        if (lemmaIdInt != null) {
+          String lemmaName = lemmas.get(lemmaIdInt);
+          Form form = new Form();
+          form.setProvider(provider);
+          form.setLanguage(language);
+          form.setFormName(formName);
+          form.setLemmaName(lemmaName);
+          form.normalize();
+          if (form.isOk()) {
+            Hashtable<String, Form> formLemmas = forms.get(formName);
+            if (formLemmas == null) {
+              formLemmas = new Hashtable<String, Form>();
+              formLemmas.put(lemmaName, form);
+              forms.put(formName, formLemmas);
+              write(form, out);
+            } else {
+              Form formLemma = formLemmas.get(lemmaName);
+              if (formLemma == null) {
+                formLemmas.put(lemmaName, form);
+                write(form, out);
+              }
+            }
+          }
+        }
+      }
+      write("</forms>\n", out);
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+      if (out != null) try { out.close(); } catch (Exception e) { }
+    }
+  }
+
+  private void writeLexiqueForms(String provider, String language, File inputFile, File outputFile) throws ApplicationException {
+    BufferedReader in = null;
+    BufferedOutputStream out = null;
+    forms = new Hashtable<String, Hashtable<String, Form>>();
+    try {
+      in = new BufferedReader(new FileReader(inputFile));
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      write("<forms>\n", out);
+      String line = null;
+      while((line = in.readLine()) != null) {
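+        // A lexique line is assumed to be tab-separated and to start with "<form>\t<lemma>\t...";
+        // a lemma field of "=" means the form is its own lemma (illustrative: "chantait\tchanter\t...").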
+        int delim1 = line.indexOf("\t");
+        int delim2 = line.indexOf("\t", delim1 + 1);
+        String formName = line.substring(0, delim1).trim();
+        String lemmaName = line.substring(delim1 + 1, delim2).trim();
+        if (lemmaName.equals("="))
+          lemmaName = formName;
+        Form form = new Form();
+        form.setProvider(provider);
+        form.setLanguage(language);
+        form.setFormName(formName);
+        form.setLemmaName(lemmaName);
+        form.normalize();
+        if (form.isOk()) {
+          Hashtable<String, Form> formLemmas = forms.get(formName);
+          if (formLemmas == null) {
+            formLemmas = new Hashtable<String, Form>();
+            formLemmas.put(lemmaName, form);
+            forms.put(formName, formLemmas);
+            write(form, out);
+          } else {
+            Form formLemma = formLemmas.get(lemmaName);
+            if (formLemma == null) {
+              formLemmas.put(lemmaName, form);
+              write(form, out);
+            }
+          }
+        }
+      }
+      write("</forms>\n", out);
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+      if (out != null) try { out.close(); } catch (Exception e) { }
+    }
+  }
+
+  private void writeDonatusItalianForms(String provider, String language, File inputFile, File outputFile) throws ApplicationException {
+    BufferedReader in = null;
+    BufferedOutputStream out = null;
+    forms = new Hashtable<String, Hashtable<String, Form>>();
+    try {
+      in = new BufferedReader(new FileReader(inputFile));
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      write("<forms>\n", out);
+      String line = null;
+      while((line = in.readLine()) != null) {
+        // a line looks like: 'risoluino' => '<NL>V risolvino,risolvere  pres imperat 3rd pl ...</NL><NL>...</NL>',
+        // or like: 'legamenti' => '<NL>N legamento  masc pl ...</NL><NL>...</NL>',
+        // this method only recognizes the first lemma; TODO: recognize all lemmas of the form
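+        // Worked example for the second shape: with the line
+        //   'legamenti' => '<NL>N legamento  masc pl ...</NL>',
+        // delim1-delim3 are the first three apostrophes, delim4 is the blank after "<NL>" plus the
+        // one-character POS tag, and delim5 the next blank, giving formName = "legamenti" and
+        // lemmaName = "legamento"; for "risolvino,risolvere" only "risolvino" is kept.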
+        int delim1 = line.indexOf("'");
+        int delim2 = line.indexOf("'", delim1 + 1);
+        int delim3 = line.indexOf("'", delim2 + 1);
+        int delim4 = delim3 + 6; // blank directly before the lemma ("<NL>" plus a one-character POS tag)
+        int delim5 = line.indexOf(" ", delim4 + 1); // the first lemma (or comma-separated lemma list) ends at the next blank
+        String formName = line.substring(delim1 + 1, delim2);
+        formName = formName.replace("\\", "");
+        String lemmaName = line.substring(delim4 + 1, delim5);
+        int commaInLemma = lemmaName.indexOf(","); // when more than one lemma is listed, keep only the first
+        if (commaInLemma != -1)
+          lemmaName = lemmaName.substring(0, commaInLemma);  
+        lemmaName = lemmaName.replace("\\", "");
+        Form form = new Form();
+        form.setProvider(provider);
+        form.setLanguage(language);
+        form.setFormName(formName);
+        form.setLemmaName(lemmaName);
+        form.normalize();
+        boolean lineContainsAp = line.contains("\''");  // some of the form lines contain irregular strings of the form: 'par\'' => '<NL>N pari/^,pari     indeclform  adverb</NL>
+        if (form.isOk() && ! lineContainsAp) {
+          Hashtable<String, Form> formLemmas = forms.get(formName);
+          if (formLemmas == null) {
+            formLemmas = new Hashtable<String, Form>();
+            formLemmas.put(lemmaName, form);
+            forms.put(formName, formLemmas);
+            write(form, out);
+          } else {
+            Form formLemma = formLemmas.get(lemmaName);
+            if (formLemma == null) {
+              formLemmas.put(lemmaName, form);
+              write(form, out);
+            }
+          }
+        }
+      }
+      write("</forms>\n", out);
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+      if (out != null) try { out.close(); } catch (Exception e) { }
+    }
+  }
+  
+  private void writeDonatusSupplementsForms(String provider, String language, File inputFile, File outputFile) throws ApplicationException {
+    BufferedReader in = null;
+    BufferedOutputStream out = null;
+    try {
+      in = new BufferedReader(new FileReader(inputFile));
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      write("<forms>\n", out);
+      String line = null;
+      String lemmaName = "";
+      String formName = "";
+      // each line is a form
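+      // Assumed CSV layout (illustrative values, not taken from the source file):
+      //   "amo","amas"   -> new lemma "amo" with form "amas"
+      //   ,"amat"        -> additional form "amat" of the lemma from the preceding line
+      // i.e. a line starting with a comma reuses the last seen lemma name.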
+      while((line = in.readLine()) != null) {
+        if (line.length() == 0)
+          break;
+        String firstChar = line.substring(0, 1);
+        String mode = "lemmaAndForm";
+        if (firstChar.equals(","))
+          mode = "form";
+        if (mode.equals("lemmaAndForm")) {
+          int quote2 = line.indexOf("\"", 1);
+          lemmaName = line.substring(1, quote2);
+          int quote3 = line.indexOf("\"", quote2 + 1);
+          int quote4 = line.indexOf("\"", quote3 + 1);
+          formName = line.substring(quote3 + 1, quote4);
+        } else if (mode.equals("form")) {
+          int quote2 = line.indexOf("\"", 3);
+          formName = line.substring(2, quote2);
+        }
+        Form form = new Form();
+        form.setProvider(provider);
+        form.setLanguage(language);
+        form.setFormName(formName);
+        form.setLemmaName(lemmaName);
+        if (form.isGreek())
+          transcodeFromBetaCode2Unicode(form);
+        else if (form.isArabic())
+          form = transcodeFromBuckwalter2Unicode(form);
+        form.normalize();
+        if (form.isOk()) {
+          Hashtable<String, Form> formLemmas = forms.get(formName);
+          if (formLemmas == null) {
+            formLemmas = new Hashtable<String, Form>();
+            formLemmas.put(lemmaName, form);
+            forms.put(formName, formLemmas);
+            write(form, out);
+          } else {
+            Form formLemma = formLemmas.get(lemmaName);
+            if (formLemma == null) {
+              formLemmas.put(lemmaName, form);
+              write(form, out);
+            }
+          }
+        }
+      }
+      write("</forms>\n", out);
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+      if (out != null) try { out.close(); } catch (Exception e) { }
+    }
+  }
+
+  private void write(Form form, BufferedOutputStream out) throws ApplicationException {
+    try {
+      String xmlFormStr = form.getXmlString();
+      byte[] bytes = xmlFormStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  private void write(String inputString, BufferedOutputStream out) throws ApplicationException {
+    try {
+      byte[] bytes = inputString.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  private Form transcodeFromBetaCode2Unicode(Form form) throws ApplicationException {
+    String formName = form.getFormName();
+    String lemmaName = form.getLemmaName();
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName);
+    String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName);
+    form.setFormName(encodedUnicodeForm);
+    form.setLemmaName(encodedUnicodeLemma);
+    return form;
+  }
+  
+  private Form transcodeFromBuckwalter2Unicode(Form form) throws ApplicationException {
+    String formName = form.getFormName();
+    String lemmaName = form.getLemmaName();
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeForm = transcoder.transcodeFromBuckwalter2Unicode(formName);
+    String encodedUnicodeLemma = transcoder.transcodeFromBuckwalter2Unicode(lemmaName);
+    form.setFormName(encodedUnicodeForm);
+    form.setLemmaName(encodedUnicodeLemma);
+    return form;
+  }
+
+  private void end() throws ApplicationException {
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/converter/PerseusContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,220 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.converter;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Hashtable;
+
+import org.xml.sax.*;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+
+public class PerseusContentHandler implements ContentHandler {
+  private static String[] XML_FORM_FIELD_NAMES = {"form", "lemma", "pos", "tense", "voice", "case", "number", "mood", "person", "gender", "definite"};
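+  // The Perseus morphology files are assumed to wrap each reading in an <analysis> element whose
+  // children use the names listed above, e.g. (illustrative fragment, not copied from the data):
+  //   <analysis><form>amat</form><lemma>amo</lemma><pos>verb</pos><person>3rd</person>
+  //             <number>sg</number><tense>pres</tense><mood>ind</mood><voice>act</voice></analysis>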
+  private Hashtable<String, Hashtable<String, Form>> forms;
+  private File outputFile;
+  private String provider;
+  private String language;
+  private OutputStream out;
+  private Element currentElement;
+  private Form form;
+  
+  public PerseusContentHandler(String provider, String language, String outputFileName) throws ApplicationException {
+    this.outputFile = new File(outputFileName);
+    this.provider = provider;
+    this.language = language;
+  }
+  
+  public Hashtable<String, Hashtable<String, Form>> getForms() {
+    return forms;  
+  }
+  
+  public void startDocument() throws SAXException {
+    try {
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      forms = new Hashtable<String, Hashtable<String, Form>>();
+    } catch (FileNotFoundException e) {
+      throw new SAXException(e);
+    }
+    write("<forms>\n");
+  }
+
+  public void endDocument() throws SAXException {
+    write("</forms>\n");
+    try { 
+      if (out != null)
+        out.close(); 
+    } catch (Exception e) { 
+        // nothing: always close the stream at the end of the method
+    }  
+  }
+  
+  public void characters(char[] c, int start, int length) throws SAXException {
+    if (currentElement != null) {
+      String elemName = currentElement.name;
+      if (form != null && isXmlFormField(elemName)) {
+        char[] cCopy = new char[length];
+        System.arraycopy(c, start, cCopy, 0, length);
+        String charactersStr = String.valueOf(cCopy);
+        if (charactersStr != null && ! (charactersStr.trim().equals(""))) {
+          if (elemName.equals("form"))
+            form.addFormName(charactersStr);
+          else if (elemName.equals("lemma"))
+            form.addLemmaName(charactersStr);
+          else if (elemName.equals("pos"))
+            form.addPos(charactersStr);
+          else if (elemName.equals("tense"))
+            form.addTense(charactersStr);
+          else if (elemName.equals("voice"))
+            form.addVoice(charactersStr);
+          else if (elemName.equals("case"))
+            form.addCasus(charactersStr);
+          else if (elemName.equals("number"))
+            form.addNumber(charactersStr);
+          else if (elemName.equals("mood"))
+            form.addMood(charactersStr);
+          else if (elemName.equals("person"))
+            form.addPerson(charactersStr);
+          else if (elemName.equals("gender"))
+            form.addGender(charactersStr);
+          else if (elemName.equals("definite"))
+            form.addDefinite(charactersStr);
+        }
+      }
+    } 
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(org.xml.sax.Locator arg1) {
+  }
+
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    currentElement = null;
+    try {
+      if (name.equals("analysis")) {
+        if (form.isGreek())
+          form = transcodeFromBetaCode2Unicode(form);
+        else if (form.isArabic())
+          form = transcodeFromBuckwalter2Unicode(form);
+        form.normalize();
+        if (form.isOk()) {
+          String formName = form.getFormName();
+          String lemmaName = form.getLemmaName();
+          Hashtable<String, Form> formLemmas = forms.get(formName);
+          if (formLemmas == null) {
+            formLemmas = new Hashtable<String, Form>();
+            formLemmas.put(lemmaName, form);
+            forms.put(formName, formLemmas);
+            write(form);
+          } else {
+            Form formLemma = formLemmas.get(lemmaName);
+            if (formLemma == null) {
+              formLemmas.put(lemmaName, form);
+              write(form);
+            }
+          }
+        }
+        form = null;
+      }
+    } catch (ApplicationException e) {
+      throw new SAXException(e);
+    }
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    currentElement = new Element(name);
+    if (name.equals("analysis")) {
+      form = new Form();
+      form.setProvider(provider);
+      form.setLanguage(language);
+    }
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+  }
+  
+  private boolean isXmlFormField(String fieldName) {
+    boolean isXmlFormField = false;
+    for (int i=0; i<XML_FORM_FIELD_NAMES.length; i++) {
+      String n = XML_FORM_FIELD_NAMES[i];
+      if (fieldName.toLowerCase().equals(n)) {
+        isXmlFormField = true;
+        break;
+      }
+    }
+    return isXmlFormField;
+  }
+  
+  private void write(String outStr) throws SAXException {
+    try {
+      byte[] bytes = outStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new SAXException(e);
+    }
+  }
+  
+  private void write(Form form) throws SAXException {
+    try {
+      String xmlFormStr = form.getXmlString();
+      byte[] bytes = xmlFormStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new SAXException(e);
+    }
+  }
+  
+  private Form transcodeFromBetaCode2Unicode(Form form) throws ApplicationException {
+    String formName = form.getFormName();
+    String lemmaName = form.getLemmaName();
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(formName);
+    String encodedUnicodeLemma = transcoder.transcodeFromBetaCode2Unicode(lemmaName);
+    form.setFormName(encodedUnicodeForm);
+    form.setLemmaName(encodedUnicodeLemma);
+    return form;
+  }
+  
+  private Form transcodeFromBuckwalter2Unicode(Form form) throws ApplicationException {
+    String formName = form.getFormName();
+    String lemmaName = form.getLemmaName();
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeForm = transcoder.transcodeFromBuckwalter2Unicode(formName);
+    String encodedUnicodeLemma = transcoder.transcodeFromBuckwalter2Unicode(lemmaName);
+    form.setFormName(encodedUnicodeForm);
+    form.setLemmaName(encodedUnicodeLemma);
+    return form;
+  }
+
+  private class Element {
+    String name;
+    String value;
+    
+    Element(String name) {
+      this.name = name;
+    }
+
+    Element(String name, String value) {
+      this.name = name;
+      this.value = value;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,242 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.db;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+import java.util.Hashtable;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sleepycat.je.Cursor;
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseEntry;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.LockMode;
+import com.sleepycat.je.OperationStatus;
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Language;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.SimpleMorphContentHandler;
+
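+/**
+ * Access layer for the Berkeley DB based morphology store: opens the form and
+ * lemma databases through DbEnvMorph and offers write, delete and cursor based
+ * read operations keyed by "languageId###name".
+ */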
+public class DBMorphHandler {
+  private String dbDirectory;
+  private DbEnvMorph morphDbEnv;
+  
+  public DBMorphHandler(String dbDir) {
+    this.dbDirectory = dbDir;
+  }
+  
+  public void start() throws ApplicationException {
+    morphDbEnv = new DbEnvMorph();
+    morphDbEnv.setDataDir(dbDirectory);
+    morphDbEnv.init(); // open databases in read/write mode
+  }
+  
+  public void openDatabases() throws ApplicationException {
+    morphDbEnv.openDatabases();
+  }
+
+  public void closeDatabases() throws ApplicationException {
+    morphDbEnv.close();
+  }
+
+  public void deleteMorphData() throws ApplicationException {
+    morphDbEnv.removeDatabases();
+  }
+  
+  public long getSize() throws ApplicationException {
+    long size = 0;
+    try {
+      Database formDB = morphDbEnv.getFormDB();
+      size = formDB.count();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+    return size;
+  }
+  
+
+  public void writeFormLemma(Form form, Lemma lemma) throws ApplicationException {
+    try {
+      String lang = Language.getInstance().getLanguageId(form.getLanguage());
+      String keyStr = lang + "###" + form.getFormName();
+      String valueStr = lemma.getXmlString();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      DatabaseEntry dbEntryValue = new DatabaseEntry(valueStr.getBytes("utf-8"));
+      Database formDB = morphDbEnv.getFormDB();
+      formDB.put(null, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  public void writeLemmaForm(Lemma lemma, Form form) throws ApplicationException {
+    try {
+      String lang = Language.getInstance().getLanguageId(lemma.getLanguage());
+      String keyStr = lang + "###" + lemma.getLemmaName();
+      String valueStr = form.getXmlString();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      DatabaseEntry dbEntryValue = new DatabaseEntry(valueStr.getBytes("utf-8"));
+      Database lemmaDB = morphDbEnv.getLemmaDB();
+      lemmaDB.put(null, dbEntryKey, dbEntryValue);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }
+    
+  public void deleteLemma(Lemma lemma) throws ApplicationException {
+    try {
+      String lang = Language.getInstance().getLanguageId(lemma.getLanguage());
+      String keyStr = lang + "###" + lemma.getLemmaName();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      Database lemmaDB = morphDbEnv.getLemmaDB();
+      lemmaDB.delete(null, dbEntryKey);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }      
+      
+  public void deleteForm(Form form) throws ApplicationException {
+    try {
+      String lang = Language.getInstance().getLanguageId(form.getLanguage());
+      String keyStr = lang + "###" + form.getFormName();
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      Database formDB = morphDbEnv.getFormDB();
+      formDB.delete(null, dbEntryKey);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+  }      
+      
+  public ArrayList<Form> readForms(String language, String lemmaName) throws ApplicationException {
+    ArrayList<Form> retForms = new ArrayList<Form>();
+    String lang = Language.getInstance().getLanguageId(language);
+    String hashKey = lang + "###" + lemmaName;
+    try {
+      Database lemmaDB = morphDbEnv.getLemmaDB();
+      Cursor cursor = lemmaDB.openCursor(null, null);
+      byte[] bHashKey = hashKey.getBytes("utf-8");
+      DatabaseEntry dbEntryKey = new DatabaseEntry(bHashKey);
+      DatabaseEntry foundFormValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundFormValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundFormValueBytes = foundFormValue.getData();
+        String foundFormValueStr = new String(foundFormValueBytes, "utf-8");
+        Form f = parseXmlFormString(foundFormValueStr);
+        retForms.add(f);
+        operationStatus = cursor.getNextDup(dbEntryKey, foundFormValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retForms;
+  }
+  
+  // TODO this method is not used anywhere yet
+  public Hashtable<String, Form> readForms() throws ApplicationException {
+    Hashtable<String, Form> retForms = new Hashtable<String, Form>();
+    try {
+      Database lemmaDB = morphDbEnv.getLemmaDB();
+      Cursor cursor = lemmaDB.openCursor(null, null);
+      DatabaseEntry dbEntryKey = new DatabaseEntry();
+      DatabaseEntry foundFormValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getFirst(dbEntryKey, foundFormValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundFormValueBytes = foundFormValue.getData();
+        String foundFormValueStr = new String(foundFormValueBytes, "utf-8");
+        Form f = parseXmlFormString(foundFormValueStr);
+        String formHashKey = f.getLanguage() + "###" + f.getFormName();
+        retForms.put(formHashKey, f);
+        operationStatus = cursor.getNext(dbEntryKey, foundFormValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retForms;
+  }
+  
+  public ArrayList<Lemma> readLemmas(String language, String formName) throws ApplicationException {
+    ArrayList<Lemma> retForms = new ArrayList<Lemma>();
+    String lang = Language.getInstance().getLanguageId(language);
+    String hashKey = lang + "###" + formName;
+    try {
+      Database formDB = morphDbEnv.getFormDB();
+      Cursor cursor = formDB.openCursor(null, null);
+      byte[] bHashKey = hashKey.getBytes("utf-8");
+      DatabaseEntry dbEntryKey = new DatabaseEntry(bHashKey);
+      DatabaseEntry foundLemmaValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundLemmaValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundLemmaValueBytes = foundLemmaValue.getData();
+        String foundLemmaValueStr = new String(foundLemmaValueBytes, "utf-8");
+        Lemma l = parseXmlLemmaString(foundLemmaValueStr);
+        retForms.add(l);
+        operationStatus = cursor.getNextDup(dbEntryKey, foundLemmaValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retForms;
+  }
+  
+  private Form parseXmlFormString(String xmlString) throws ApplicationException {
+    Form form = null;
+    try {
+      XMLReader xmlParser = new SAXParser();
+      SimpleMorphContentHandler morphContentHandler = new SimpleMorphContentHandler();
+      xmlParser.setContentHandler(morphContentHandler);
+      Reader reader = new StringReader(xmlString);
+      InputSource input = new InputSource(reader);
+      xmlParser.parse(input);
+      form = morphContentHandler.getForm();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return form;
+  }
+
+  private Lemma parseXmlLemmaString(String xmlString) throws ApplicationException {
+    Lemma lemma = null;
+    try {
+      XMLReader xmlParser = new SAXParser();
+      SimpleMorphContentHandler morphContentHandler = new SimpleMorphContentHandler();
+      xmlParser.setContentHandler(morphContentHandler);
+      Reader reader = new StringReader(xmlString);
+      InputSource input = new InputSource(reader);
+      xmlParser.parse(input);
+      lemma = morphContentHandler.getLemma();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return lemma;
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphSupWriter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,265 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.db;
+
+import java.io.BufferedOutputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Iterator;
+
+import com.sleepycat.je.Cursor;
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseEntry;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.LockMode;
+import com.sleepycat.je.OperationStatus;
+import com.sleepycat.je.util.DbLoad;
+
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.lt.general.Transcoder;
+
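+/**
+ * Maintenance tool for the supplementary Donatus caches (cache-la, cache-el,
+ * cache-it): loads Berkeley DB dump files, reports record counts and can
+ * export the entries as "forms" XML files (see the commented calls in main).
+ */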
+public class DBMorphSupWriter {
+  private static DBMorphSupWriter instance;
+  private static String MPDL_DATA_DIR = MpdlConstants.MPDL_DATA_DIR;
+  private static String DATA_FILES_DIR_DONATUS_ADD_SUP = MPDL_DATA_DIR + "/dataFiles/donatusAdditionalSup";
+  private static String DB_DIR_DONATUS_ADD_SUP = MPDL_DATA_DIR + "/dataFiles/donatusAdditionalSup/db";
+  private static String[] DONATUS_SUP_DUMPS = {"cache-la", "cache-el", "cache-it"};
+  private DbEnvMorphSup dbEnvMorphSup;
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
+  public static DBMorphSupWriter getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new DBMorphSupWriter();
+    }
+    return instance;
+  }
+
+  public static void main(String[] args) throws ApplicationException {
+    getInstance();
+    instance.beginOperation();
+    System.out.print("Start ...");
+    instance.initReadWrite();
+    // instance.loadDonatusSupDbDumpsToDb();
+    instance.printSizeOfAllMorphSupDBs();
+    // instance.writeDonatusSupsToFiles();
+    instance.end();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    System.out.println("End.");
+    System.out.println("Needed time: " + elapsedTime + " seconds");
+  }
+
+  private void initReadWrite() throws ApplicationException {
+    dbEnvMorphSup = new DbEnvMorphSup();
+    dbEnvMorphSup.setDataDir(DB_DIR_DONATUS_ADD_SUP);
+    dbEnvMorphSup.initReadWrite();
+  }
+  
+  private void loadDonatusSupDbDumpsToDb() throws ApplicationException {
+    for (int i=0; i<DONATUS_SUP_DUMPS.length; i++) {
+      String donatusSupName = DONATUS_SUP_DUMPS[i];
+      loadDbDumpToDb(donatusSupName);
+    }
+  }
+  
+  private void loadDbDumpToDb(String donatusSupName) throws ApplicationException {
+    String dumpFileName = DATA_FILES_DIR_DONATUS_ADD_SUP + "/" + donatusSupName + ".dump";
+    String dbName = donatusSupName + "Dump.db";
+    try {
+      BufferedReader bufferedReader = new BufferedReader(new FileReader(dumpFileName));
+      DbLoad loader = new DbLoad();
+      loader.setEnv(dbEnvMorphSup.getEnv());
+      loader.setDbName(dbName);
+      loader.setInputReader(bufferedReader);
+      loader.setIgnoreUnknownConfig(true);
+      loader.load();
+      bufferedReader.close();
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void end() throws ApplicationException {
+    for (int i=0; i<DONATUS_SUP_DUMPS.length; i++) {
+       String donatusSupName = DONATUS_SUP_DUMPS[i];
+       dbEnvMorphSup.closeDatabase(donatusSupName);
+       dbEnvMorphSup.closeDatabase(donatusSupName + "Dump");
+    }
+    dbEnvMorphSup.close();
+  }
+
+  private String readEntry(String morphSupName, String formName) throws ApplicationException {
+    String retString = null;
+    try {
+      String keyStr = formName;
+      DatabaseEntry dbEntryKey = new DatabaseEntry(keyStr.getBytes("utf-8"));
+      Database morphSupDB = dbEnvMorphSup.getMorphSupDB(morphSupName);
+      Cursor cursor = morphSupDB.openCursor(null, null);
+      DatabaseEntry foundValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getSearchKey(dbEntryKey, foundValue, LockMode.DEFAULT);
+      if (operationStatus == OperationStatus.SUCCESS) {
+        byte[] foundValueBytes = foundValue.getData();
+        retString = new String(foundValueBytes, "utf-8");
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return retString;
+  }
+  
+  private void printSizeOfAllMorphSupDBs() throws ApplicationException {
+    for (int i=0; i<DONATUS_SUP_DUMPS.length; i++) {
+      String donatusSupName = DONATUS_SUP_DUMPS[i];
+      int size = getSizes(donatusSupName + "Dump");
+      System.out.println(donatusSupName + ": " + size + " records");
+    }
+  }
+  
+  private int getSizes(String donatusSupName) throws ApplicationException {
+    int size = 0;
+    try {
+      dbEnvMorphSup.openDatabase(donatusSupName);
+      Database morphDB = dbEnvMorphSup.getMorphSupDB(donatusSupName);
+      size = (int) morphDB.count();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+    return size;
+  }
+  
+  private HashMap<String, DatabaseEntry> getWholeMorphHashMap(String donatusSupName) throws ApplicationException {
+    HashMap<String, DatabaseEntry> morphHashMap = new HashMap<String, DatabaseEntry>();
+    try {
+      dbEnvMorphSup.openDatabase(donatusSupName + "Dump");
+      Database morphDB = dbEnvMorphSup.getMorphSupDB(donatusSupName + "Dump");
+      Cursor cursor = morphDB.openCursor(null, null);
+      DatabaseEntry dbEntryKey = new DatabaseEntry();
+      DatabaseEntry dbEntryValue = new DatabaseEntry();
+      OperationStatus operationStatus = cursor.getFirst(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+      while (operationStatus == OperationStatus.SUCCESS) {
+        int size = dbEntryKey.getSize();
+        if (size > 0) {
+          byte[] dbEntryKeyBytes = dbEntryKey.getData();
+          String dbEntryKeyStr = new String(dbEntryKeyBytes, "utf-8");
+          DatabaseEntry newDbEntryValue = new DatabaseEntry(dbEntryValue.getData());
+          morphHashMap.put(dbEntryKeyStr, newDbEntryValue);
+        }
+        operationStatus = cursor.getNext(dbEntryKey, dbEntryValue, LockMode.DEFAULT);
+      }
+      cursor.close();
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    }
+    return morphHashMap;
+  }
+  
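+  // Exports each supplementary cache into a donatus-sup-<name>.xml file: every
+  // dump entry becomes a <form> element with provider, language (taken from the
+  // "cache-" prefix), the lower-cased form name and the first lemma of the
+  // tab-separated value; Greek entries are transcoded from Betacode first.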
+  private void writeDonatusSupsToFiles() throws ApplicationException {
+    BufferedReader in = null;
+    BufferedOutputStream out = null;
+    try {
+      for (int i=0; i<DONATUS_SUP_DUMPS.length; i++) {
+        String donatusSupName = DONATUS_SUP_DUMPS[i];
+        HashMap<String, DatabaseEntry> morphHashMap = getWholeMorphHashMap(donatusSupName);
+        Iterator<String> morphDumpIter = morphHashMap.keySet().iterator();
+        File outputFile = new File(DATA_FILES_DIR_DONATUS_ADD_SUP + "/donatus-sup-" + donatusSupName + ".xml");
+        out = new BufferedOutputStream(new FileOutputStream(outputFile));
+        write("<forms>\n", out);
+        while (morphDumpIter.hasNext()) {
+          write("<form>\n", out);
+          write("<provider>" + "donatus-sup" + "</provider>\n", out);
+          String language = "unknown";     
+          if (donatusSupName.startsWith("cache-"))
+            language = donatusSupName.substring(6);
+          write("<language>" + language + "</language>\n", out);
+          String morphKeyStr = morphDumpIter.next();
+          String formStr = morphKeyStr;
+          if (language.equals("el"))
+            formStr = transcodeFromBetaCode2Unicode(formStr);
+          formStr = formStr.toLowerCase();
+          write("<form-name>" + formStr + "</form-name>\n", out);
+          DatabaseEntry morphValue = morphHashMap.get(morphKeyStr);
+          byte[] morphValueBytes = morphValue.getData();
+          String wholeLemmaStr = new String(morphValueBytes, "utf-8");
+          // only first lemma is recognized TODO recognize all lemmas for the form
+          char splitSymbol = '\u0009';
+          int firstIndexOfSplitSymbol = wholeLemmaStr.indexOf(splitSymbol);
+          String lemmaForm = wholeLemmaStr;
+          if (firstIndexOfSplitSymbol != -1)
+            lemmaForm = wholeLemmaStr.substring(0, firstIndexOfSplitSymbol);
+          else
+            lemmaForm = lemmaForm + "XXXXXX";
+          char splitSymbol2 = '\u000B';
+          int firstIndexOfSplitSymbol2 = lemmaForm.indexOf(splitSymbol2);
+          if (firstIndexOfSplitSymbol2 != -1)
+            lemmaForm = lemmaForm.substring(0, firstIndexOfSplitSymbol2);
+          if (language.equals("el"))
+            lemmaForm = transcodeFromBetaCode2Unicode(lemmaForm);
+          lemmaForm = lemmaForm.replaceAll("#\\d", "");
+          lemmaForm = lemmaForm.toLowerCase();
+          write("<lemma-name>" + lemmaForm + "</lemma-name>\n", out);
+          write("</form>\n", out);
+        }
+        write("</forms>\n", out);
+      }
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+      if (out != null) try { out.close(); } catch (Exception e) { }
+    }
+  }
+  
+  private void write(byte[] inputBytes, BufferedOutputStream out) throws ApplicationException {
+    try {
+      out.write(inputBytes, 0, inputBytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  private void write(String outStr, BufferedOutputStream out) throws ApplicationException {
+    try {
+      byte[] bytes = outStr.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private String transcodeFromBetaCode2Unicode(String inputStr) throws ApplicationException {
+    Transcoder transcoder = Transcoder.getInstance();
+    String encodedUnicodeForm = transcoder.transcodeFromBetaCode2Unicode(inputStr);
+    return encodedUnicodeForm;
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,168 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.db;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Date;
+
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.db.DBMorphHandler;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+
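+/**
+ * Fills the Donatus morphology database from the XML form files under
+ * MPDL_DATA_DIR/dataFiles by parsing them with DBMorphWriterContentHandler;
+ * main() currently only opens the database and prints its size.
+ */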
+public class DBMorphWriter {
+  private static DBMorphWriter instance;
+  private static String MPDL_DATA_DIR = MpdlConstants.MPDL_DATA_DIR;
+  private static String DB_DIR_DONATUS = MPDL_DATA_DIR + "/dataBerkeleyDB/donatus";
+  private static String DATA_FILES_DIR = MPDL_DATA_DIR + "/dataFiles";
+  private DBMorphHandler dbMorphHandler;
+  private Date beginOfOperation;
+  private Date endOfOperation;
+  
+  public static DBMorphWriter getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new DBMorphWriter();
+      instance.init();
+    }
+    return instance;
+  }
+
+  /**
+   * Command line entry point: opens the Donatus morphology database, prints
+   * the number of stored forms and closes it again; the write, delete and
+   * sample read calls are currently commented out.
+   */
+  public static void main(String[] args) throws ApplicationException {
+    getInstance();
+    instance.beginOperation();
+    System.out.println("Start ...");
+    instance.init();
+    instance.openMorphData();
+    // instance.deleteMorphData();
+    long size = instance.getSize();
+    System.out.println("Count forms: " + size);
+    // instance.writeMorphData();
+    // instance.readSampleData();
+    instance.end();
+    instance.endOperation();
+    Double elapsedTime = new Util().getSecondWithMillisecondsBetween(instance.beginOfOperation, instance.endOfOperation);
+    System.out.println("End.");
+    System.out.println("Needed time: " + elapsedTime + " seconds");
+  }
+
+  private void init() throws ApplicationException {
+    dbMorphHandler = new DBMorphHandler(DB_DIR_DONATUS);
+    dbMorphHandler.start();
+  }
+  
+  private void openMorphData() throws ApplicationException {
+    dbMorphHandler.openDatabases();
+  }
+  
+  private void deleteMorphData() throws ApplicationException {
+    dbMorphHandler.deleteMorphData();
+  }
+  
+  private void writeMorphData() throws ApplicationException {
+    String inputFileNameLatin = DATA_FILES_DIR + "/" + "perseus-latin-forms.xml";
+    instance.write(inputFileNameLatin);
+    String inputFileNameGreek = DATA_FILES_DIR + "/" + "perseus-greek-forms.xml";
+    instance.write(inputFileNameGreek);
+    String inputFileNameArabic = DATA_FILES_DIR + "/" + "perseus-arabic-forms.xml";
+    instance.write(inputFileNameArabic);
+    String inputFileNameDutch = DATA_FILES_DIR + "/" + "celex-dutch-forms.xml";
+    instance.write(inputFileNameDutch);
+    String inputFileNameGerman = DATA_FILES_DIR + "/" + "celex-german-forms.xml";
+    instance.write(inputFileNameGerman);
+    String inputFileNameEnglish = DATA_FILES_DIR + "/" + "celex-english-forms.xml";
+    instance.write(inputFileNameEnglish);
+    String inputFileNameFrench = DATA_FILES_DIR + "/" + "lexique-french-forms.xml";
+    instance.write(inputFileNameFrench);
+    String inputFileNameItalian = DATA_FILES_DIR + "/" + "donatus-italian-forms.xml";
+    instance.write(inputFileNameItalian);
+    String[] languages = {"ar", "de", "en", "el", "fr", "it", "la"};
+    for (int i = 0; i < languages.length; i++) {
+      String language = languages[i];
+      String inputFileNameDonatusSup = DATA_FILES_DIR + "/" + "donatus-sup-" + language + "-forms.xml";
+      instance.write(inputFileNameDonatusSup);
+    }
+    String[] donatusAdditionalSups = {"cache-la", "cache-el", "cache-it"};
+    for (int i = 0; i < donatusAdditionalSups.length; i++) {
+      String donatusAdditionalSupName = donatusAdditionalSups[i];
+      String inputFileNameDonatusAddSup = DATA_FILES_DIR + "/donatusAdditionalSup/" + "donatus-sup-" + donatusAdditionalSupName + ".xml";
+      instance.write(inputFileNameDonatusAddSup);
+    }
+  }
+  
+  private void write(String inputFileName) throws ApplicationException {
+    File inputFile = new File(inputFileName);
+    if (! inputFile.exists()) {
+      System.out.println("Input file: " + inputFile.getAbsolutePath() + " does not exist.");
+      return;
+    }
+    DBMorphWriterContentHandler morphContentHandler = new DBMorphWriterContentHandler(dbMorphHandler);
+    try {
+      XMLReader xmlParser = new SAXParser();
+      xmlParser.setContentHandler(morphContentHandler);
+      InputStream inputStream = new FileInputStream(inputFile);
+      BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
+      InputSource input = new InputSource(bufferedInputStream);
+      xmlParser.parse(input);
+      bufferedInputStream.close();
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private long getSize() throws ApplicationException {
+    long size = dbMorphHandler.getSize();
+    return size;
+  }
+  
+  private void addSampleData() throws ApplicationException {
+    Lemma l1 = new Lemma("perseus", "la", "abrogo");
+    Form f1 = new Form("perseus", "la", "abrogare");
+    Form f2 = new Form("perseus", "la", "abroges");
+    dbMorphHandler.writeFormLemma(f1, l1);
+    dbMorphHandler.writeLemmaForm(l1, f1);
+    dbMorphHandler.writeLemmaForm(l1, f2);
+  }
+  
+  private void readSampleData() throws ApplicationException {
+    ArrayList<Form> forms = dbMorphHandler.readForms("la", "abrogo");
+    System.out.println("Forms: " + forms);
+  }
+  
+  private void deleteSampleData() throws ApplicationException {
+    Lemma l1 = new Lemma("perseus", "la", "abrogo");
+    Form f1 = new Form("perseus", "la", "abrogare");
+    Form f2 = new Form("perseus", "la", "abroges");
+    dbMorphHandler.deleteLemma(l1);
+    dbMorphHandler.deleteForm(f1);
+    dbMorphHandler.deleteForm(f2);
+  }
+  
+  private void end() throws ApplicationException {
+    dbMorphHandler.closeDatabases();
+  }
+
+  private void beginOperation() {
+    beginOfOperation = new Date();
+  }
+
+  private void endOperation() {
+    endOfOperation = new Date();
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DBMorphWriterContentHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,133 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.db;
+
+import java.util.Hashtable;
+
+import org.xml.sax.*;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+
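+/**
+ * SAX content handler that reads "form" elements from the morphology XML files
+ * and writes each form/lemma pair to the database via DBMorphHandler.
+ */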
+public class DBMorphWriterContentHandler implements ContentHandler {
+  private DBMorphHandler dbMorphHandler;
+  private Element currentElement;
+  private Form form;
+  private Lemma lemma;
+  private Hashtable<String, Form> forms;
+  
+  public DBMorphWriterContentHandler(DBMorphHandler dbMorphHandler) {
+    this.dbMorphHandler = dbMorphHandler;
+  }
+  
+  public void startDocument() throws SAXException {
+    forms = new Hashtable<String, Form>();
+  }
+
+  public void endDocument() throws SAXException {
+    forms = null;
+  }
+  
+  // TODO replace setPos etc. with addPos etc.
+  public void characters(char[] c, int start, int length) throws SAXException {
+    if (currentElement != null) {
+      String elemName = currentElement.name;
+      if (form != null) {
+        char[] cCopy = new char[length];
+        System.arraycopy(c, start, cCopy, 0, length);
+        String charactersStr = String.valueOf(cCopy);
+        if (charactersStr != null && ! (charactersStr.trim().equals(""))) {
+          if (elemName.equals("provider")) {
+            form.addProvider(charactersStr);
+            lemma.addProvider(charactersStr);
+          } else if (elemName.equals("language")) {
+            form.addLanguage(charactersStr);
+            lemma.addLanguage(charactersStr);
+          } else if (elemName.equals("form-name")) {
+            form.addFormName(charactersStr);
+          } else if (elemName.equals("lemma-name")) {
+            form.addLemmaName(charactersStr);
+            lemma.addLemmaName(charactersStr);
+          } else if (elemName.equals("pos")) {
+            form.addPos(charactersStr);
+          } else if (elemName.equals("tense")) {
+            form.addTense(charactersStr);
+          } else if (elemName.equals("voice")) {
+            form.addVoice(charactersStr);
+          } else if (elemName.equals("casus")) {
+            form.addCasus(charactersStr);
+          } else if (elemName.equals("number")) {
+            form.addNumber(charactersStr);
+          } else if (elemName.equals("mood")) {
+            form.addMood(charactersStr);
+          } else if (elemName.equals("person")) {
+            form.addPerson(charactersStr);
+          } else if (elemName.equals("gender")) {
+            form.addGender(charactersStr);
+          } else if (elemName.equals("definite")) {
+            form.addDefinite(charactersStr);
+          }
+        }
+      }
+    } 
+  }
+
+  public void ignorableWhitespace(char[] c, int start, int length) throws SAXException {
+  }
+
+  public void processingInstruction(String target, String data) throws SAXException {
+  }
+
+  public void setDocumentLocator(org.xml.sax.Locator arg1) {
+  }
+
+  public void endPrefixMapping(String prefix) throws SAXException {
+  }
+
+  public void skippedEntity(String name) throws SAXException {
+  }
+
+  public void startElement(String uri, String localName, String name, Attributes attrs) throws SAXException {
+    currentElement = new Element(name, "");
+    if (localName.equals("form")) {
+      form = new Form();
+      lemma = new Lemma();
+    }
+  }
+
+  public void endElement(String uri, String localName, String name) throws SAXException {
+    currentElement = null;
+    if (localName.equals("form")) {
+      String keyStr = form.getFormName();
+      forms.put(keyStr, form);
+      write(form, lemma);
+      form = null;
+      lemma = null;
+    }
+  }
+
+  public void startPrefixMapping(String prefix, String uri) throws SAXException {
+  }
+  
+  private void write(Form form, Lemma lemma) throws SAXException {
+    try {
+      dbMorphHandler.writeFormLemma(form, lemma);
+      dbMorphHandler.writeLemmaForm(lemma, form);
+    } catch (ApplicationException e) {
+      throw new SAXException(e);
+    }
+  }
+  
+  private class Element {
+    String name;
+    String value;
+    
+    Element(String name) {
+      this.name = name;
+    }
+
+    Element(String name, String value) {
+      this.name = name;
+      this.value = value;
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorph.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,105 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.db;
+
+import java.io.File;
+
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseConfig;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.Environment;
+import com.sleepycat.je.EnvironmentConfig;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
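+/**
+ * Berkeley DB environment for the morphology store: creates a transactional
+ * environment in the configured data directory and manages the "LemmaDB" and
+ * "FormDB" databases (duplicate keys allowed).
+ */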
+public class DbEnvMorph {
+  private String dataDir;
+  private File envPath;
+  private Environment env;
+  private EnvironmentConfig envConfig;
+  private DatabaseConfig dbConfig;
+  private Database lemmaDB;
+  private Database formDB;
+
+  public DbEnvMorph() {
+  }
+
+  public void setDataDir(String dataDir) {
+    this.dataDir = dataDir;
+  }
+  
+  public void init() throws ApplicationException {
+    try {
+      envConfig = new EnvironmentConfig();
+      dbConfig = new DatabaseConfig();
+      envConfig.setReadOnly(false);
+      dbConfig.setReadOnly(false);
+      envConfig.setAllowCreate(true);
+      dbConfig.setAllowCreate(true);
+      envConfig.setTransactional(true);
+      dbConfig.setTransactional(true);
+      // allow duplicates for keys
+      dbConfig.setSortedDuplicates(true);
+      envPath = new File(dataDir);
+      env = new Environment(envPath, envConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  public void openDatabases() throws ApplicationException {
+    try {
+      // open databases (and create them if they do not exist)
+      lemmaDB = env.openDatabase(null, "LemmaDB", dbConfig);
+      formDB = env.openDatabase(null, "FormDB", dbConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public void removeDatabases() throws ApplicationException {
+    try {
+      if (formDB != null)
+        formDB.close();
+      if (lemmaDB != null)
+        lemmaDB.close();
+      env.removeDatabase(null, "LemmaDB");    
+      env.removeDatabase(null, "FormDB");
+      formDB = null;
+      lemmaDB = null;
+      /*   
+      boolean bla = true;
+      env.truncateDatabase(null, "LemmaDB", bla);    
+      env.truncateDatabase(null, "FormDB", bla);
+      */
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public Environment getEnv() {
+    return env;
+  }
+
+  public Database getLemmaDB() {
+    return lemmaDB;
+  }
+
+  public Database getFormDB() {
+    return formDB;
+  }
+
+  public void close() throws ApplicationException {
+    if (env != null) {
+      try {
+        if (formDB != null)
+          formDB.close();
+        if (lemmaDB != null)
+          lemmaDB.close();
+        if (env != null)
+          env.close();
+      } catch (DatabaseException e) {
+        throw new ApplicationException(e);
+      }
+    }
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lt/morph/db/DbEnvMorphSup.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,101 @@
+package de.mpg.mpiwg.berlin.mpdl.lt.morph.db;
+
+import java.io.File;
+import java.util.HashMap;
+
+import com.sleepycat.je.Database;
+import com.sleepycat.je.DatabaseConfig;
+import com.sleepycat.je.DatabaseException;
+import com.sleepycat.je.Environment;
+import com.sleepycat.je.EnvironmentConfig;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
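+/**
+ * Berkeley DB environment for the supplementary Donatus databases: opens one
+ * database per cache name ("name.db") on demand and keeps it in a map.
+ */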
+public class DbEnvMorphSup {
+  private String dataDir;
+  private File envPath;
+  private Environment env;
+  private EnvironmentConfig envConfig;
+  private DatabaseConfig dbConfig;
+  private HashMap<String, Database> morphSupDBs = new HashMap<String, Database>();
+
+  public DbEnvMorphSup() {
+  }
+
+  public void setDataDir(String dataDir) {
+    this.dataDir = dataDir;
+  }
+  
+  public void initReadOnly() throws ApplicationException {
+    try {
+      envConfig = new EnvironmentConfig();
+      dbConfig = new DatabaseConfig();
+      envPath = new File(dataDir);
+      env = new Environment(envPath, envConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  public void initReadWrite() throws ApplicationException {
+    try {
+      envConfig = new EnvironmentConfig();
+      dbConfig = new DatabaseConfig();
+      envConfig.setReadOnly(false);
+      dbConfig.setReadOnly(false);
+      envConfig.setAllowCreate(true);
+      dbConfig.setAllowCreate(true);
+      envConfig.setTransactional(true);
+      dbConfig.setTransactional(true);
+      envPath = new File(dataDir);
+      env = new Environment(envPath, envConfig);
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+
+  public void openDatabase(String morphSupName) throws ApplicationException {
+    try {
+      Database lexDB = morphSupDBs.get(morphSupName);
+      if (lexDB == null) {
+        Database morphSupDB = env.openDatabase(null, morphSupName + ".db", dbConfig);
+        morphSupDBs.put(morphSupName, morphSupDB);
+      }
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public void closeDatabase(String morphSupName) throws ApplicationException {
+    try {
+      if (morphSupDBs != null) {
+        Database morphSupDB = morphSupDBs.get(morphSupName);
+        if (morphSupDB != null)
+          morphSupDB.close();
+      }
+    } catch (DatabaseException e) {
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public Environment getEnv() {
+    return env;
+  }
+
+  public Database getMorphSupDB(String morphSupName) {
+    Database morphSupDB = morphSupDBs.get(morphSupName);
+    return morphSupDB;
+  }
+
+  public void close() throws ApplicationException {
+    if (env != null) {
+      try {
+        if (env != null)
+          env.close();
+      } catch (DatabaseException e) {
+        throw new ApplicationException(e);
+      }
+    }
+  }
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lucene/LuceneUtil.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,27 @@
+package de.mpg.mpiwg.berlin.mpdl.lucene;
+
+import java.util.ArrayList;
+
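+/**
+ * Small helper around Lucene query strings; currently it only extracts the
+ * plain terms of a query, skipping tokens that contain wildcard, fuzzy, range
+ * or boolean operator characters.
+ */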
+public class LuceneUtil {
+  private static LuceneUtil instance;
+  
+  public static LuceneUtil getInstance() {
+    if (instance == null) {
+      instance = new LuceneUtil();
+    }
+    return instance;
+  }
+
+  public ArrayList<String> getVariantsFromLuceneQuery(String queryString) {
+    ArrayList<String> variants = new ArrayList<String>();
+    String[] variantTokens = queryString.split(" ");  // TODO throw the phrases away (e.g.: "bla bla bla")
+    for (int i = 0; i < variantTokens.length; i++) {
+      String token = variantTokens[i];
+      if (! (token.contains("*") || token.contains("?") || token.contains("~") || token.contains("-") || token.contains("+") || token.contains("^") || token.contains("OR") || token.contains("AND") || token.contains("NOT"))) {
+        variants.add(token);
+      }
+    }
+    return variants;
+  }
+
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/lucene/MorphQueryParser.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,175 @@
+package de.mpg.mpiwg.berlin.mpdl.lucene;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Vector;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlMorphAnalyzer;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
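+/**
+ * QueryParser variant that expands single-term field queries morphologically:
+ * the analyzed term is interpreted as one or more lemma names (separated by
+ * "+++", or prefixed with "lemmalemma" for the lemma itself) and replaced by a
+ * term or boolean query over the index keys from the MorphologyCache.
+ */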
+public class MorphQueryParser extends QueryParser {
+  String language;
+
+  public MorphQueryParser(String f, MpdlMorphAnalyzer a) {
+    super(f, a);
+    this.language = a.getLanguage();
+  }
+
+  /**
+   * @exception ParseException thrown in an overridden method to disallow certain queries
+   */
+  protected Query getFieldQuery(String field, String queryText)  throws ParseException {
+    // Use the analyzer to get all the tokens, and then build a TermQuery,
+    // PhraseQuery, or nothing based on the term count
+    Analyzer analyzer = getAnalyzer();
+    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
+    Vector v = new Vector();
+    org.apache.lucene.analysis.Token t;
+    int positionCount = 0;
+    boolean severalTokensAtSamePosition = false;
+
+    while (true) {
+      try {
+        t = source.next();
+      }
+      catch (IOException e) {
+        t = null;
+      }
+      if (t == null)
+        break;
+      v.addElement(t);
+      if (t.getPositionIncrement() != 0)
+        positionCount += t.getPositionIncrement();
+      else
+        severalTokensAtSamePosition = true;
+    }
+    try {
+      source.close();
+    }
+    catch (IOException e) {
+      // ignore
+    }
+
+    if (v.size() == 0)
+      return null;
+    else if (v.size() == 1) {
+      t = (org.apache.lucene.analysis.Token) v.elementAt(0);
+      // BEGIN MPDL specific extensions
+      Query retMorphQuery = null;
+      try {
+        String termText = t.termText();
+        ArrayList<String> lemmaNames = null;
+        if (termText != null && ! termText.trim().equals("")) {
+          // lemma mode: if term contains "lemmalemma" then the lemma itself is fetched
+          if (termText.startsWith("lemmalemma")) {
+            lemmaNames = new ArrayList<String>();
+            String lemmaName = termText.substring(10);
+            lemmaNames.add(lemmaName);
+          } else {
+            String[] lemmasStrArray = termText.split("\\+\\+\\+");
+            if (lemmasStrArray != null)
+              lemmaNames = new ArrayList<String>();
+            for (int i=0; i<lemmasStrArray.length; i++) {
+              String lemmaStr = lemmasStrArray[i];
+              if (! lemmaStr.trim().equals(""))
+                lemmaNames.add(lemmaStr);
+            }
+          }
+        } else {
+          return null;
+        }
+        MorphologyCache morphologyCache = MorphologyCache.getInstance();
+        ArrayList<String> morphIndexKeys = morphologyCache.getIndexKeysByLemmaNames(language, lemmaNames);
+        if (morphIndexKeys == null) {
+          return null;
+        } else if (morphIndexKeys.size() == 1) {
+          String morphIndexKey = morphIndexKeys.get(0);
+          retMorphQuery = new TermQuery(new Term(field, morphIndexKey));
+        } else if (morphIndexKeys.size() > 1) {
+          BooleanQuery retMorphQueryBoolean = new BooleanQuery(true);
+          for (int i=0; i<morphIndexKeys.size(); i++) {
+            String morphIndexKey = morphIndexKeys.get(i);
+            TermQuery morphTermQuery = new TermQuery(new Term(field, morphIndexKey));
+            retMorphQueryBoolean.add(morphTermQuery, BooleanClause.Occur.SHOULD);
+          }
+          retMorphQuery = retMorphQueryBoolean;
+        }
+      } catch (ApplicationException e) {
+        throw new ParseException(e.getMessage());
+      }
+      return retMorphQuery;
+      // END MPDL specific extensions
+    } else {
+      if (severalTokensAtSamePosition) {
+        if (positionCount == 1) {
+          // no phrase query:
+          BooleanQuery q = new BooleanQuery(true);
+          for (int i = 0; i < v.size(); i++) {
+            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+            TermQuery currentQuery = new TermQuery(
+                new Term(field, t.termText()));
+            q.add(currentQuery, BooleanClause.Occur.SHOULD);
+          }
+          return q;
+        }
+        else {
+          // phrase query:
+          MultiPhraseQuery mpq = new MultiPhraseQuery();
+          mpq.setSlop(getPhraseSlop());
+          List multiTerms = new ArrayList();
+          int position = -1;
+          for (int i = 0; i < v.size(); i++) {
+            t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+            if (t.getPositionIncrement() > 0 && multiTerms.size() > 0) {
+              if (getEnablePositionIncrements()) {
+                mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+              } else {
+                mpq.add((Term[])multiTerms.toArray(new Term[0]));
+              }
+              multiTerms.clear();
+            }
+            position += t.getPositionIncrement();
+            multiTerms.add(new Term(field, t.termText()));
+          }
+          if (getEnablePositionIncrements()) {
+            mpq.add((Term[])multiTerms.toArray(new Term[0]),position);
+          } else {
+            mpq.add((Term[])multiTerms.toArray(new Term[0]));
+          }
+          return mpq;
+        }
+      }
+      else {
+        PhraseQuery pq = new PhraseQuery();
+        pq.setSlop(getPhraseSlop());
+        int position = -1;
+        for (int i = 0; i < v.size(); i++) {
+          t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+          if (getEnablePositionIncrements()) {
+            position += t.getPositionIncrement();
+            pq.add(new Term(field, t.termText()),position);
+          } else {
+            pq.add(new Term(field, t.termText()));
+          }
+        }
+        return pq;
+      }
+    }
+  }
+  
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlChainScheduler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,243 @@
+package de.mpg.mpiwg.berlin.mpdl.schedule;
+
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Date;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Queue;
+
+import org.apache.log4j.Logger;
+import org.quartz.JobDataMap;
+import org.quartz.JobDetail;
+import org.quartz.JobExecutionContext;
+import org.quartz.JobListener;
+import org.quartz.SchedulerException;
+import org.quartz.SimpleTrigger;
+import org.quartz.Trigger;
+import org.quartz.impl.StdSchedulerFactory;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
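+/**
+ * Quartz based scheduler that executes document operations strictly one after
+ * another: new operations are queued, the next one is started only when the
+ * previous job has reported back through finishOperation(), and finished
+ * operations are kept for status queries by their order id.
+ */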
+public class MpdlChainScheduler {
+  private static MpdlChainScheduler instance;
+  private static String CRUD_JOB = "MPDL_CRUD_JOB";
+  private static String CRUD_TRIGGER = "MPDL_CRUD_TRIGGER";
+  private static String CRUD_GROUP = "MPDL_CRUD_GROUP";
+  private static Logger LOGGER = Logger.getLogger(MpdlChainScheduler.class); // Logs to EXIST_HOME/webapp/WEB-INF/logs/exist.log
+  private org.quartz.Scheduler scheduler;
+  private JobListener jobListener;
+  private Queue<MpdlDocOperation> docOperationQueue = new PriorityQueue<MpdlDocOperation>();
+  private HashMap<Integer, MpdlDocOperation> finishedDocOperations = new HashMap<Integer, MpdlDocOperation>();
+  private boolean operationInProgress = false;
+  private int jobOrderId = 0;
+  
+  public static MpdlChainScheduler getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new MpdlChainScheduler();
+      instance.init();
+    }
+    return instance;
+  }
+
+  public MpdlDocOperation doOperation(MpdlDocOperation docOperation) throws ApplicationException {
+    jobOrderId++;
+    docOperation.setOrderId(jobOrderId);
+    queueOperation(docOperation);
+    scheduleNextOperation();
+    return docOperation;
+  }
+  
+  public void finishOperation(MpdlDocOperation docOperation) throws ApplicationException {
+    operationInProgress = false;
+    Date now = new Date();
+    docOperation.setEnd(now);
+    docOperation.setStatus("finished");
+    int jobId = new Integer(docOperation.getOrderId());
+    finishedDocOperations.put(jobId, docOperation);
+    log(docOperation);
+    // schedule next job if there is one
+    scheduleNextOperation();
+  }
+  
+  private void log(MpdlDocOperation docOperation) {
+    Date startTime = docOperation.getStart();
+    Date endTime = docOperation.getEnd();
+    long executionTime = -1;
+    if (startTime != null && endTime != null)
+      executionTime = (endTime.getTime() - startTime.getTime());
+    String jobInfo = "MPDL: Document operation " + docOperation.toString() + ": started at: " + startTime + 
+      " and ended at: " + endTime + " (needed time: " + executionTime + " ms)";
+    LOGGER.info(jobInfo);
+  }
+  
+  public synchronized void scheduleNextOperation() throws ApplicationException {
+    if (isOperationInProgress()) {
+      // nothing, operation has to wait
+    } else {
+      MpdlDocOperation docOperation = docOperationQueue.poll();
+      if (docOperation == null) {
+        // if queue is empty then do nothing (there are no more operations to execute)
+      } else {
+        Date now = new Date();
+        operationInProgress = true;
+        docOperation.setStart(now);
+        scheduleJob(docOperation, now);
+      }
+    }
+  }
+  
+  public ArrayList<MpdlDocOperation> getDocOperations() throws ApplicationException {
+    ArrayList<MpdlDocOperation> docOperations = new ArrayList<MpdlDocOperation>();
+    try {
+      // first: all finished jobs
+      Collection<MpdlDocOperation> finiDocOperations = finishedDocOperations.values();
+      docOperations.addAll(finiDocOperations);
+      // second: all currently executed jobs
+      if (operationInProgress) {
+        List<JobExecutionContext> currentJobs = (List<JobExecutionContext>) scheduler.getCurrentlyExecutingJobs();
+        Iterator<JobExecutionContext> iter = currentJobs.iterator();
+        while (iter.hasNext()) {
+          JobExecutionContext jobExecutionContext = iter.next();
+          MpdlDocOperation docOperation = getDocOperation(jobExecutionContext);
+          if (docOperation != null) {
+            docOperations.add(docOperation);
+          }
+        }
+      }
+      // third: all queued jobs
+      Iterator<MpdlDocOperation> iter = docOperationQueue.iterator();
+      while (iter.hasNext()) {
+        MpdlDocOperation docOperation = iter.next();
+        docOperations.add(docOperation);
+      }
+    } catch (SchedulerException e) {
+      LOGGER.error(e.getMessage());
+      throw new ApplicationException(e);
+    }
+    return docOperations;
+  }
+    
+  public MpdlDocOperation getDocOperation(int jobId) throws ApplicationException {
+    MpdlDocOperation docOperation = null;
+    try {
+      // first try: looks into currently executing jobs
+      if (operationInProgress) {
+        List<JobExecutionContext> currentJobs = (List<JobExecutionContext>) scheduler.getCurrentlyExecutingJobs();
+        Iterator<JobExecutionContext> iter = currentJobs.iterator();
+        while (iter.hasNext()) {
+          JobExecutionContext jobExecutionContext = iter.next();
+          docOperation = getDocOperation(jobExecutionContext);
+          if (docOperation != null) {
+            int dopOpJobId = docOperation.getOrderId();
+            if (jobId == dopOpJobId)
+              return docOperation;
+          }
+        }
+      }
+      // second try: look into finished jobs
+      docOperation = finishedDocOperations.get(new Integer(jobId));
+      if (docOperation != null) {
+        return docOperation;
+      }
+      // third try: look into queued jobs
+      Iterator<MpdlDocOperation> iter = docOperationQueue.iterator();
+      while (iter.hasNext()) {
+        docOperation = iter.next();
+        if (docOperation.getOrderId() == jobId)
+          return docOperation;
+      }
+    } catch (SchedulerException e) {
+      LOGGER.error(e.getMessage());
+      throw new ApplicationException(e);
+    }
+    // if not found return null
+    return null;
+  }
+  
+  public MpdlDocOperation getDocOperation(JobExecutionContext jobExecutionContext) {
+    MpdlDocOperation docOperation = null;
+    if (jobExecutionContext != null) {
+      JobDetail job = jobExecutionContext.getJobDetail();
+      JobDataMap parameters = job.getJobDataMap();
+      docOperation = (MpdlDocOperation) parameters.get("operation");
+    }
+    return docOperation;
+  }
+  
+  private void queueOperation(MpdlDocOperation docOperation) {
+    int operationsBefore = docOperationQueue.size();
+    if (operationsBefore == 0)
+      docOperation.setStatus("waiting in operation queue");
+    else
+      docOperation.setStatus("waiting in operation queue: " + operationsBefore + " operations have to be executed before this operation");
+    docOperationQueue.offer(docOperation);
+  }
+  
+  private synchronized boolean isOperationInProgress() {
+    return operationInProgress;  
+  }
+  
+  private void scheduleJob(MpdlDocOperation docOperation, Date fireTime) throws ApplicationException {
+    try {
+      int jobId = docOperation.getOrderId();
+      String jobName = CRUD_JOB + "-id-" + jobId + "-timeId-" + fireTime;
+      JobDetail job = new JobDetail(jobName, CRUD_GROUP, MpdlDocJob.class);
+      JobDataMap parameters = new JobDataMap();
+      parameters.put("operation", docOperation);
+      job.setJobDataMap(parameters);
+      job.addJobListener(jobListener.getName());        
+      String triggerName = CRUD_TRIGGER + "-id-" + jobId + "-timeId-" + fireTime;
+      Trigger trigger = new SimpleTrigger(triggerName, CRUD_GROUP, fireTime);
+      scheduler.scheduleJob(job, trigger);
+      String jobInfo = "MPDL: Schedule document operation: " + docOperation.toString() + ": done at: " + fireTime.toString();
+      LOGGER.info(jobInfo);
+    } catch (SchedulerException e) {
+      LOGGER.error(e.getMessage());
+      throw new ApplicationException(e);
+    }
+  }
+  
+  private void init() throws ApplicationException {
+    try {
+      if (scheduler == null) {
+        String quartzPath = getQuartzPath();
+        StdSchedulerFactory schedulerFactory = new StdSchedulerFactory(quartzPath);
+        scheduler = schedulerFactory.getScheduler();
+        jobListener = new MpdlChainSchedulerListener();
+        scheduler.addJobListener(jobListener);
+        scheduler.start();
+        LOGGER.info("MPDL: Started Quartz scheduler factory: " + quartzPath);
+      } 
+    } catch (SchedulerException e) {
+      LOGGER.error(e.getMessage());
+      throw new ApplicationException(e);
+    }
+  }
+  
+  public void end() throws ApplicationException {
+    try {
+      if (scheduler != null) {
+        scheduler.shutdown();
+      }
+      String quartzPath = getQuartzPath();
+      LOGGER.info("MPDL: Ended Quartz scheduler factory: " + quartzPath);
+    } catch (SchedulerException e) {
+      LOGGER.error(e.getMessage());
+      throw new ApplicationException(e);
+    }
+  }
+
+  private String getQuartzPath() {
+    URL quartzUrl = MpdlChainScheduler.class.getResource("quartz.properties");
+    String quartzPath = quartzUrl.getPath();
+    if (quartzPath.indexOf(".jar!") != -1) {
+      int beginIndex = quartzPath.indexOf(".jar!") + 6;
+      quartzPath = quartzPath.substring(beginIndex);
+    }
+    return quartzPath;    
+  }
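+  
+  // Note on the chaining mechanism: scheduleJob() wraps an MpdlDocOperation into a Quartz
+  // JobDetail (JobDataMap key "operation") and fires it via a SimpleTrigger; once the job has
+  // run, MpdlChainSchedulerListener.jobWasExecuted() calls finishOperation(), which is expected
+  // to pick the next waiting operation from docOperationQueue, so document operations are
+  // processed one after another.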
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlChainSchedulerListener.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,53 @@
+package de.mpg.mpiwg.berlin.mpdl.schedule;
+
+import org.apache.log4j.Logger;
+import org.quartz.JobDataMap;
+import org.quartz.JobDetail;
+import org.quartz.JobExecutionContext;
+import org.quartz.JobExecutionException;
+import org.quartz.JobListener;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class MpdlChainSchedulerListener implements JobListener {
+  private static Logger LOGGER = Logger.getLogger(MpdlChainSchedulerListener.class); // Logs to EXIST_HOME/webapp/WEB-INF/logs/exist.log
+  
+  public String getName() {
+    return "MpdlJobChainingListener";
+  }
+
+  public void jobToBeExecuted(JobExecutionContext inContext) {
+  }
+
+  public void jobExecutionVetoed(JobExecutionContext inContext) {
+    String message = "MPDL: JobChainingListener: Job execution was vetoed.";
+    LOGGER.debug(message);
+  }
+
+  public void jobWasExecuted(JobExecutionContext inContext, JobExecutionException inException) {
+    // after a job has finished, try to schedule the next operation (if there is one in the queue)
+    MpdlDocOperation docOperation = null;
+    try {
+      MpdlChainScheduler mpdlChainScheduler = MpdlChainScheduler.getInstance();
+      docOperation = getDocOperation(inContext);
+      mpdlChainScheduler.finishOperation(docOperation);
+    } catch (ApplicationException e) {
+      if (docOperation != null) {
+        docOperation.setErrorMessage(e.getMessage());
+      }
+      LOGGER.error(e.getMessage());
+    }
+  }
+
+  private MpdlDocOperation getDocOperation(JobExecutionContext context) {
+    MpdlDocOperation docOperation = null;
+    if (context != null) {
+      JobDetail job = context.getJobDetail();
+      JobDataMap parameters = job.getJobDataMap();
+      docOperation = (MpdlDocOperation) parameters.get("operation");
+    }
+    return docOperation;
+  }
+  
+
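+  // This listener is registered with the scheduler in MpdlChainScheduler.init() via
+  // scheduler.addJobListener(jobListener) and is attached to each job by name
+  // ("MpdlJobChainingListener") through job.addJobListener(jobListener.getName()), so
+  // jobWasExecuted() is called after every MpdlDocJob.
+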
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlDocJob.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,94 @@
+package de.mpg.mpiwg.berlin.mpdl.schedule;
+
+import java.util.Date;
+
+import org.apache.log4j.Logger;
+import org.quartz.Job;
+import org.quartz.JobDataMap;
+import org.quartz.JobDetail;
+import org.quartz.JobExecutionContext;
+import org.quartz.JobExecutionException;
+
+import de.mpg.mpiwg.berlin.mpdl.client.DocumentHandler;
+import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocIngestor;
+import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocRestSession;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcDocHandler;
+
+public class MpdlDocJob implements Job {
+  public static String STATUS_BEGIN = "started";
+  private static Logger LOGGER = Logger.getLogger(MpdlDocJob.class); // Logs to EXIST_HOME/webapp/WEB-INF/logs/exist.log
+  private JobExecutionContext currentExecutedContext;
+  
+  public void execute(JobExecutionContext context) throws JobExecutionException {
+    this.currentExecutedContext = context;
+    MpdlDocOperation docOperation = getDocOperation();
+    docOperation.setIncludePdf(true); // default is true: also generate the PDF/HTML version of the document
+    try {
+      docOperation.setStatus(STATUS_BEGIN);
+      String operationName = docOperation.getName();   
+      String cookieId = docOperation.getESciDocCookieId();
+      MpdlXmlRpcDocHandler mpdlXmlRpcDocHandler = MpdlXmlRpcDocHandler.getInstance();
+      ESciDocRestSession eSciDocSession = ESciDocRestSession.getInstance(cookieId);
+      ESciDocIngestor eSciDocIngestor = new ESciDocIngestor(eSciDocSession);
+      if (operationName.equals("create") || operationName.equals("update")) {
+        DocumentHandler docHandler = new DocumentHandler(mpdlXmlRpcDocHandler, eSciDocIngestor);
+        docHandler.doOperation(docOperation);
+      } else if (operationName.equals("delete")) {
+        DocumentHandler docHandler = new DocumentHandler(mpdlXmlRpcDocHandler, eSciDocIngestor);
+        docHandler.doOperation(docOperation);
+      } else if (operationName.equals("updateExist")) {
+        DocumentHandler docHandler = new DocumentHandler(mpdlXmlRpcDocHandler);
+        docHandler.doOperation(docOperation);
+      } else if (operationName.equals("deleteExist")) {
+        DocumentHandler docHandler = new DocumentHandler(mpdlXmlRpcDocHandler);
+        docHandler.doOperation(docOperation);
+      } else if (operationName.equals("importAllDocumentsLocallyExist")) {
+        DocumentHandler docHandler = new DocumentHandler(mpdlXmlRpcDocHandler);
+        docOperation.setIncludePdf(false);  // for performance reasons while importing documents: do not generate PDF/HTML versions of the document
+        docHandler.doOperation(docOperation);
+      } else if (operationName.equals("generatePdfHtmlDocumentFiles")) {
+        DocumentHandler docHandler = new DocumentHandler(mpdlXmlRpcDocHandler);
+        docOperation.setIncludePdf(true);  
+        docHandler.doOperation(docOperation);
+      }
+      Date startingTime = docOperation.getStart();
+      String jobInfo = "MPDL: Document operation " + docOperation.toString() + ": started at: " + startingTime;
+      LOGGER.info(jobInfo);
+      this.currentExecutedContext = null;
+    } catch (Exception e) {
+      try {
+        // on error: finish the operation and tell Quartz (via setUnscheduleAllTriggers below) to unschedule all triggers of this job so that it does not run again
+        MpdlChainScheduler mpdlChainScheduler = MpdlChainScheduler.getInstance();
+        mpdlChainScheduler.finishOperation(docOperation);
+        String errorMessage = e.getMessage();
+        if (errorMessage == null) {
+          Throwable t = e.getCause();
+          if (t == null) {
+            errorMessage = e.toString();
+          } else {
+            errorMessage = t.getMessage();
+          }
+        }
+        docOperation.setErrorMessage(errorMessage);
+        LOGGER.error(errorMessage, e);
+        JobExecutionException jobExecutionException = new JobExecutionException(e);
+        jobExecutionException.setUnscheduleAllTriggers(true);
+        throw jobExecutionException;
+      } catch (ApplicationException ex) {
+        // nothing
+      }
+    }
+  } 
+
+  private MpdlDocOperation getDocOperation() {
+    MpdlDocOperation docOperation = null;
+    if (currentExecutedContext != null) {
+      JobDetail job = currentExecutedContext.getJobDetail();
+      JobDataMap parameters = job.getJobDataMap();
+      docOperation = (MpdlDocOperation) parameters.get("operation");
+    }
+    return docOperation;
+  }
+
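+  // Note: the MpdlDocOperation is handed to this job through the Quartz JobDataMap under the
+  // key "operation" (see MpdlChainScheduler.scheduleJob), which is why getDocOperation() only
+  // needs the currently executing context.
+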
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/MpdlDocOperation.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,195 @@
+package de.mpg.mpiwg.berlin.mpdl.schedule;
+
+import java.util.Date;
+
+import org.w3c.dom.Node;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
+
+public class MpdlDocOperation implements Comparable<MpdlDocOperation> {
+  private int id;
+  private Date start;
+  private Date end;
+  private String name;
+  private String status;
+  private String errorMessage;
+  private String uploadFileName;
+  private String srcUrl;   
+  private String docBase;
+  private String language;
+  private String fileName;
+  private String eSciDocDestUrl;
+  private String eSciDocCookieId;
+  private MetadataRecord mdRecord;
+  private Node docNode;
+  private boolean includePdf = false;  // default
+  
+  public MpdlDocOperation(String name, String srcUrl, String uploadFileName, String docBase, String language, String fileName) {
+    this.name = name;
+    this.srcUrl = srcUrl;
+    this.uploadFileName = uploadFileName;
+    this.docBase = docBase;
+    this.language = language;
+    this.fileName = fileName;
+  }
+
+  public int compareTo(MpdlDocOperation op) {
+    Integer opOrderId = new Integer(op.id);
+    Integer thisOrderId = new Integer(id);
+    return thisOrderId.compareTo(opOrderId);
+  }
+  
+  public boolean isFinished() {
+    return status != null && status.equals("finished");
+  }
+  
+  public boolean isError() {
+    return errorMessage != null && errorMessage.length() > 0;
+  }
+  
+  public int getOrderId() {
+    return id;
+  }
+
+  public void setOrderId(int orderId) {
+    this.id = orderId;
+  }
+
+  public String getStatus() {
+    return status;
+  }
+
+  public void setStatus(String status) {
+    this.status = status;
+  }
+
+  public Date getStart() {
+    return start;
+  }
+
+  public void setStart(Date start) {
+    this.start = start;
+  }
+
+  public Date getEnd() {
+    return end;
+  }
+
+  public void setEnd(Date end) {
+    this.end = end;
+  }
+
+  public String getName() {
+    return name;
+  }
+
+  public void setName(String name) {
+    this.name = name;
+  }
+
+  public String getErrorMessage() {
+    return errorMessage;
+  }
+
+  public void setErrorMessage(String errorMessage) {
+    this.errorMessage = errorMessage;
+  }
+
+  public String getDestUrl() {
+    return "/" + docBase + "/" + language + "/" + fileName;
+  }
+  
+  public String getSrcUrl() {
+    return srcUrl;
+  }
+
+  public void setSrcUrl(String srcUrl) {
+    this.srcUrl = srcUrl;
+  }
+
+  public String getUploadFileName() {
+    return uploadFileName;
+  }
+
+  public void setUploadFileName(String uploadFileName) {
+    this.uploadFileName = uploadFileName;
+  }
+
+  public String getDocBase() {
+    return docBase;
+  }
+
+  public void setDocBase(String docBase) {
+    this.docBase = docBase;
+  }
+
+  public String getLanguage() {
+    return language;
+  }
+
+  public void setLanguage(String language) {
+    this.language = language;
+  }
+
+  public String getFileName() {
+    return fileName;
+  }
+
+  public void setFileName(String fileName) {
+    this.fileName = fileName;
+  }
+  
+  public String getESciDocDestUrl() {
+    return eSciDocDestUrl;
+  }
+
+  public void setESciDocDestUrl(String sciDocDestUrl) {
+    eSciDocDestUrl = sciDocDestUrl;
+  }
+
+  public String getESciDocCookieId() {
+    return eSciDocCookieId;
+  }
+
+  public void setESciDocCookieId(String sciDocCookieId) {
+    eSciDocCookieId = sciDocCookieId;
+  }
+
+  public MetadataRecord getMdRecord() {
+    return mdRecord;
+  }
+
+  public void setMdRecord(MetadataRecord mdRecord) {
+    this.mdRecord = mdRecord;
+  }
+
+  public Node getDocNode() {
+    return docNode;
+  }
+
+  public void setDocNode(Node docNode) {
+    this.docNode = docNode;
+  }
+
+  public boolean includePdf() {
+    return includePdf;
+  }
+  
+  public void setIncludePdf (boolean includePdf) {
+    this.includePdf = includePdf;  
+  }
+  
+  public String toString() {
+    if (name.equals("delete"))
+      return name + "(" + id + ", " + "/" + docBase + "/" + language + "/" + fileName + ")";
+    else 
+      return name + "(" + id + ", " + uploadFileName + ", " + "/" + docBase + "/" + language + "/" + fileName + ")";
+  }
+  
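+  // Illustrative example (hypothetical values): for docBase "echo", language "la" and
+  // fileName "Benedetti_1585.xml", getDestUrl() yields "/echo/la/Benedetti_1585.xml", and an
+  // update operation with id 7 prints as "update(7, Benedetti_1585.xml, /echo/la/Benedetti_1585.xml)"
+  // (assuming the upload file name equals the file name).
+  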
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/schedule/quartz.properties	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,9 @@
+org.quartz.scheduler.instanceName = MpdlScheduler
+org.quartz.scheduler.instanceId = auto
+org.quartz.scheduler.rmi.export = false
+org.quartz.scheduler.rmi.proxy = false
+
+org.quartz.threadPool.class = org.quartz.simpl.SimpleThreadPool
+org.quartz.threadPool.threadCount = 3
+
+org.quartz.jobStore.class = org.quartz.simpl.RAMJobStore
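+
+# Note: RAMJobStore keeps all jobs and triggers in memory only, so pending document operations
+# are lost when eXist (and with it the scheduler) is restarted; the SimpleThreadPool provides
+# 3 worker threads for executing MpdlDocJob instances.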
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/FileUtil.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,424 @@
+package de.mpg.mpiwg.berlin.mpdl.util;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.List;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class FileUtil {
+  private static FileUtil instance;
+
+  public static FileUtil getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new FileUtil();
+    }
+    return instance;
+  }
+
+  public void createDirectory(String dir) {
+    File destLocalDirectory = new File(dir);
+    destLocalDirectory.mkdirs();
+  }
+ 
+  public void deleteDirectory(String dir) {
+    File destLocalDirectory = new File(dir);
+    // directory with all files and subdirectories is deleted
+    deleteDirectory(destLocalDirectory);
+  }
+ 
+  /**
+   *  Deletes all files and subdirectories under dir. No exception is thrown 
+   *  if dir (or one of its children) does not exist.
+   *  @param dir the directory to be deleted
+   *  @return true if all deletions were successful. If a deletion fails, the method stops attempting to delete and returns false.
+   */
+  public boolean deleteDirectory(File dir) {
+    if (dir.isDirectory()) {
+      String[] children = dir.list();
+      for (int i=0; i<children.length; i++) {
+        boolean success = deleteDirectory(new File(dir, children[i]));
+        if (!success) {
+          return false;  // stop and return false if one of the files or subdirectories could not be deleted
+        }
+      }
+    }
+    // The directory is now empty so delete it
+    return dir.delete();
+  }
+
+  public void deleteFile(String fileName) {
+    File destLocalFile = new File(fileName);
+    // if destLocalFile does not exist, nothing happens and no exception is thrown
+    // if destLocalFile is a non-empty directory, delete() simply returns false without throwing (this case should not happen because this method is called only by remove)
+    destLocalFile.delete();  
+  }
+ 
+  /**
+   * Lists all files in a directory.
+   * @param dirStr the directory path
+   * @return the files in that directory, or null if dirStr does not denote a directory
+   */
+  public File[] getFiles(String dirStr) {
+    File dir = new File(dirStr);
+    File[] dirFiles = dir.listFiles();
+    return dirFiles;
+  }
+  
+  /**
+   * Lists all files in a directory that are accepted by the given filename filter.
+   * @param dirStr the directory path
+   * @param filter the filename filter to apply
+   * @return the matching files, or null if dirStr does not denote a directory
+   */
+  public File[] getFiles(String dirStr, FilenameFilter filter) {
+    File dir = new File(dirStr);
+    File[] dirFiles = dir.listFiles(filter);
+    return dirFiles;
+  }
+  
+  /**
+   * Writes all bytes into destFile. If the directory for destFile does not exist,
+   * it is created, including any missing parent directories.
+   * @param bytes bytes to write
+   * @param destFileName destination file name
+   * @throws ApplicationException
+   */
+  public void saveFile(byte[] bytes, String destFileName) throws ApplicationException {
+    OutputStream out = null;
+    try {
+      if (bytes == null)
+        return;  // do nothing
+      File destFile = new File(destFileName);
+      File destDir = new File(destFile.getParent()); 
+      if (! destDir.exists()) {
+        destDir.mkdirs();  // create the directory including parent directories which do not exist
+      }
+      out = new BufferedOutputStream(new FileOutputStream(destFile));
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      try { 
+        if (out != null)
+          out.close(); 
+        } catch (Exception e) { 
+          // nothing: always close the stream at the end of the method
+        }  
+    }
+  }
+
+  public void copyFile(String srcFileName, String destFileName) throws ApplicationException {
+    InputStream in = null;
+    OutputStream out = null;
+    try {
+      File srcFile = new File(srcFileName);
+      if (! srcFile.exists())
+        return; // do nothing
+      File destFile = new File(destFileName);
+      File destDir = new File(destFile.getParent()); 
+      if (! destDir.exists()) {
+        destDir.mkdirs();  // create the directory including parent directories which do not exist
+      }
+      in = new BufferedInputStream(new FileInputStream(srcFile));
+      out = new BufferedOutputStream(new FileOutputStream(destFile));
+      int bufLen = 20000*1024;
+      byte[] buf = new byte[bufLen];
+      int len = 0;
+      while ((len = in.read(buf)) > 0) {
+        out.write(buf, 0, len);
+      }
+      out.flush();  // buffered content is flushed to file
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      try { 
+        if (in != null) 
+          in.close();
+        if (out != null)
+          out.close();
+      } catch (Exception e) {
+        // nothing: always close the stream at the end of the method
+      }
+    }
+  }
+  
+  public void saveUrlToLocalFile(URL srcUrl, String destFileName) throws ApplicationException {
+    BufferedInputStream in = null;
+    BufferedOutputStream out = null;
+    try {
+      /* when the source is accessed via "http:" the XML declaration (<?xml version="1.0"?>) is not read;
+       * with "file:" access this is different.
+       * Possibly re-enable this code to write the declaration manually:
+      URLConnection urlConn = srcUrl.openConnection();
+      String contentTypeStr = urlConn.getContentType();
+      String contentEncodingStr = urlConn.getContentEncoding();
+      boolean contentTypeXml = false;
+      if (contentTypeStr != null) {
+        contentTypeStr = contentTypeStr.toLowerCase();
+        if (contentTypeStr.indexOf("application/xml") != -1 || contentTypeStr.indexOf("text/xml") != -1)
+          contentTypeXml = true;
+      }
+      */
+      InputStream inputStream = srcUrl.openStream();
+      in = new BufferedInputStream(inputStream);
+      File outputFile = new File(destFileName);
+      File outputDir = new File(outputFile.getParent()); 
+      if (! outputDir.exists()) {
+        outputDir.mkdirs();  // create the directory including parent directories which do not exist
+      }
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      int bufLen = 1000*1024;
+      byte[] buf = new byte[bufLen];
+      int len = 0;
+      /*
+      if (contentTypeXml) {
+        String xmlDecl = "<?xml version=\"1.0\"?>\n";
+        out.write(xmlDecl.getBytes("utf-8"));
+      }
+      */
+      while ((len = in.read(buf)) > 0) {
+        out.write(buf, 0, len);
+        out.flush();
+      }
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      try { 
+        if (in != null)
+          in.close();
+        if (out != null)
+          out.close(); 
+        } catch (Exception e) { 
+          // nothing: always close the stream at the end of the method
+        }  
+    }
+  }
+  
+  public void saveInputStreamToLocalFile(InputStream srcInputStream, String destFileName) throws ApplicationException {
+    BufferedInputStream in = null;
+    BufferedOutputStream out = null;
+    try {
+      in = new BufferedInputStream(srcInputStream);
+      File outputFile = new File(destFileName);
+      File outputDir = new File(outputFile.getParent()); 
+      if (! outputDir.exists()) {
+        outputDir.mkdirs();  // create the directory including parent directories which do not exist
+      }
+      out = new BufferedOutputStream(new FileOutputStream(outputFile));
+      int bufLen = 1000*1024;
+      byte[] buf = new byte[bufLen];
+      int len = 0;
+      while ((len = in.read(buf)) > 0) {
+        out.write(buf, 0, len);
+        out.flush();
+      }
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      try { 
+        if (in != null)
+          in.close(); 
+        if (out != null)
+          out.close(); 
+        } catch (Exception e) { 
+          // nothing: always close the stream at the end of the method
+        }  
+    }
+  }
+  
+  public void deleteLastNBytes(File file, int countBytes) throws ApplicationException {
+    RandomAccessFile raf = null;
+    try {
+      raf = new RandomAccessFile(file, "rw");
+      long length = raf.length();
+      raf.setLength(length - countBytes);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the file, even if setLength fails
+      if (raf != null) try { raf.close(); } catch (IOException e) { }
+    }
+  }
+  
+  public void testFile(String fileName) throws ApplicationException {
+    File file = new File(fileName);
+    boolean fileExists = file.exists();
+    if (! fileExists) {
+      throw new ApplicationException("File: " + fileName + " does not exist");
+    }
+  }
+  
+  /**
+   *  Reads a chunk of data from an input stream.
+   *  The stream is not closed until the last bytes have been read.
+   *  @param in the input stream to be read
+   *  @param chunkSize length of the chunk to read
+   *  @return the bytes read, or null if the end of the stream has been reached
+   */
+  public byte[] readBytes(InputStream in, int chunkSize) throws ApplicationException {
+    byte[] resultBytes = new byte[chunkSize];
+    try {
+      int len = in.read(resultBytes, 0, chunkSize);
+      if (len == -1) {
+        try { in.close(); } catch (Exception e) { }  // close the stream if end of file is reached
+        resultBytes = null;
+      } else if (len < chunkSize) {  // last chunk of the stream: shrink the result array to the number of bytes actually read
+        byte[] tmp = new byte[len];
+        System.arraycopy(resultBytes, 0, tmp, 0, len);
+        resultBytes = tmp;
+      }
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } 
+    return resultBytes;  
+  }
+
+  /**
+   *  Reads a file completely into a byte array.
+   *  @param fileName the name of the file to be read
+   *  @return the file content as byte[]
+   */
+  public byte[] readBytes(String fileName) throws ApplicationException {
+    InputStream in = null;
+    byte[] out = new byte[0]; 
+    try {
+      in = new BufferedInputStream(new FileInputStream(fileName));
+      // the length of a buffer can vary
+      int bufLen = 20000*1024;
+      byte[] buf = new byte[bufLen];
+      byte[] tmp = null;
+      int len = 0;
+      while((len = in.read(buf, 0, bufLen)) != -1) {
+        // extend array
+        tmp = new byte[out.length + len];
+        System.arraycopy(out, 0, tmp, 0, out.length);
+        System.arraycopy(buf, 0, tmp, out.length, len);
+        out = tmp;
+        tmp = null;            
+      }
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+    }
+    return out;  
+  }
+
+  public String getMimeType(String fileName) throws ApplicationException {
+    String mimeType = null;
+    File file = new File(fileName);
+    try {
+      URI uri = file.toURI();
+      URL url = uri.toURL();
+      URLConnection urlConnection = url.openConnection();
+      mimeType = urlConnection.getContentType();
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return mimeType;
+  }
+  
+  /**
+   *  Reads a file into a byte array, collecting intermediate chunks in a list.
+   *  @param file the name of the file to be read
+   *  @return the file content as byte array
+   *  TODO: test whether this method is really faster
+   */
+  private byte[] readBytesFast(String file) throws ApplicationException {
+    InputStream in = null;
+    byte[] buf = null; 
+    int bufLen = 20000*1024;
+    try {
+      in = new BufferedInputStream(new FileInputStream(file));
+      buf = new byte[bufLen];
+      byte[] tmp = null;
+      int len    = 0;
+      List<byte[]> data = new ArrayList<byte[]>(24); // keeps pieces of data
+      while((len = in.read(buf, 0, bufLen)) != -1){
+        tmp = new byte[len];
+        System.arraycopy(buf, 0, tmp, 0, len); // still need to do copy 
+        data.add(tmp);
+      }
+      /* This part is optional. This method could also return the List of chunks
+         for further processing, etc. */
+      len = 0;
+      if (data.size() == 1) return (byte[]) data.get(0);
+      for (int i=0;i<data.size();i++) len += ((byte[]) data.get(i)).length;
+      buf = new byte[len]; // final output buffer 
+      len = 0;
+      for (int i=0;i<data.size();i++){ // fill with data 
+        tmp = (byte[]) data.get(i);
+        System.arraycopy(tmp,0,buf,len,tmp.length);
+        len += tmp.length;
+      } 
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } finally {
+      if (in != null) try { in.close(); } catch (Exception e) {}
+    }
+    return buf;  
+  }
+
+  /*
+   * 
+   Insert a document (today.rss as 20050401) by a URL connection (PUT request):
+
+   URL src = new URL("http://www.cafeaulait.org/today.rss");
+   InputStream in = src.openStream();
+   URL u = new URL("http://eliza.elharo.com:8080/exist/servlet/db/syndication/20050401");
+   HttpURLConnection conn = (HttpURLConnection) u.openConnection();
+   conn.setDoOutput(true);
+   conn.setRequestMethod("PUT");
+   conn.setRequestProperty("Content-Type", "application/xml");
+   OutputStream out = conn.getOutputStream();
+   for (int c = in.read(); c != -1; c = in.read()) {
+     out.write(c);
+   }
+   out.flush();
+   out.close();
+   in.close();
+   // read the response...
+
+   Delete a document (20050401) by a URL connection:
+
+   URL u = new URL("http://eliza.elharo.com:8080/exist/servlet/db/syndication/20050401");
+   HttpURLConnection conn = (HttpURLConnection) u.openConnection();
+   conn.setRequestMethod("DELETE");
+   conn.connect();
+   // read the response...
+   * 
+   * 
+  */
+  
+  
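+  /* Illustrative usage sketch (hypothetical paths):
+     FileUtil fileUtil = FileUtil.getInstance();
+     fileUtil.createDirectory("/tmp/mpdl-test");
+     fileUtil.saveFile("hello".getBytes("utf-8"), "/tmp/mpdl-test/hello.txt");
+     byte[] content = fileUtil.readBytes("/tmp/mpdl-test/hello.txt");
+     fileUtil.deleteDirectory("/tmp/mpdl-test");
+  */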
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/FilenameFilterExtension.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,22 @@
+package de.mpg.mpiwg.berlin.mpdl.util;
+
+import java.io.File;
+import java.io.FilenameFilter;
+
+public class FilenameFilterExtension implements FilenameFilter {
+  
+  private String fileExtension;
+
+  public FilenameFilterExtension(String fileExtension) {
+    this.fileExtension = fileExtension;
+  }
+
+  public boolean accept(File dir, String name) {
+    String nameToLower = name.toLowerCase();
+    String fileExtensionToLower = fileExtension.toLowerCase();
+    return nameToLower.endsWith("." + fileExtensionToLower);
+    // MimeTable mimetab = MimeTable.getInstance();
+    // MimeType mime = mimetab.getContentTypeFor(name);
+    // return mime != null && mime.isXMLType();
+  }
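+
+  // Illustrative usage sketch (hypothetical directory): list all XML files in a directory:
+  //   File[] xmlFiles = FileUtil.getInstance().getFiles("/tmp/mpdl-docs", new FilenameFilterExtension("xml"));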
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/MpdlITextRenderer.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,570 @@
+package de.mpg.mpiwg.berlin.mpdl.util;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.Hashtable;
+
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.HttpException;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.xhtmlrenderer.layout.SharedContext;
+import org.xhtmlrenderer.pdf.ITextFontResolver;
+import org.xhtmlrenderer.pdf.ITextRenderer;
+import org.xhtmlrenderer.util.XRRuntimeException;
+
+import com.lowagie.text.DocumentException;
+import com.lowagie.text.pdf.BaseFont;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+
+public class MpdlITextRenderer {
+  private static MpdlITextRenderer instance;
+  private ITextRenderer renderer = new ITextRenderer();
+  private Hashtable<String, String> fontFileNames;
+
+  public static MpdlITextRenderer getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new MpdlITextRenderer();
+      instance.init();
+    }
+    return instance;
+  }
+
+  public void init() throws ApplicationException {
+    renderer = new ITextRenderer();
+    SharedContext rendererSharedContext = renderer.getSharedContext();
+    MpdlITextUserAgent mpdlUserAgent = new MpdlITextUserAgent();  // user agent that hooks into the web access of images (see getImageResource(uri))
+    mpdlUserAgent.setSharedContext(rendererSharedContext);
+    rendererSharedContext.setUserAgentCallback(mpdlUserAgent);
+    fontFileNames = new Hashtable<String, String>();
+    String fontJunicodeFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/fonts/Junicode-Regular.ttf";
+    String fontJunicodeBoldFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/fonts/Junicode-Bold.ttf";
+    String fontJunicodeItalicFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/fonts/Junicode-Italic.ttf";
+    String fontJunicodeBoldItalicFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/fonts/Junicode-BoldItalic.ttf";
+    String fontSunExtAFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/fonts/Sun-ExtA.ttf";  // chinese symbols
+    String fontSunExtBFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/fonts/Sun-ExtB.ttf";  // chinese symbols
+    String fontDejaVuFileName = MpdlConstants.MPDL_EXIST_DATA_DIR + "/fonts/DejaVuSans.ttf";  // arabic symbols
+    setFont(fontJunicodeFileName);
+    setFont(fontJunicodeBoldFileName);
+    setFont(fontJunicodeItalicFileName);
+    setFont(fontJunicodeBoldItalicFileName);  // note: when this font is registered, some non-bold italic characters are rendered bold (e.g. in Benedetti_1585.xml)
+    setFont(fontSunExtAFileName);
+    setFont(fontSunExtBFileName);
+    setFont(fontDejaVuFileName);
+  }
+  
+  public byte[] createPdf(String htmlPageFragment, String language, String topLeftStr, String topRightStr, String bottomLeftStr, String bottomRightStr) throws ApplicationException {
+    byte[] pdfBytes = null;
+    try {
+      String htmlPageDoc = getPageHtmlDoc(htmlPageFragment, language, topLeftStr, topRightStr, bottomLeftStr, bottomRightStr);  
+      renderer.setDocumentFromString(htmlPageDoc);
+      renderer.layout();
+      ByteArrayOutputStream baos = new ByteArrayOutputStream();
+      renderer.createPDF(baos);
+      pdfBytes = baos.toByteArray();
+      baos.close();
+    } catch (Exception e) {
+      init();
+      String message = e.getMessage();
+      if (message != null && message.indexOf("nausikaa") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("Could not fetch image from nausikaa2.rz-berlin.mpg.de: please try again later");
+      }
+      throw new ApplicationException(e);
+    }
+    return pdfBytes;
+  }
+  
+  public void createFile(boolean pdf, boolean html, String mode, MetadataRecord mdRecord) throws ApplicationException {
+    OutputStream osPdf = null;
+    OutputStream osHtml = null;
+    OutputStream osHtmlPdf = null;
+    String eXistIdentifier = mdRecord.getEXistIdentifier();
+    String language = mdRecord.getLanguage();
+    if (eXistIdentifier == null)
+      throw new ApplicationException("Pdf/Html-Generation failed: no eXist-Identifier given in mdRecord");
+    String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4);  // without ".xml"
+    String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf";
+    String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html";
+    String destFileNameHtmlPdfTmp = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + "-4Pdf.html";
+    try {
+      // start document
+      if (pdf) {
+        osPdf = new FileOutputStream(new File(destFileNamePdf));
+        osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp));
+      }
+      if (html)
+        osHtml = new FileOutputStream(new File(destFileNameHtml));
+      int countPages = httpClientGetCountPages(mdRecord);
+      // style page
+      String pageStyleHtml = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;";
+      // firstPage
+      String firstPageHtmlShort = getFirstPageHtml(mdRecord, true);
+      String firstPageHtmlLong = getFirstPageHtml(mdRecord, false);
+      String mdRecordStr = getMdRecordString(mdRecord);
+      String htmlHeadStr = getHtmlHead(null, mdRecordStr);
+      String fontStyle = getFontStyle(language);
+      if(pdf) {
+        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtmlPdf);
+        // first page
+        write(firstPageHtmlLong, osHtmlPdf);
+      }
+      if (html) {
+        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtml);
+        // first page
+        write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+        write(firstPageHtmlShort, osHtml);
+        write("</div>", osHtml);
+      }
+      // table of content of document
+      String htmlToc = getTocHtml(mdRecord);
+      if (html && htmlToc != null) {
+        write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+        write(htmlToc, osHtml);
+        write("</div>", osHtml);
+      }
+      if(pdf && htmlToc != null) {
+        write(htmlToc, osHtmlPdf);
+      }
+      // all pages of the document
+      for(int i=1; i<=countPages; i++) {
+        String htmlPageFragment = httpClientGetPageFragmentHtml(eXistIdentifier, mode, i);
+        htmlPageFragment = removeXmlStartString(htmlPageFragment);
+        String pnHrefName = "<a name=\"pn" + i + "\"></a>";
+        if (html) {
+          write("<div style=\"" + "clear:both; text-align:right; width:21.0cm; font-weight:bold;" + "\">", osHtml);
+          write(pnHrefName, osHtml);
+          write("</div>", osHtml);
+          write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+          String htmlPageFragmentWithImageUrl = htmlPageFragment.replaceAll("src=\"images/", "src=\"http://" + MpdlConstants.MPDL_FULL_EXIST_HOST_NAME + "/mpdl/images/");  // to find the camera.png file on webserver mpdl-proto
+          write(htmlPageFragmentWithImageUrl, osHtml);
+          write("</div>", osHtml);
+        }
+        htmlPageFragment = pnHrefName + htmlPageFragment;
+        if(pdf) {
+          String htmlPageFragmentWithImageDir = htmlPageFragment.replaceAll("src=\"images/", "src=\"../../../../../mpdl/images/");  // to find the camera.png file in webbapp/mpdl/image/ directory
+          write(htmlPageFragmentWithImageDir, osHtmlPdf);
+        }
+      }
+      if (html) {
+        write("</body></html>", osHtml);
+      }
+      // create PDF document
+      if(pdf) {
+        write("</body></html>", osHtmlPdf);
+        osHtmlPdf.close();
+        renderer.setDocument(new File(destFileNameHtmlPdfTmp));
+        renderer.layout();  // takes the most time
+        renderer.createPDF(osPdf);
+      }
+    } catch (Exception e) {
+      init();
+      String message = e.getMessage();
+      if (message != null && message.indexOf("nausikaa") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: " + message);
+      }
+      throw new ApplicationException(e);
+    } finally {
+      try {
+        if (osHtmlPdf != null)
+          osHtmlPdf.close();
+        if (osPdf != null)
+          osPdf.close();
+        if (osHtml != null)
+          osHtml.close();
+        FileUtil.getInstance().deleteFile(destFileNameHtmlPdfTmp);
+      } catch (IOException e) {
+        // nothing
+      }
+    }
+  }
+
+  private String getFirstPageHtml(MetadataRecord mdRecord, boolean shortPage) {
+    String author = mdRecord.getCreator();
+    String title = mdRecord.getTitle();
+    String year = mdRecord.getYear();
+    String existId = mdRecord.getEXistIdentifier();
+    String firstPageHtml = "<div class=\"firstPage\">";
+    firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + "Max Planck Institute for the History of Science" + "</h2>";
+    firstPageHtml = firstPageHtml + "<p style=\"text-align:center\">" + "Max-Planck-Institut für Wissenschaftsgeschichte" + "</p>";
+    firstPageHtml = firstPageHtml + "<p style=\"text-align:center\">" + "MPDL project" + "</p>";
+    firstPageHtml = firstPageHtml + "<br></br>";
+    firstPageHtml = firstPageHtml + "<br></br>";
+    if (! shortPage) {
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+    }
+    if (author != null) {
+      firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + author + "</h2>";
+    }
+    if (title != null) {
+      firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + title + "</h2>";
+    }
+    if (year != null) {
+      firstPageHtml = firstPageHtml + "<h2 style=\"text-align:center\">" + year + "</h2>";
+    }
+    if (! shortPage) {
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+      firstPageHtml = firstPageHtml + "<br></br>";
+    }
+    firstPageHtml = firstPageHtml + "<br></br>";
+    firstPageHtml = firstPageHtml + "<br></br>";
+    firstPageHtml = firstPageHtml + "<br></br>";
+    firstPageHtml = firstPageHtml + "<br></br>";
+    String urlDocuView = "http://" + MpdlConstants.MPDL_FULL_EXIST_HOST_NAME + "/mpdl/interface/echo/echoDocuView.xql";
+    String document = "?document=" + existId;
+    String urlDoc = urlDocuView + document;
+    firstPageHtml = firstPageHtml + "<p style=\"font:11pt sans-serif;\">Document link: <br></br><a href=\"" + urlDoc + "\">" +  urlDocuView + " <br></br>" + document + "</a></p>";
+    firstPageHtml = firstPageHtml + "</div>";
+    return firstPageHtml;
+  }
+  
+  private String getTocHtml(MetadataRecord mdRecord) throws ApplicationException {
+    String htmlStr = null;
+    String eXistIdentifier = mdRecord.getEXistIdentifier();
+    String htmlToc = httpClientGetContentListHtml(eXistIdentifier);
+    String resultSizeStr = XmlUtil.getInstance().evaluateToString(htmlToc, "//div[@class = 'queryResultHits']", null);
+    int resultSize = 0;
+    if (resultSizeStr != null)
+      resultSize = Integer.parseInt(resultSizeStr);
+    if (resultSize <= 0)
+      return null;
+    if (htmlToc != null) {
+      htmlToc = removeXmlStartString(htmlToc);
+      htmlToc = htmlToc.replaceAll("page-fragment\\.xql.*pn=", "#pn");
+      htmlToc = htmlToc.replaceAll(">Page: ", ">");
+      htmlToc = "<text style=\"font-weight:bold; font-size:20pt; margin-left:2%; \">Content</text>" + htmlToc;
+      htmlStr = "<div class=\"tocPage\">";
+      htmlStr = htmlStr + htmlToc;
+      htmlStr = htmlStr + "</div>";
+    }
+    return htmlStr;
+  }
+  
+  private String getPageHtmlDoc(String htmlFragment, String language, String topLeftStr, String topRightStr, String bottomLeftStr, String bottomRightStr) {
+    String fontStyle = getFontStyle(language);
+    String stylePage = getStylePage(topLeftStr, topRightStr, bottomLeftStr, bottomRightStr);
+    String htmlStr = "<html xmlns=\"http://www.w3.org/1999/xhtml\">";
+    String htmlHeadStr = getHtmlHead(stylePage, topLeftStr);
+    htmlStr = htmlStr + htmlHeadStr;
+    htmlStr = htmlStr + "<body style=\"" + fontStyle +  "\">";
+    htmlStr = htmlStr + htmlFragment;
+    htmlStr = htmlStr + "</body>";
+    htmlStr = htmlStr + "</html>";
+    return htmlStr;
+  }
+  
+  private String getMdRecordString(MetadataRecord mdRecord) {
+    String author = mdRecord.getCreator();
+    String title = mdRecord.getTitle();
+    String year = mdRecord.getYear();
+    String mdRecordStr = "";
+    if (mdRecord != null) {
+      if (author != null && ! author.equals(""))
+        mdRecordStr = mdRecordStr + author;
+      if (title != null && ! title.equals(""))
+        mdRecordStr = mdRecordStr + ". " + title;
+      if (year != null && ! year.equals(""))
+        mdRecordStr = mdRecordStr + ". " + year + ".";
+      else 
+        mdRecordStr = mdRecordStr + ".";
+    }
+    return mdRecordStr;
+  }
+  
+  private String getHtmlHead(String stylePageStr, String titleStr) {
+    String htmlStr = "<head>";
+    if (stylePageStr != null)
+      htmlStr = htmlStr + "<style type=\"text/css\">" + stylePageStr + "</style>";
+    htmlStr = htmlStr + "<title>" + titleStr + "</title>";
+    String httpExistHostName = "http" + "://" + MpdlConstants.MPDL_FULL_EXIST_HOST_NAME;
+    htmlStr = htmlStr + "<link rel=\"stylesheet\" type=\"text/css\" href=\"" + httpExistHostName + "/mpdl/presentation/pageHtml.css\" />";
+    htmlStr = htmlStr + "</head>";
+    return htmlStr;
+  }
+
+  private String removeXmlStartString(String inputStr) {
+    String xmlStartStr = "<?xml version=\"1.0\" encoding=\"utf-8\"?>";
+    boolean startsWithXmlStartStr = inputStr.startsWith(xmlStartStr);
+    if (startsWithXmlStartStr) {
+      int xmlStartStrLength = xmlStartStr.length();
+      int xmlStartStrIndex = -1;
+      xmlStartStrIndex = inputStr.indexOf(xmlStartStr);
+      if (xmlStartStrIndex != -1)
+        inputStr = inputStr.substring(xmlStartStrLength);
+    }
+    return inputStr;
+  }
+  
+  private String getFontStyle(String language) {
+    String fontFamily = "Junicode";
+    if (language.equals("ar"))
+      fontFamily = "DejaVu Sans";
+    else if (language.equals("zh") || language.equals("zho-Hant"))
+      fontFamily = "Sun-ExtA, Sun-ExtB";
+    return "font-size:11pt; font-family:" + fontFamily + ";";
+  }
+  
+  private String getStylePage(String topLeftStr, String topRightStr, String bottomLeftStr, String bottomRightStr) {
+    String fontStylePage = "8pt sans-serif; ";
+    String stylePage = "@page {" + "size: A4;" + "margin-top: 1.5cm;" + "margin-bottom: 1cm;" + "margin-left: 0.7cm;" + "margin-right: 0.7cm;" + "border: thin solid #808080;" + "padding: 0.2cm;" + " font-size: 10px;" + 
+      " @top-left { font: " + fontStylePage + " padding-left: 0.2cm; padding-right: 1cm; font-weight:bold; content: " + topLeftStr + ";}" + 
+      " @top-right { font: " + fontStylePage + " white-space: nowrap; font-weight:bold; content: " + topRightStr + ";}" + 
+      " @bottom-left { font: " + fontStylePage + " white-space: nowrap; font-weight:bold; content: " + bottomLeftStr + ";}" + 
+      " @bottom-right { font: " + fontStylePage + " white-space: nowrap; font-weight:bold; content: " + bottomRightStr + ";}" + "}";
+    return stylePage;
+  }
+  
+  private String httpClientGetPageFragmentHtml(String docName, String mode, int pageNumber) throws ApplicationException {
+    String retPageFragment = null;
+    try {
+      HttpClient httpClient = new HttpClient();
+      String requestName = "/mpdl/interface/page-fragment.xql?document=" + docName + "&mode=" + mode + "&pn=" + pageNumber + "&characterNormalization=orig";
+      String urlStr = "http" + "://" + MpdlConstants.MPDL_EXIST_HOST_NAME + ":" + MpdlConstants.MPDL_EXIST_PORT + requestName;
+      GetMethod method = new GetMethod(urlStr);
+      httpClient.executeMethod(method); 
+      byte[] responseBody = method.getResponseBody();
+      retPageFragment = new String(responseBody, "utf-8");
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return retPageFragment;
+  }
+  
+  private String httpClientGetContentListHtml(String docName) throws ApplicationException {
+    String retHtmlFragment = null;
+    try {
+      HttpClient httpClient = new HttpClient();
+      String requestName = "/mpdl/interface/doc-query.xql?document=" + docName + "&queryType=toc&queryResultPageSize=10000";
+      String urlStr = "http" + "://" + MpdlConstants.MPDL_EXIST_HOST_NAME + ":" + MpdlConstants.MPDL_EXIST_PORT + requestName;
+      GetMethod method = new GetMethod(urlStr);
+      httpClient.executeMethod(method); 
+      byte[] responseBody = method.getResponseBody();
+      retHtmlFragment = new String(responseBody, "utf-8");
+      method.releaseConnection();
+    } catch (HttpException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return retHtmlFragment;
+  }
+  
+  private int httpClientGetCountPages(MetadataRecord mdRecord) throws ApplicationException {
+    int count = -1;
+    String docName = mdRecord.getEXistIdentifier();
+    String docBase = mdRecord.getDocBase();
+    String pbTag = "echo:pb";
+    if (docBase != null && docBase.equals("archimedes"))
+      pbTag = "pb";
+    try {
+      HttpClient httpClient = new HttpClient();
+      String requestName = "/mpdl/interface/xquery.xql?document=" + docName + "&xquery=count(//" + pbTag + ")";
+      String urlStr = "http" + "://" + MpdlConstants.MPDL_EXIST_HOST_NAME + ":" + MpdlConstants.MPDL_EXIST_PORT + requestName;
+      GetMethod method = new GetMethod(urlStr);
+      httpClient.executeMethod(method); 
+      byte[] responseBody = method.getResponseBody();
+      String xmlResult = new String(responseBody, "utf-8");
+      method.releaseConnection();
+      if (xmlResult != null && ! xmlResult.equals("")) {
+        XmlUtil xmlUtil = XmlUtil.getInstance();
+        String countPagesStr = xmlUtil.evaluateToString(xmlResult, "/result/queryResult/records/record/content", null);
+        count = Integer.parseInt(countPagesStr);
+      }
+      if (count == 0)
+        count = 1;  // if no pb tag found then document consists of one page
+    } catch (HttpException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }
+    return count;
+  }
+  
+  private void write(String str, OutputStream out) throws ApplicationException {
+    try {
+      byte[] bytes = str.getBytes("utf-8");
+      out.write(bytes, 0, bytes.length);
+      out.flush();
+    } catch (UnsupportedEncodingException e) {
+      throw new ApplicationException(e);
+    } catch (FileNotFoundException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    } 
+  }
+
+  private void setFont(String fontFileName) throws ApplicationException {
+    try {
+      String existingFontFileName = fontFileNames.get(fontFileName);
+      if (existingFontFileName == null) {
+        fontFileNames.put(fontFileName, fontFileName);
+        ITextFontResolver fontResolver = renderer.getFontResolver();
+        fontResolver.addFont(fontFileName, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);  // IDENTITY_H selects horizontal Unicode encoding; NOT_EMBEDDED means the font is not embedded in the PDF document
+      }
+    } catch (XRRuntimeException e) {
+      init();
+      String message = e.getMessage();
+      if (message != null && message.indexOf("nausikaa") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("Could not fetch image from nausikaa2.rz-berlin.mpg.de: please try again later");
+      }
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      init();
+      String message = e.getMessage();
+      if (message != null && message.indexOf("nausikaa") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: " + message);
+      }
+      throw new ApplicationException(e);
+    } catch (DocumentException e) {
+      init();
+      String message = e.getMessage();
+      if (message != null && message.indexOf("nausikaa") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("fetch image is not possible: " + message);
+      }
+      throw new ApplicationException(e);
+    }
+  }
+
+  // old method: each page is set as an own html page
+  public void createFileOld(boolean pdf, boolean html, String mode, MetadataRecord mdRecord) throws ApplicationException {
+    OutputStream osPdf = null;
+    OutputStream osHtml = null;
+    OutputStream osHtmlPdf = null;
+    String eXistIdentifier = mdRecord.getEXistIdentifier();
+    String language = mdRecord.getLanguage();
+    if (eXistIdentifier == null)
+      throw new ApplicationException("Pdf/Html-Generation failed: no eXist-Identifier given in mdRecord");
+    String eXistIdentifierWithoutExtension = eXistIdentifier.substring(0, eXistIdentifier.length() - 4);  // without ".xml"
+    String destFileNamePdf = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".pdf";
+    String destFileNameHtml = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + ".html";
+    String destFileNameHtmlPdfTmp = MpdlConstants.MPDL_EXIST_DATA_DIR + "/documents" + eXistIdentifierWithoutExtension + "-4Pdf.html";
+    try {
+      // start document
+      if (pdf) {
+        osPdf = new FileOutputStream(new File(destFileNamePdf));
+        osHtmlPdf = new FileOutputStream(new File(destFileNameHtmlPdfTmp));
+      }
+      if (html)
+        osHtml = new FileOutputStream(new File(destFileNameHtml));
+      int countPages = httpClientGetCountPages(mdRecord);
+      // style page
+      String pageStyleHtml = "float:left; clear:both; border: thin solid #808080; width: 21.0cm; margin-top: 0.2cm; margin-bottom: 1cm; margin-left: 0.7cm; margin-right: 0.7cm; padding: 0.2cm;";
+      // firstPage
+      String firstPageHtmlShort = getFirstPageHtml(mdRecord, true);
+      String firstPageHtmlLong = getFirstPageHtml(mdRecord, false);
+      String mdRecordStr = getMdRecordString(mdRecord);
+      String htmlHeadStr = getHtmlHead(null, mdRecordStr);
+      String fontStyle = getFontStyle(language);
+      if(pdf) {
+        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtmlPdf);
+        // first page
+        write(firstPageHtmlLong, osHtmlPdf);
+        renderer.setDocumentFromString("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">" + firstPageHtmlLong + "</body></html>");
+        renderer.layout();
+        renderer.createPDF(osPdf, false);
+      }
+      if (html) {
+        write("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">", osHtml);
+        // first page
+        write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+        write(firstPageHtmlShort, osHtml);
+        write("</div>", osHtml);
+      }
+      // table of content of document
+      String htmlToc = getTocHtml(mdRecord);
+      if (html && htmlToc != null) {
+        write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+        write(htmlToc, osHtml);
+        write("</div>", osHtml);
+      }
+      if(pdf && htmlToc != null) {
+        write(htmlToc, osHtmlPdf);
+        renderer.setDocumentFromString("<html>" + htmlHeadStr + "<body style=\"" + fontStyle +  "\">" + htmlToc  + "</body></html>");
+        renderer.layout();
+        renderer.writeNextDocument();
+      }
+      // all pages of the document
+      for(int i=1; i<=countPages; i++) {
+        String htmlPageFragment = httpClientGetPageFragmentHtml(eXistIdentifier, mode, i);
+        htmlPageFragment = removeXmlStartString(htmlPageFragment);
+        String pnHrefName = "<a name=\"pn" + i + "\"></a>";
+        if (html) {
+          write("<div style=\"" + "clear:both; text-align:right; width:21.0cm; font-weight:bold;" + "\">", osHtml);
+          write(pnHrefName, osHtml);
+          write("</div>", osHtml);
+          write("<div style=\"" + pageStyleHtml + "\">", osHtml);
+          String htmlPageFragmentWithImageUrl = htmlPageFragment.replaceAll("src=\"images/", "src=\"http://" + MpdlConstants.MPDL_FULL_EXIST_HOST_NAME + "/mpdl/images/");  // to find the camera.png file on webserver mpdl-proto
+          write(htmlPageFragmentWithImageUrl, osHtml);
+          write("</div>", osHtml);
+        }
+        htmlPageFragment = pnHrefName + htmlPageFragment;
+        if(pdf) {
+          String htmlPageFragmentWithImageUrl = htmlPageFragment.replaceAll("src=\"images/", "src=\"http://" + MpdlConstants.MPDL_FULL_EXIST_HOST_NAME + "/mpdl/images/");  // to find the camera.png file on webserver mpdl-proto
+          String htmlPageFragmentSinglePage = htmlPageFragmentWithImageUrl.replaceAll("class=\"page\">", "class=\"singlePage\">");
+          String pnPdf = "&quot;Page " + i + " (&quot; counter(page) &quot;)&quot;";
+          String htmlPage = getPageHtmlDoc(htmlPageFragmentSinglePage, language, "&quot;&quot;", pnPdf, "&quot;&quot;", "&quot;&quot;");  
+          write(htmlPage, osHtmlPdf);
+          renderer.setDocumentFromString(htmlPage);
+          try {
+            renderer.layout();
+            renderer.writeNextDocument();
+          } catch (XRRuntimeException e) {
+            System.out.println("MpdlITextRenderer: could not render page " + i + ": " + e.getMessage());
+          }
+        }
+      }
+      if (html) {
+        write("</body></html>", osHtml);
+      }
+      // create PDF document
+      if(pdf) {
+        write("</body></html>", osHtmlPdf);
+        osHtmlPdf.close();
+        renderer.finishPDF();
+      }
+    } catch (Exception e) {
+      init();
+      String message = e.getMessage();
+      if (message != null && message.indexOf("nausikaa") > 0 && message.indexOf("500") > 0) {
+        throw new ApplicationException("Could not fetch image from nausikaa2.rz-berlin.mpg.de: please try again later");
+      }
+      throw new ApplicationException(e);
+    } finally {
+      try {
+        if (osHtmlPdf != null)
+          osHtmlPdf.close();
+        if (osPdf != null)
+          osPdf.close();
+        if (osHtml != null)
+          osHtml.close();
+        FileUtil.getInstance().deleteFile(destFileNameHtmlPdfTmp);
+      } catch (IOException e) {
+        // nothing
+      }
+    }
+  }
+
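+  /* Illustrative usage sketch (hypothetical fragment and header/footer strings; the
+     "&quot;...&quot;" values become CSS content strings in getStylePage()):
+     MpdlITextRenderer renderer = MpdlITextRenderer.getInstance();
+     byte[] pdfBytes = renderer.createPdf("<div class=\"page\">...</div>", "en",
+         "&quot;Header left&quot;", "&quot;Header right&quot;", "&quot;&quot;", "&quot;&quot;");
+  */
+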
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/MpdlITextUserAgent.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,149 @@
+package de.mpg.mpiwg.berlin.mpdl.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+import org.apache.log4j.Logger;
+import org.xhtmlrenderer.layout.SharedContext;
+import org.xhtmlrenderer.pdf.ITextFSImage;
+import org.xhtmlrenderer.pdf.ITextOutputDevice;
+import org.xhtmlrenderer.pdf.PDFAsImage;
+import org.xhtmlrenderer.resource.ImageResource;
+import org.xhtmlrenderer.swing.NaiveUserAgent;
+
+import com.lowagie.text.Image;
+import com.lowagie.text.Rectangle;
+import com.lowagie.text.pdf.PdfReader;
+
+public class MpdlITextUserAgent extends NaiveUserAgent {
+  private static final int IMAGE_CACHE_CAPACITY = 32;
+  private static final float DEFAULT_DOTS_PER_POINT = 20f * 4f / 3f;
+  private static Logger LOGGER = Logger.getLogger(MpdlITextUserAgent.class); // Logs to EXIST_HOME/webapp/WEB-INF/logs/exist.log
+  private SharedContext sharedContext;
+  private ITextOutputDevice outputDevice;
+  
+  public MpdlITextUserAgent() {
+    super(IMAGE_CACHE_CAPACITY);
+    outputDevice = new ITextOutputDevice(DEFAULT_DOTS_PER_POINT);
+  }
+  
+  @SuppressWarnings("unchecked")
+  public ImageResource getImageResource(String inputUri) {
+    ImageResource resource = null;
+    String uri = resolveURI(inputUri);
+    resource = (ImageResource) _imageCache.get(uri);
+    if (resource == null) {
+      InputStream is = resolveAndOpenStream(uri);
+      if (is != null) {
+        try {
+          URL url = new URL(uri);
+          if (url.getPath() != null && url.getPath().toLowerCase().endsWith(".pdf")) {
+            PdfReader reader = outputDevice.getReader(url);
+            PDFAsImage image = new PDFAsImage(url);
+            Rectangle rect = reader.getPageSizeWithRotation(1);
+            image.setInitialWidth(rect.getWidth()*outputDevice.getDotsPerPoint());
+            image.setInitialHeight(rect.getHeight()*outputDevice.getDotsPerPoint());
+            resource = new ImageResource(image);
+          } else {
+            Image image = getImage(url);
+            if (image == null)
+              return null;
+            scaleToOutputResolution(image);
+            resource = new ImageResource(new ITextFSImage(image));
+          }
+          _imageCache.put(uri, resource);
+        } catch (IOException e) {
+          LOGGER.error("Can't get image file: unexpected problem for URI: '" + uri + "': " + e.getMessage(), e);
+        } finally {
+          try {
+            if (is != null)
+              is.close();
+          } catch (IOException e) {
+            // ignore
+          }  
+        }
+      }
+    }
+    if (resource == null) {
+      resource = new ImageResource(null);
+    }
+    return resource;
+  }
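+  // Wiring sketch (assumes Flying Saucer's ITextRenderer exposes its SharedContext and
+  // accepts a UserAgentCallback there; the URL and variable names are illustrative only):
+  //
+  //   ITextRenderer renderer = new ITextRenderer();
+  //   MpdlITextUserAgent userAgent = new MpdlITextUserAgent();
+  //   userAgent.setSharedContext(renderer.getSharedContext());
+  //   renderer.getSharedContext().setUserAgentCallback(userAgent);
+  //   renderer.setDocumentFromString("<html><body><img src=\"http://example.org/p1.jpg\"/></body></html>");
+  //   renderer.layout();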
+  
+  private void scaleToOutputResolution(Image image) {
+    float factor = sharedContext.getDotsPerPixel();
+    image.scaleAbsolute(image.getPlainWidth() * factor, image.getPlainHeight() * factor);
+  }
+
+  public SharedContext getSharedContext() {
+    return sharedContext;
+  }
+
+  public void setSharedContext(SharedContext sharedContext) {
+    this.sharedContext = sharedContext;
+  }
+  
+  private Image getImage(URL url) {
+    Image image = null;
+    try {
+      image = Image.getInstance(url);
+    } catch (Exception e) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException ee) {
+        // nothing
+      }
+      LOGGER.error("first retry to get image for URL '" + url.toString() + "': " + e.getMessage(), e);
+      try {
+        image = Image.getInstance(url);
+      } catch (Exception e2) {
+        try {
+          Thread.sleep(1000);
+        } catch (InterruptedException ee) {
+          // nothing
+        }
+        LOGGER.error("second retry to get image for URL '" + url.toString() + "': " + e.getMessage(), e);
+        try {
+          image = Image.getInstance(url);
+        } catch (Exception e3) {
+          LOGGER.error("third retry to get image for URL '" + url.toString() + "': " + e.getMessage(), e);
+          return null;
+        }
+      }
+    }
+    return image;
+  }
+
+  protected InputStream resolveAndOpenStream(String inputUri) {
+    InputStream is = null;
+    String uri = resolveURI(inputUri);
+    try {
+      is = new URL(uri).openStream();
+    } catch (Exception e) {
+      try {
+        Thread.sleep(1000);
+      } catch (InterruptedException ee) {
+        // nothing
+      }
+      LOGGER.error("first retry to open stream for URL '" + uri + "': " + e.getMessage(), e);
+      try {
+        is = new URL(uri).openStream();
+      } catch (Exception e2) {
+        try {
+          Thread.sleep(1000);
+        } catch (InterruptedException ee) {
+          // nothing
+        }
+        LOGGER.error("second retry to open stream for URL '" + uri + "': " + e.getMessage(), e);
+        try {
+          is = new URL(uri).openStream();
+        } catch (Exception e3) {
+          LOGGER.error("third retry to open stream for URL '" + uri + "': " + e.getMessage(), e);
+          return null;
+        }
+      }
+    }
+    return is;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/StringUtilEscapeChars.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,475 @@
+package de.mpg.mpiwg.berlin.mpdl.util;
+
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class StringUtilEscapeChars {
+  public static String deleteSpecialXmlEntities(String inputStr) {
+    inputStr = inputStr.replaceAll("&lt;", "");
+    inputStr = inputStr.replaceAll("&gt;", "");
+    inputStr = inputStr.replaceAll("&amp;lt;", "");
+    inputStr = inputStr.replaceAll("&amp;gt;", "");
+    return inputStr;
+  }
+ 
+  public static String resolveXmlEntities(String inputStr) {
+     inputStr = inputStr.replaceAll("&amp;", "&");
+     inputStr = inputStr.replaceAll("&lt;", "<");
+     inputStr = inputStr.replaceAll("&gt;", ">");
+     inputStr = inputStr.replaceAll("&quot;", "\"");
+     inputStr = inputStr.replaceAll("&apos;", "'");
+    return inputStr;
+  }
+ 
+  public static String deresolveXmlEntities(String inputStr) {
+    StringBuffer buf = new StringBuffer();
+    for (int i = 0; i < inputStr.length(); i++) {
+      char c = inputStr.charAt(i);
+      String replace = new String();
+      switch (c) {
+        case '&': replace = "&amp;"; break; 
+        case '<': replace = "&lt;"; break; 
+        case '>': replace = "&gt;"; break; 
+        case '"': replace = "&quot;"; break; 
+        // case '\'': replace = "&apos;"; break; // causes problems in DictionarizerContentHandler
+        default: replace += c; break;
+      }
+      buf.append(replace);
+    }
+    return buf.toString();
+  }
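+  // Round-trip sketch (illustrative input):
+  //   String escaped = deresolveXmlEntities("Galileo & <Dialogo>"); // -> "Galileo &amp; &lt;Dialogo&gt;"
+  //   String back = resolveXmlEntities(escaped);                    // -> "Galileo & <Dialogo>"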
+  
+    /**
+     * Escape characters for text appearing in HTML markup.
+     * 
+     * <P>This method exists as a defence against Cross Site Scripting (XSS) hacks.
+     * The idea is to neutralize control characters commonly used by scripts, such that
+     * they will not be executed by the browser. This is done by replacing the control
+     * characters with their escaped equivalents.  
+     * See {@link hirondelle.web4j.security.SafeText} as well.
+     * 
+     * <P>The following characters are replaced with corresponding 
+     * HTML character entities :
+     * <table border='1' cellpadding='3' cellspacing='0'>
+     * <tr><th> Character </th><th>Replacement</th></tr>
+     * <tr><td> < </td><td> &lt; </td></tr>
+     * <tr><td> > </td><td> &gt; </td></tr>
+     * <tr><td> & </td><td> &amp; </td></tr>
+     * <tr><td> " </td><td> &quot;</td></tr>
+     * <tr><td> \t </td><td> &#009;</td></tr>
+     * <tr><td> ! </td><td> &#033;</td></tr>
+     * <tr><td> # </td><td> &#035;</td></tr>
+     * <tr><td> $ </td><td> &#036;</td></tr>
+     * <tr><td> % </td><td> &#037;</td></tr>
+     * <tr><td> ' </td><td> &#039;</td></tr>
+     * <tr><td> ( </td><td> &#040;</td></tr> 
+     * <tr><td> ) </td><td> &#041;</td></tr>
+     * <tr><td> * </td><td> &#042;</td></tr>
+     * <tr><td> + </td><td> &#043; </td></tr>
+     * <tr><td> , </td><td> &#044; </td></tr>
+     * <tr><td> - </td><td> &#045; </td></tr>
+     * <tr><td> . </td><td> &#046; </td></tr>
+     * <tr><td> / </td><td> &#047; </td></tr>
+     * <tr><td> : </td><td> &#058;</td></tr>
+     * <tr><td> ; </td><td> &#059;</td></tr>
+     * <tr><td> = </td><td> &#061;</td></tr>
+     * <tr><td> ? </td><td> &#063;</td></tr>
+     * <tr><td> @ </td><td> &#064;</td></tr>
+     * <tr><td> [ </td><td> &#091;</td></tr>
+     * <tr><td> \ </td><td> &#092;</td></tr>
+     * <tr><td> ] </td><td> &#093;</td></tr>
+     * <tr><td> ^ </td><td> &#094;</td></tr>
+     * <tr><td> _ </td><td> &#095;</td></tr>
+     * <tr><td> ` </td><td> &#096;</td></tr>
+     * <tr><td> { </td><td> &#123;</td></tr>
+     * <tr><td> | </td><td> &#124;</td></tr>
+     * <tr><td> } </td><td> &#125;</td></tr>
+     * <tr><td> ~ </td><td> &#126;</td></tr>
+     * </table>
+     * 
+     * <P>Note that JSTL's {@code <c:out>} escapes <em>only the first 
+     * five</em> of the above characters.
+     */
+     public static String forHTML(String aText){
+       final StringBuilder result = new StringBuilder();
+       final StringCharacterIterator iterator = new StringCharacterIterator(aText);
+       char character =  iterator.current();
+       while (character != CharacterIterator.DONE ){
+         if (character == '<') {
+           result.append("&lt;");
+         }
+         else if (character == '>') {
+           result.append("&gt;");
+         }
+         else if (character == '&') {
+           result.append("&amp;");
+        }
+         else if (character == '\"') {
+           result.append("&quot;");
+         }
+         else if (character == '\t') {
+           addCharEntity(9, result);
+         }
+         else if (character == '!') {
+           addCharEntity(33, result);
+         }
+         else if (character == '#') {
+           addCharEntity(35, result);
+         }
+         else if (character == '$') {
+           addCharEntity(36, result);
+         }
+         else if (character == '%') {
+           addCharEntity(37, result);
+         }
+         else if (character == '\'') {
+           addCharEntity(39, result);
+         }
+         else if (character == '(') {
+           addCharEntity(40, result);
+         }
+         else if (character == ')') {
+           addCharEntity(41, result);
+         }
+         else if (character == '*') {
+           addCharEntity(42, result);
+         }
+         else if (character == '+') {
+           addCharEntity(43, result);
+         }
+         else if (character == ',') {
+           addCharEntity(44, result);
+         }
+         else if (character == '-') {
+           addCharEntity(45, result);
+         }
+         else if (character == '.') {
+           addCharEntity(46, result);
+         }
+         else if (character == '/') {
+           addCharEntity(47, result);
+         }
+         else if (character == ':') {
+           addCharEntity(58, result);
+         }
+         else if (character == ';') {
+           addCharEntity(59, result);
+         }
+         else if (character == '=') {
+           addCharEntity(61, result);
+         }
+         else if (character == '?') {
+           addCharEntity(63, result);
+         }
+         else if (character == '@') {
+           addCharEntity(64, result);
+         }
+         else if (character == '[') {
+           addCharEntity(91, result);
+         }
+         else if (character == '\\') {
+           addCharEntity(92, result);
+         }
+         else if (character == ']') {
+           addCharEntity(93, result);
+         }
+         else if (character == '^') {
+           addCharEntity(94, result);
+         }
+         else if (character == '_') {
+           addCharEntity(95, result);
+         }
+         else if (character == '`') {
+           addCharEntity(96, result);
+         }
+         else if (character == '{') {
+           addCharEntity(123, result);
+         }
+         else if (character == '|') {
+           addCharEntity(124, result);
+         }
+         else if (character == '}') {
+           addCharEntity(125, result);
+         }
+         else if (character == '~') {
+           addCharEntity(126, result);
+         }
+         else {
+           //the char is not a special one
+           //add it to the result as is
+           result.append(character);
+         }
+         character = iterator.next();
+       }
+       return result.toString();
+    }
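+     // Usage sketch (illustrative input; the variable names are assumptions):
+     //   String untrusted = "<script>alert('x')</script>";
+     //   String safe = StringUtilEscapeChars.forHTML(untrusted);
+     //   // safe == "&lt;script&gt;alert&#040;&#039;x&#039;&#041;&lt;&#047;script&gt;"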
+    
+
+    /**
+    * Escape all ampersand characters in a URL. 
+    *  
+    * <P>Replaces all <tt>'&'</tt> characters with <tt>'&amp;'</tt>.
+    * 
+    *<P>An ampersand character may appear in the query string of a URL.
+    * The ampersand character is indeed valid in a URL.
+    * <em>However, URLs usually appear as an <tt>HREF</tt> attribute, and 
+    * such attributes have the additional constraint that ampersands 
+    * must be escaped.</em>
+    * 
+    * <P>The JSTL <c:url> tag does indeed perform proper URL encoding of 
+    * query parameters. But it does not, in general, produce text which 
+    * is valid as an <tt>HREF</tt> attribute, simply because it does 
+    * not escape the ampersand character. This is a nuisance when 
+    * multiple query parameters appear in the URL, since it requires a little 
+    * extra work.
+    */
+    public static String forHrefAmpersand(String aURL){
+      return aURL.replace("&", "&amp;");
+    }
+     
+    /**
+     * Synonym for <tt>URLEncoder.encode(String, "UTF-8")</tt>.
+     *
+     * <P>Used to ensure that HTTP query strings are in proper form, by escaping
+     * special characters such as spaces.
+     *
+     * <P>It is important to note that if a query string appears in an <tt>HREF</tt>
+     * attribute, then there are two issues - ensuring the query string is valid HTTP
+     * (it is URL-encoded), and ensuring it is valid HTML (ensuring the 
+     * ampersand is escaped).
+     */
+     public static String forURL(String aURLFragment){
+       String result = null;
+       try {
+         result = URLEncoder.encode(aURLFragment, "UTF-8");
+       }
+       catch (UnsupportedEncodingException ex){
+         throw new RuntimeException("UTF-8 not supported", ex);
+       }
+       return result;
+     }
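+     // Combined usage sketch for building an HREF value (parameter names and values are illustrative):
+     //   String query = "q=" + forURL("galileo dialogo") + "&lang=it"; // URL-encode the parameter value
+     //   String href = forHrefAmpersand("page.jsp?" + query);          // escape '&' for the HTML attribute
+     //   // href == "page.jsp?q=galileo+dialogo&amp;lang=it"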
+
+    /**
+    * Escape characters for text appearing as XML data, between tags.
+    * 
+    * <P>The following characters are replaced with corresponding character entities :
+    * <table border='1' cellpadding='3' cellspacing='0'>
+    * <tr><th> Character </th><th> Encoding </th></tr>
+    * <tr><td> < </td><td> &lt; </td></tr>
+    * <tr><td> > </td><td> &gt; </td></tr>
+    * <tr><td> & </td><td> &amp; </td></tr>
+    * <tr><td> " </td><td> &quot;</td></tr>
+    * <tr><td> ' </td><td> &#039;</td></tr>
+    * </table>
+    * 
+    * <P>Note that JSTL's {@code <c:out>} escapes the exact same set of 
+    * characters as this method. <span class='highlight'>That is, {@code <c:out>}
+    *  is good for escaping to produce valid XML, but not for producing safe 
+    *  HTML.</span>
+    */
+    public static String forXML(String aText){
+      final StringBuilder result = new StringBuilder();
+      final StringCharacterIterator iterator = new StringCharacterIterator(aText);
+      char character =  iterator.current();
+      while (character != CharacterIterator.DONE ){
+        if (character == '<') {
+          result.append("&lt;");
+        }
+        else if (character == '>') {
+          result.append("&gt;");
+        }
+        else if (character == '\"') {
+          result.append("&quot;");
+        }
+        else if (character == '\'') {
+          result.append("&#039;");
+        }
+        else if (character == '&') {
+           result.append("&amp;");
+        }
+        else {
+          //the char is not a special one
+          //add it to the result as is
+          result.append(character);
+        }
+        character = iterator.next();
+      }
+      return result.toString();
+    }
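+    // Usage sketch (illustrative element and value):
+    //   String xml = "<title>" + forXML("Discorsi & Dimostrazioni") + "</title>";
+    //   // -> "<title>Discorsi &amp; Dimostrazioni</title>"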
+
+    /**
+    * Return <tt>aText</tt> with all <tt>'<'</tt> and <tt>'>'</tt> characters
+    * replaced by their escaped equivalents.
+    */
+    public static String toDisableTags(String aText){
+      final StringBuilder result = new StringBuilder();
+      final StringCharacterIterator iterator = new StringCharacterIterator(aText);
+      char character =  iterator.current();
+      while (character != CharacterIterator.DONE ){
+        if (character == '<') {
+          result.append("&lt;");
+        }
+        else if (character == '>') {
+          result.append("&gt;");
+        }
+        else {
+          //the char is not a special one
+          //add it to the result as is
+          result.append(character);
+        }
+        character = iterator.next();
+      }
+      return result.toString();
+    }
+    
+
+    /**
+    * Replace characters having special meaning in regular expressions
+    * with their escaped equivalents, preceded by a '\' character.
+    *
+    * <P>The escaped characters include :
+    *<ul>
+    *<li>.
+    *<li>\
+    *<li>?, * , and +
+    *<li>&
+    *<li>:
+    *<li>{ and }
+    *<li>[ and ]
+    *<li>( and )
+    *<li>^ and $
+    *</ul>
+    */
+    public static String forRegex(String aRegexFragment){
+      final StringBuilder result = new StringBuilder();
+
+      final StringCharacterIterator iterator = 
+        new StringCharacterIterator(aRegexFragment)
+      ;
+      char character =  iterator.current();
+      while (character != CharacterIterator.DONE ){
+        /*
+        * All literals need to have backslashes doubled.
+        */
+        if (character == '.') {
+          result.append("\\.");
+        }
+        else if (character == '\\') {
+          result.append("\\\\");
+        }
+        else if (character == '?') {
+          result.append("\\?");
+        }
+        else if (character == '*') {
+          result.append("\\*");
+        }
+        else if (character == '+') {
+          result.append("\\+");
+        }
+        else if (character == '&') {
+          result.append("\\&");
+        }
+        else if (character == ':') {
+          result.append("\\:");
+        }
+        else if (character == '{') {
+          result.append("\\{");
+        }
+        else if (character == '}') {
+          result.append("\\}");
+        }
+        else if (character == '[') {
+          result.append("\\[");
+        }
+        else if (character == ']') {
+          result.append("\\]");
+        }
+        else if (character == '(') {
+          result.append("\\(");
+        }
+        else if (character == ')') {
+          result.append("\\)");
+        }
+        else if (character == '^') {
+          result.append("\\^");
+        }
+        else if (character == '$') {
+          result.append("\\$");
+        }
+        else {
+          //the char is not a special one
+          //add it to the result as is
+          result.append(character);
+        }
+        character = iterator.next();
+      }
+      return result.toString();
+    }
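+    // Usage sketch (illustrative literal; the escaped form can be compiled safely):
+    //   String literal = "p. 5 (fig. 2)";
+    //   boolean found = Pattern.compile(forRegex(literal)).matcher("see p. 5 (fig. 2) below").find(); // true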
+    
+    /**
+    * Escape <tt>'$'</tt> and <tt>'\'</tt> characters in replacement strings.
+    * 
+    * <P>Synonym for <tt>Matcher.quoteReplacement(String)</tt>.
+    * 
+    * <P>The following methods use replacement strings which treat 
+    * <tt>'$'</tt> and <tt>'\'</tt> as special characters:
+    * <ul>
+    * <li><tt>String.replaceAll(String, String)</tt>
+    * <li><tt>String.replaceFirst(String, String)</tt>
+    * <li><tt>Matcher.appendReplacement(StringBuffer, String)</tt>
+    * </ul>
+    * 
+    * <P>If replacement text can contain arbitrary characters, then you 
+    * will usually need to escape that text, to ensure special characters 
+    * are interpreted literally.
+    */
+    public static String forReplacementString(String aInput){
+      return Matcher.quoteReplacement(aInput);
+    }
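+    // Usage sketch (illustrative replacement text containing '$' and '\'):
+    //   String raw = "price: $5 \\ page";                                  // runtime value: price: $5 \ page
+    //   String out = "cost: X".replaceAll("X", forReplacementString(raw)); // -> "cost: price: $5 \ page"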
+    
+    /**
+    * Disable all <tt><SCRIPT></tt> tags in <tt>aText</tt>.
+    * 
+    * <P>Insensitive to case.
+    */  
+    public static String forScriptTagsOnly(String aText){
+      String result = null;
+      Matcher matcher = SCRIPT.matcher(aText);
+      result = matcher.replaceAll("&lt;SCRIPT>");
+      matcher = SCRIPT_END.matcher(result);
+      result = matcher.replaceAll("&lt;/SCRIPT>");
+      return result;
+    }
+    
+    // PRIVATE //
+    
+    private StringUtilEscapeChars(){
+      //empty - prevent construction
+    }
+    
+    private static final Pattern SCRIPT = Pattern.compile(
+      "<SCRIPT>", Pattern.CASE_INSENSITIVE
+     );
+    private static final Pattern SCRIPT_END = Pattern.compile(
+      "</SCRIPT>", Pattern.CASE_INSENSITIVE
+    );
+    
+    private static void addCharEntity(Integer aIdx, StringBuilder aBuilder){
+      String padding = "";
+      if( aIdx <= 9 ){
+         padding = "00";
+      }
+      else if( aIdx <= 99 ){
+        padding = "0";
+      }
+      else {
+        //no prefix
+      }
+      String number = padding + aIdx.toString();
+      aBuilder.append("&#" + number + ";");
+    }
+  }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/Util.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,48 @@
+package de.mpg.mpiwg.berlin.mpdl.util;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Date;
+import java.util.Properties;
+
+public class Util {
+
+  public Properties getProperties(String fullFileName) {
+    Properties props = new Properties(); 
+    try {
+      File file = new File(fullFileName);
+      FileInputStream in = new FileInputStream(file);
+      props.load(in);
+    } catch (IOException e) {
+      // ignore: return empty properties if the file cannot be read
+    }
+    return props;
+  }
+  
+  public String toYearStr(String inputStr) {
+    String retYearStr = inputStr.trim();
+    int index = inputStr.indexOf("-");
+    if (index > 0) {
+      retYearStr = inputStr.substring(0, index);
+      retYearStr = retYearStr.trim();
+    }
+    try {
+      Integer year = new Integer(retYearStr);
+      if (year < 1000 || year > 2500)  // plausibility check: accept four-digit years up to 2500
+        retYearStr = null;
+    } catch (NumberFormatException e) {
+      retYearStr = null;
+    }
+    return retYearStr;
+  }
+
+  public Double getSecondWithMillisecondsBetween(Date begin, Date end) {
+    long beginMS = begin.getTime();
+    long endMS = end.getTime();
+    long elapsedMilliSeconds = endMS - beginMS;
+    Double seconds = new Double(elapsedMilliSeconds / 1000.0);  // seconds with millisecond precision (avoids the missing zero padding of the old string concatenation)
+    return seconds;
+  }
+ 
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/util/XmlUtil.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,384 @@
+package de.mpg.mpiwg.berlin.mpdl.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.net.URL;
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+
+import javax.xml.XMLConstants;
+import javax.xml.namespace.NamespaceContext;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.sax.SAXSource;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.validation.Schema;
+import javax.xml.validation.SchemaFactory;
+import javax.xml.validation.Validator;
+import javax.xml.xpath.XPath;
+import javax.xml.xpath.XPathConstants;
+import javax.xml.xpath.XPathFactory;
+
+import net.sf.saxon.om.NodeInfo;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class XmlUtil {
+  static String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; 
+  static String JAXP_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
+  static String W3C_XML_SCHEMA = XMLConstants.W3C_XML_SCHEMA_NS_URI; 
+
+  public static XmlUtil getInstance() {
+    return new XmlUtil();
+  }
+
+  public Node doc(String url) throws ApplicationException {
+    Node root = null;
+    try {
+      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      dbf.setNamespaceAware(true);
+      DocumentBuilder db = dbf.newDocumentBuilder();
+      InputSource inputSource = new InputSource(url); 
+      Document doc = db.parse(inputSource);   
+      root = doc.getFirstChild();
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return root;
+  }
+
+  public Node parse(String xmlFileName) throws ApplicationException {
+    File xmlFile = new File(xmlFileName);
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    Node retNode = null;
+    try {
+      retNode = xmlUtil.doc(xmlFile);
+    } catch (ApplicationException e) {
+      throw new ApplicationException("Your source file is not valid: " + e.getMessage());
+    }
+    return retNode;
+  }
+  
+  public Node doc(File xmlFile) throws ApplicationException {
+    Node root = null;
+    try {
+      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      dbf.setNamespaceAware(true);
+      DocumentBuilder db = dbf.newDocumentBuilder();
+      Document doc = db.parse(xmlFile);   
+      root = doc.getFirstChild();
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return root;
+  }
+
+  public void validateByRelaxNG(File xmlFile, URL schemaUrl) throws ApplicationException {
+    System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory");
+    SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
+    Schema schema = null;
+    try {
+      schema = factory.newSchema(schemaUrl);
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    }
+    Validator validator = schema.newValidator();
+    InputSource inputSource = new InputSource(xmlFile.getPath());
+    Source source = new SAXSource(inputSource); 
+    try {
+      validator.validate(source);
+    } catch (SAXException e) {
+      String message = e.getMessage();
+      String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl;
+      throw new ApplicationException(text + ":\n" + message);
+    } catch (IOException e) {
+      String message = e.getMessage();
+      String text = "Your file is not valid against the RelaxNG schema: " + schemaUrl;
+      throw new ApplicationException(text + ": " + message);
+    }    
+  }
+  
+  public String getNodeValue(Node node) {
+    String nodeValueStr = node.getNodeValue();
+    if (nodeValueStr == null)
+      nodeValueStr = node.getTextContent();
+    return nodeValueStr;
+  }
+  
+  public String getNodeAttributeValue(Node node, String attrName) {
+    NamedNodeMap attrs = node.getAttributes();
+    if (attrs == null) {
+      return null;
+    }
+    Node attN = attrs.getNamedItem(attrName);
+    if (attN == null) {
+      return null;
+    }
+    return attN.getNodeValue();
+  }
+  
+  public ArrayList<String> toStringArray(NodeList nodes) {
+    ArrayList<String> nodeValues = null;
+    for (int i=0; i< nodes.getLength(); i++) {
+      Node node = nodes.item(i);
+      if (nodeValues == null)
+        nodeValues = new ArrayList<String>();
+      String nodeValue = node.getNodeValue();
+      nodeValues.add(nodeValue);
+    }
+    return nodeValues;
+  }
+  
+  public String toXsDate(Date date) {
+    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+    String xsDateStr = dateFormat.format(date);
+    return xsDateStr;
+  }
+  
+  public Date toDate(String xsDateStr) throws ApplicationException {
+    Date retDate = null;
+    if (xsDateStr == null)
+      return null;
+    try {
+      DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+      retDate = dateFormat.parse(xsDateStr);
+    } catch (ParseException e) {
+      throw new ApplicationException(e);
+    }
+    return retDate;
+  }
+
+  public String evaluateToString(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
+    String resultStr = null;
+    ArrayList<String> strArray = evaluateToStringArray(xmlString, xpathExpression, nsContext);
+    if (strArray != null && strArray.size() > 0)
+      resultStr = strArray.get(0);
+    return resultStr;
+  }
+  
+  public String evaluateToString(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
+    String resultStr = null;
+    ArrayList<String> strArray = evaluateToStringArray(inputSource, xpathExpression, nsContext);
+    if (strArray != null && strArray.size() > 0)
+      resultStr = strArray.get(0);
+    return resultStr;
+  }
+  
+  public String evaluateToString(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
+    String resultStr = null;
+    ArrayList<String> strArray = evaluateToStringArray(node, xpathExpression, nsContext);
+    if (strArray != null && strArray.size() > 0)
+      resultStr = strArray.get(0);
+    return resultStr;
+  }
+
+  public ArrayList<String> evaluateToStringArray(String xmlString, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
+    Reader stringReader = new StringReader(xmlString);
+    InputSource inputSource = new InputSource(stringReader); 
+    ArrayList<String> retStrArray = evaluateToStringArray(inputSource, xpathExpression, nsContext);
+    return retStrArray;
+  }
+  
+  public ArrayList<String> evaluateToStringArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
+    ArrayList<String> retStrArray = null;
+    try {
+      XPath xpath = XPathFactory.newInstance().newXPath();
+      if (nsContext != null)
+        xpath.setNamespaceContext(nsContext); 
+      Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
+      if (resultObjects != null) {
+        retStrArray = nodesetToStringArray(resultObjects);
+      }
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return retStrArray;
+  }
+  
+  public ArrayList<Node> evaluateToNodeArray(InputSource inputSource, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
+    ArrayList<Node> retArray = null;
+    try {
+      XPath xpath = XPathFactory.newInstance().newXPath();
+      if (nsContext != null)
+        xpath.setNamespaceContext(nsContext); 
+      Object resultObjects = xpath.evaluate(xpathExpression, inputSource, XPathConstants.NODESET);
+      if (resultObjects != null) {
+        retArray = nodesetToNodeArray(resultObjects);
+      }
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return retArray;
+  }
+  
+  public ArrayList<String> evaluateToStringArray(Node node, String xpathExpression, NamespaceContext nsContext) throws ApplicationException {
+    ArrayList<String> retStrArray = null;
+    try {
+      XPath xpath = XPathFactory.newInstance().newXPath();
+      if (nsContext != null)
+        xpath.setNamespaceContext(nsContext); 
+      Object resultObjects = xpath.evaluate(xpathExpression, node, XPathConstants.NODESET);
+      if (resultObjects != null) {
+        retStrArray = nodesetToStringArray(resultObjects);
+      }
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return retStrArray;
+  }
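+  // Usage sketch (assumes an "echo" document on disk and the namespace context from
+  // SchemaHandler.getEchoNsContext(); the file name is illustrative):
+  //
+  //   XmlUtil xmlUtil = XmlUtil.getInstance();
+  //   Node docNode = xmlUtil.doc(new File("yourDoc1.xml"));
+  //   String title = xmlUtil.evaluateToString(docNode,
+  //       "/echo:echo/echo:metadata/dcterms:title", new SchemaHandler().getEchoNsContext());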
+  
+  /*
+   * XPath evaluation: handles both the javax and the Saxon implementation.
+   * The javax XPath evaluation returns a NodeList.
+   * Saxon's XPath evaluation returns an ArrayList of TinyTextImpl (which can be cast to NodeInfo and then handled much like a DOM node).
+   */
+  private ArrayList<String> nodesetToStringArray(Object nodesetObjects) {
+    ArrayList<String> retStrArray = null;
+    if (nodesetObjects instanceof NodeList) {
+      NodeList resultNodeList = (NodeList) nodesetObjects;
+      int length = resultNodeList.getLength();
+      if (length > 0) {
+        retStrArray = new ArrayList<String>();
+        for (int i=0; i<length; i++) {
+          Node n = resultNodeList.item(i);
+          String nodeValue = n.getNodeValue();
+          if (nodeValue == null)
+            nodeValue = n.getTextContent();
+          if (nodeValue != null)
+            retStrArray.add(nodeValue);
+        }
+      }
+    } else if (nodesetObjects instanceof ArrayList) {
+      ArrayList arrayListNodes = (ArrayList) nodesetObjects;
+      retStrArray = new ArrayList<String>();
+      for (int i=0; i<arrayListNodes.size(); i++) {
+        Object arrayListNode = arrayListNodes.get(i);
+        if (arrayListNode instanceof Node) {
+          Node n = (Node) arrayListNode;
+          String nodeValue = n.getNodeValue();
+          if (nodeValue == null)
+            nodeValue = n.getTextContent();
+          if (nodeValue != null)
+            retStrArray.add(nodeValue);
+        } else if (arrayListNode instanceof NodeInfo) {
+          NodeInfo n = (NodeInfo) arrayListNode;
+          String nodeValue = n.getStringValue();
+          retStrArray.add(nodeValue);
+        }
+      }
+    } 
+    return retStrArray;
+  }
+
+  /*
+   * XPath evaluation: handles both the javax and the Saxon implementation.
+   * The javax XPath evaluation returns a NodeList.
+   * Saxon's XPath evaluation returns an ArrayList of TinyTextImpl (which can be cast to NodeInfo and then handled much like a DOM node).
+   */
+  private ArrayList<Node> nodesetToNodeArray(Object nodesetObjects) {
+    ArrayList<Node> retArray = null;
+    if (nodesetObjects instanceof NodeList) {
+      NodeList resultNodeList = (NodeList) nodesetObjects;
+      int length = resultNodeList.getLength();
+      if (length > 0) {
+        retArray = new ArrayList<Node>();
+        for (int i=0; i<length; i++) {
+          Node n = resultNodeList.item(i);
+          retArray.add(n);
+        }
+      }
+    } else if (nodesetObjects instanceof ArrayList) {
+      ArrayList arrayListNodes = (ArrayList) nodesetObjects;
+      retArray = new ArrayList<Node>();
+      for (int i=0; i<arrayListNodes.size(); i++) {
+        Object arrayListNode = arrayListNodes.get(i);
+        if (arrayListNode instanceof Node) {
+          Node n = (Node) arrayListNode;
+          retArray.add(n);
+        } else if (arrayListNode instanceof NodeInfo) {
+          NodeInfo n = (NodeInfo) arrayListNode;
+          // TODO provide clean return value
+        }
+      }
+    } 
+    return retArray;
+  }
+
+  // TODO not used yet, test it
+  public Node doc(File xmlFile, File schemaFile) throws ApplicationException {
+    Node root = null;
+    try {
+      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+      dbf.setNamespaceAware(true);
+      // dbf.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);  // TODO maybe re-enable this later
+      dbf.setAttribute(JAXP_SCHEMA_SOURCE, schemaFile);
+      DocumentBuilder db = dbf.newDocumentBuilder();
+      Document doc = db.parse(xmlFile);   
+      root = doc.getFirstChild();
+    } catch (Exception e) {
+      throw new ApplicationException(e);
+    }
+    return root;
+  }
+
+  // TODO not used yet, test it
+  public void validateByRelaxNG(Node docNode, URL schemaUrl) throws ApplicationException {
+    // System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.XMLSyntaxSchemaFactory");
+    System.setProperty(SchemaFactory.class.getName() + ":" + XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.CompactSyntaxSchemaFactory");
+    SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
+    Schema schema = null;
+    try {
+      schema = factory.newSchema(schemaUrl);
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    }
+    Validator validator = schema.newValidator();
+    DOMSource source = new DOMSource(docNode);
+    try {
+      validator.validate(source);
+    } catch (SAXException e) {
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      throw new ApplicationException(e);
+    }    
+  }
+
+  // TODO not used yet, test it
+  public String toStringByTransformer(Node doc) throws ApplicationException {
+    String xmlString = null;
+    try {
+      Transformer transformer = TransformerFactory.newInstance().newTransformer();
+      transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+      //initialize StreamResult with File object to save to file
+      StreamResult result = new StreamResult(new StringWriter());
+      DOMSource source = new DOMSource(doc);
+      transformer.transform(source, result);
+      xmlString = result.getWriter().toString();
+    } catch (TransformerConfigurationException e) {
+      throw new ApplicationException(e);
+    } catch (TransformerException e) {
+      throw new ApplicationException(e);
+    }
+    return xmlString;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xml/SchemaHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,368 @@
+package de.mpg.mpiwg.berlin.mpdl.xml;
+
+import java.io.File;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.Iterator;
+
+import javax.xml.namespace.NamespaceContext;
+
+import org.w3c.dom.Node;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.MetadataRecord;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+import de.mpg.mpiwg.berlin.mpdl.util.Util;
+import de.mpg.mpiwg.berlin.mpdl.util.XmlUtil;
+
+public class SchemaHandler {
+
+  /**
+   * 
+   * @param fileName
+   * @param docOperation
+   * @return doc root node of xml file
+   * @throws ApplicationException
+   */
+  public Node validate(String fileName, MpdlDocOperation docOperation) throws ApplicationException {
+    File destFile = new File(fileName);
+    String docBase = docOperation.getDocBase();
+    // file name validation
+    String fName = docOperation.getFileName();
+    if (fName == null || fName.trim().equals(""))
+      throw new ApplicationException("Your document file name is empty. Please specify a file name for your document.");
+    if (! fName.endsWith(".xml"))
+      throw new ApplicationException("Your document file name does not end with \".xml\". Please specify a file name with the suffix \".xml\" for your document.");
+    // RelaxNG schema validation
+    validateByRelaxNGSchema(destFile, docBase);
+    // parse validation
+    Node docNode = parse(destFile);
+    // first simple validations
+    validate(docNode, docBase);
+    // validate metadata
+    MetadataRecord mdRecord = getMetadataRecord(docNode, docOperation);
+    validate(mdRecord);
+    docOperation.setMdRecord(mdRecord);
+    return docNode;
+  }
+  
+  public MetadataRecord getMetadataRecord(Node documentNode, MpdlDocOperation docOperation) throws ApplicationException {
+    MetadataRecord mdRecord = null;
+    String eXistIdentifier = docOperation.getDestUrl(); 
+    String docBase = docOperation.getDocBase();
+    if (docBase != null && docBase.equals("echo")) {
+      mdRecord = getMetadataRecordEcho(documentNode);
+      if (mdRecord != null) {
+        String identifier = mdRecord.getIdentifier();
+        if (identifier == null) {
+          String id = getIdByExistId(eXistIdentifier);
+          mdRecord.setIdentifier("ECHO:" + id + ".xml");
+        }
+      }
+    } else if (docBase != null && docBase.equals("archimedes")) {
+      mdRecord = getMetadataRecordArchimedes(documentNode);
+      if (mdRecord != null) {
+        String id = getIdByExistId(eXistIdentifier);
+        mdRecord.setIdentifier("ARCHIMEDES:" + id + ".xml");
+      }
+    }
+    if (mdRecord != null) {
+      mdRecord.setEXistIdentifier(eXistIdentifier);
+      mdRecord.setMediaType("fulltext");
+    }
+    return mdRecord;
+  }
+  
+  public ArrayList<String> getPBFileNames(Node documentNode, String docBase) throws ApplicationException {
+    ArrayList<String> pbFileNamesArrayStr = null;
+    if (docBase != null && docBase.equals("echo")) {
+      XmlUtil xmlUtil = XmlUtil.getInstance();
+      NamespaceContext nsContext = getEchoNsContext();
+      pbFileNamesArrayStr = xmlUtil.evaluateToStringArray(documentNode, "//echo:pb/@file", nsContext);
+    } else if (docBase != null && docBase.equals("archimedes")) {
+      XmlUtil xmlUtil = XmlUtil.getInstance();
+      ArrayList<String> pbsStrArray = xmlUtil.evaluateToStringArray(documentNode, "//pb", null);
+      if (pbsStrArray != null) {
+        pbFileNamesArrayStr = new ArrayList<String>();
+        int countPBs = pbsStrArray.size();
+        for (int i=1; i<=countPBs; i++) {
+          pbFileNamesArrayStr.add("" + i); // archimedes page breaks have no file names, so use the page index as a placeholder
+        }
+      }
+    }
+    return pbFileNamesArrayStr;
+  }
+  
+  public String getPageImgDir(MetadataRecord mdRecord) throws ApplicationException {
+    String dcId = mdRecord.getIdentifier();  // dublin core identifier: is used to find the digilib image directory for this document
+    String id = getIdByDCIdentifier(dcId);
+    String imagesDocDirectory = "/permanent/library/" + id;
+    if (mdRecord.hasArchimedesDocBase())
+      imagesDocDirectory = "/permanent/archimedes/" + id;
+    String echoDir = mdRecord.getEchoDir();
+    if (echoDir != null)
+      imagesDocDirectory = echoDir;
+    String pageImgSubDir =  "pageimg"; // default name: if digilib does not answer then this name is used
+    String indexMetaPageImgDir = getIndexMetaDataPageImg(imagesDocDirectory);
+    if (indexMetaPageImgDir != null)
+      pageImgSubDir = indexMetaPageImgDir;
+    String pageImgDir = imagesDocDirectory + "/" + pageImgSubDir;
+    return pageImgDir;
+  }
+  
+  private Node parse(File file) throws ApplicationException {
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    Node retNode = null;
+    try {
+      retNode = xmlUtil.doc(file);
+    } catch (ApplicationException e) {
+      throw new ApplicationException("Your source file is not valid: " + e.getMessage());
+    }
+    return retNode;
+  }
+  
+  private void validate(Node docNode, String docBase) throws ApplicationException {
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    NamespaceContext nsContext = getEchoNsContext();
+    String echoTest = null;
+    String archimedesTest = null;
+    try {
+      echoTest = xmlUtil.evaluateToString(docNode, "/echo:echo/echo:metadata", nsContext);
+      archimedesTest = xmlUtil.evaluateToString(docNode, "/archimedes/info", null);
+    } catch (ApplicationException e) {
+      throw new ApplicationException("Your source file is not an \"echo\" or \"archimedes\" file. Please proof that file.");
+    }
+    if (docBase.equals("echo") && archimedesTest != null)
+      throw new ApplicationException("Your source file is an \"archimedes\" file. " + "Please specify \"archimedes\" in your destination document base.");
+    if (docBase.equals("archimedes") && echoTest != null)
+      throw new ApplicationException("Your source file is an \"echo\" file. " + "Please specify \"echo\" in your destination document base.");
+  }
+  
+  private void validateByRelaxNGSchema(File destFile, String docBase) throws ApplicationException {
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    if (docBase.equals("echo")) {
+      URL echoSchemaUrl = getEchoRelaxNGSchemaUrl();
+      xmlUtil.validateByRelaxNG(destFile, echoSchemaUrl);
+    }
+  }
+  
+  private URL getEchoRelaxNGSchemaUrl() throws ApplicationException {
+    String echoSchemaUrlStr = "http://" + MpdlConstants.MPDL_EXIST_HOST_NAME + ":" + MpdlConstants.MPDL_EXIST_PORT + MpdlConstants.MPDL_ECHO_RELAXNG_PATH;
+    URL echoSchemaUrl = null;
+    try {
+      echoSchemaUrl = new URL(echoSchemaUrlStr);
+    } catch (MalformedURLException e) {
+      throw new ApplicationException(e);
+    }
+    return echoSchemaUrl;    
+  }
+
+  private void validate(MetadataRecord mdRecord) throws ApplicationException {
+    String identifier = mdRecord.getIdentifier();
+    String creator = mdRecord.getCreator();
+    String title = mdRecord.getTitle();
+    if (identifier == null || identifier.trim().equals(""))
+      throw new ApplicationException("Your document file does not contain the metadata field: " + "identifier");
+    if (creator == null || creator.trim().equals(""))
+      throw new ApplicationException("Your document file does not contain the metadata field: " + "creator");
+    if (title == null || title.trim().equals(""))
+      throw new ApplicationException("Your document file does not contain the metadata field: " + "title");
+  }
+  
+  private MetadataRecord getMetadataRecordEcho(Node documentNode) throws ApplicationException {
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    NamespaceContext nsContext = getEchoNsContext();
+    String identifier = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:identifier", nsContext);
+    if (identifier != null)
+      identifier = StringUtilEscapeChars.deresolveXmlEntities(identifier);
+    String creator = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:creator", nsContext);
+    if (creator != null)
+      creator = StringUtilEscapeChars.deresolveXmlEntities(creator);
+    String title = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:title", nsContext);
+    if (title != null)
+      title = StringUtilEscapeChars.deresolveXmlEntities(title);
+    String language = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:language", nsContext);
+    if (language != null)
+      language = StringUtilEscapeChars.deresolveXmlEntities(language);
+    String yearStr = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:date", nsContext);
+    Date date = null; 
+    if (yearStr != null && ! yearStr.equals("")) {
+      yearStr = StringUtilEscapeChars.deresolveXmlEntities(yearStr);
+      yearStr = new Util().toYearStr(yearStr);  // test if possible etc
+      if (yearStr != null)
+        date = XmlUtil.getInstance().toDate(yearStr + "-01-01T00:00:00.000Z");
+    }
+    String rights = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:rights", nsContext);
+    if (rights != null)
+      rights = StringUtilEscapeChars.deresolveXmlEntities(rights);
+    String license = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:license", nsContext);
+    if (license != null)
+      license = StringUtilEscapeChars.deresolveXmlEntities(license);
+    String accessRights = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/dcterms:accessRights", nsContext);
+    if (accessRights != null)
+      accessRights = StringUtilEscapeChars.deresolveXmlEntities(accessRights);
+    String echoDir = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/echo:echodir", nsContext);
+    if (echoDir != null)
+      echoDir = StringUtilEscapeChars.deresolveXmlEntities(echoDir);
+    String echoLink = xmlUtil.evaluateToString(documentNode, "/echo:echo/echo:metadata/echo:echolink", nsContext);
+    if (echoLink != null)
+      echoLink = StringUtilEscapeChars.deresolveXmlEntities(echoLink);
+    MetadataRecord mdRecord = new MetadataRecord(identifier, language, creator, title, null, null, "text/xml", rights, date);
+    mdRecord.setDocBase("echo");
+    mdRecord.setLicense(license);
+    mdRecord.setAccessRights(accessRights);
+    mdRecord.setEchoLink(echoLink);
+    mdRecord.setEchoDir(echoDir);
+    return mdRecord;
+  }
+
+  private MetadataRecord getMetadataRecordArchimedes(Node documentNode) throws ApplicationException {
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String identifier = xmlUtil.evaluateToString(documentNode, "/archimedes/info/cvs_file", null);
+    if (identifier != null)
+      identifier = StringUtilEscapeChars.deresolveXmlEntities(identifier);
+    String creator = xmlUtil.evaluateToString(documentNode, "/archimedes/info/author", null);
+    if (creator != null)
+      creator = StringUtilEscapeChars.deresolveXmlEntities(creator);
+    String title = xmlUtil.evaluateToString(documentNode, "/archimedes/info/title", null);
+    if (title != null)
+      title = StringUtilEscapeChars.deresolveXmlEntities(title);
+    String language = xmlUtil.evaluateToString(documentNode, "/archimedes/info/lang", null);
+    if (language != null)
+      language = StringUtilEscapeChars.deresolveXmlEntities(language);
+    String yearStr = xmlUtil.evaluateToString(documentNode, "/archimedes/info/date", null);
+    Date date = null; 
+    if (yearStr != null && ! yearStr.equals("")) {
+      yearStr = StringUtilEscapeChars.deresolveXmlEntities(yearStr);
+      yearStr = new Util().toYearStr(yearStr);  // test if possible etc
+      if (yearStr != null)
+        date = XmlUtil.getInstance().toDate(yearStr + "-01-01T00:00:00.000Z");
+    }
+    String rights = "open access";
+    String license = "http://echo.mpiwg-berlin.mpg.de/policy/oa_basics/declaration";
+    String accessRights = "free";
+    MetadataRecord mdRecord = new MetadataRecord(identifier, language, creator, title, null, null, "text/xml", rights, date);
+    mdRecord.setDocBase("archimedes");
+    mdRecord.setLicense(license);
+    mdRecord.setAccessRights(accessRights);
+    return mdRecord;
+  }
+
+  private String getIndexMetaDataPageImg(String imagesDocDirectory) throws ApplicationException {
+    String resultStr = null;
+    String nausikaaURLTexter = "http://nausikaa2.mpiwg-berlin.mpg.de/digitallibrary/servlet/Texter";
+    XmlUtil xmlUtil = XmlUtil.getInstance();
+    String pageImageDirectory = null; 
+    try {
+      Node imagesDocDirectoryIndexMetaNode = xmlUtil.doc(nausikaaURLTexter + "?fn=" + imagesDocDirectory + "/index.meta");
+      pageImageDirectory = xmlUtil.evaluateToString(imagesDocDirectoryIndexMetaNode, "/resource/meta/texttool/image", null);
+    } catch (Exception e) {
+      // return null if digilib does not work
+    }
+    if (pageImageDirectory != null) {
+      resultStr = pageImageDirectory;
+    }
+    return resultStr;
+  }
+  
+  private String getIdByExistId(String eXistIdentifier) {
+    String id = null;
+    if (eXistIdentifier == null)
+      return null;
+    int firstDelimPos = eXistIdentifier.indexOf("/", 2);
+    int secondDelimPos = eXistIdentifier.indexOf("/", firstDelimPos + 1);
+    int thirdDelimPos = eXistIdentifier.indexOf(".xml", secondDelimPos + 1);
+    if (firstDelimPos == -1 || secondDelimPos == -1 || thirdDelimPos == -1)
+      id = eXistIdentifier;
+    else
+      id = eXistIdentifier.substring(secondDelimPos + 1, thirdDelimPos);
+    return id;
+  }
+  
+  private String getIdByDCIdentifier(String dcIdentifier) {
+    if (dcIdentifier == null || dcIdentifier.trim().equals(""))
+      return null;
+    // if dcIdentifier starts with "ECHO:" or "ARCHIMEDES:" then delete it
+    if (dcIdentifier.startsWith("ECHO:"))
+      dcIdentifier = dcIdentifier.substring(5);
+    if (dcIdentifier.startsWith("ARCHIMEDES:"))
+      dcIdentifier = dcIdentifier.substring(11);
+    // delete the .xml suffix if there is one
+    if (dcIdentifier.endsWith(".xml")) {
+      int size = dcIdentifier.length();
+      dcIdentifier = dcIdentifier.substring(0, size - 4);
+    }
+    return dcIdentifier;
+  }
+  
+  public NamespaceContext getEchoNsContext() {
+    NamespaceContext nsContext = new NamespaceContext() {
+      public String getNamespaceURI(String prefix) {
+        String uri;
+        if (prefix.equals("de"))
+          uri = "http://www.mpiwg-berlin.mpg.de/ns/de/1.0/";
+        else if (prefix.equals("echo"))
+          uri = "http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/";
+        else if (prefix.equals("dc"))
+          uri = "http://purl.org/dc/elements/1.1/";
+        else if (prefix.equals("dcterms"))
+          uri = "http://purl.org/dc/terms";
+        else if (prefix.equals("dcq"))
+          uri = "http://purl.org/dc/qualifiers/1.0/";
+        else if (prefix.equals("xhtml"))
+          uri = "http://www.w3.org/1999/xhtml";
+        else if (prefix.equals("dct"))
+          uri = "http://purl.org/dc/terms/1.0/";
+        else if (prefix.equals("xlink"))
+          uri = "http://www.w3.org/1999/xlink";
+        else if (prefix.equals("rdf"))
+          uri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+        else if (prefix.equals("xsi"))
+          uri = "http://www.w3.org/2001/XMLSchema-instance";
+        else if (prefix.equals("mml"))
+          uri = "http://www.w3.org/1998/Math/MathML";
+        else
+          uri = null;
+        return uri;
+      }
+      
+      public String getPrefix(String uri) {
+        if (uri.equals("http://www.mpiwg-berlin.mpg.de/ns/de/1.0/"))
+          return "de";
+        else if (uri.equals("http://www.mpiwg-berlin.mpg.de/ns/echo/1.0/"))
+          return "echo";
+        else if (uri.equals("http://purl.org/dc/elements/1.1/"))
+          return "dc";
+        else if (uri.equals("http://purl.org/dc/terms"))
+          return "dcterms";
+        else if (uri.equals("http://purl.org/dc/qualifiers/1.0/"))
+          return "dcq";
+        else if (uri.equals("http://www.w3.org/1999/xhtml"))
+          return "xhtml";
+        else if (uri.equals("http://purl.org/dc/terms/1.0/"))
+          return "dct";
+        else if (uri.equals("http://www.w3.org/1999/xlink"))
+          return "xlink";
+        else if (uri.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#"))
+          return "rdf";
+        else if (uri.equals("http://www.w3.org/2001/XMLSchema-instance"))
+          return "xsi";
+        else if (uri.equals("http://www.w3.org/1998/Math/MathML"))
+          return "mml";
+        else
+          return null;
+      }
+
+      public Iterator getPrefixes(String namespace) {
+        return null;
+      }
+    };
+    return nsContext;    
+  }
+
+  
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/FilenameFilterExtension.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,22 @@
+package de.mpg.mpiwg.berlin.mpdl.xmlrpc;
+
+import java.io.File;
+import java.io.FilenameFilter;
+
+public class FilenameFilterExtension implements FilenameFilter {
+  
+  private String fileExtension;
+
+  public FilenameFilterExtension(String fileExtension) {
+    this.fileExtension = fileExtension;
+  }
+
+  public boolean accept(File dir, String name) {
+    String nameToLower = name.toLowerCase();
+    String fileExtensionToLower = fileExtension.toLowerCase();
+    return nameToLower.endsWith("." + fileExtensionToLower);
+    // MimeTable mimetab = MimeTable.getInstance();
+    // MimeType mime = mimetab.getContentTypeFor(name);
+    // return mime != null && mime.isXMLType();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/MpdlXmlRpcDocHandler.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,148 @@
+package de.mpg.mpiwg.berlin.mpdl.xmlrpc;
+
+import org.apache.log4j.Logger;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcInterface;
+import de.mpg.mpiwg.berlin.mpdl.xmlrpc.MpdlXmlRpcInterfaceImpl;
+
+/**
+ * Handler for eXist collections and documents (singleton).
+ * It cannot be used in a multi-threaded environment.
+ * Locally saved documents can be stored (over XML-RPC) into eXist
+ * collections. Collections can be configured per language (see the
+ * instance variable "languages" below).
+ * Your local directory structure must look like this:
+ * documents
+ *   archimedes
+ *     ar
+ *       yourDoc1.xml
+ *       ...
+ *     ...
+ *     zh
+ *       yourDoc1.xml
+ *       ...
+ *   echo
+ *     ar
+ *       yourDoc1.xml
+ *       ...
+ *     ...
+ *     zh
+ *       yourDoc1.xml
+ *       ...
+ *       
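+ *
+ * A minimal usage sketch (illustrative; assumes a MpdlDocOperation carrying
+ * docBase "echo", language "ar" and file name "yourDoc1.xml"):
+ * <pre>
+ *   MpdlXmlRpcDocHandler docHandler = MpdlXmlRpcDocHandler.getInstance();
+ *   if (!docHandler.documentExists(docOperation)) {
+ *     // stores documents/echo/ar/yourDoc1.xml into the morph and the standard collection
+ *     docHandler.saveDocumentFile(docOperation);
+ *   }
+ * </pre>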
+ */
+public class MpdlXmlRpcDocHandler {
+  private static MpdlXmlRpcDocHandler instance;
+  private static Logger LOGGER = Logger.getLogger(MpdlXmlRpcDocHandler.class); // Logs to EXIST_HOME/webapp/WEB-INF/logs/exist.log
+  private static String DOC_ROOT_COLLECTION_MORPH = "/db/mpdl/documents/morph";
+  private static String DOC_ROOT_COLLECTION_STANDARD = "/db/mpdl/documents/standard";
+  private static String LOCAL_DOC_DIR = MpdlConstants.MPDL_EXIST_DATA_DIR + "/" + "documents";
+  private static String SERVER_NAME = MpdlConstants.MPDL_EXIST_HOST_NAME;
+  private static int SERVER_PORT =  MpdlConstants.MPDL_EXIST_PORT;
+  private static String ADMIN_USER_NAME = MpdlConstants.MPDL_EXIST_ADMIN_USER_NAME;
+  private static String ADMIN_USER_PW = MpdlConstants.MPDL_EXIST_ADMIN_USER_PW;
+  private MpdlXmlRpcInterface eXistXmlRpcInterface = null;
+
+  public static MpdlXmlRpcDocHandler getInstance() throws ApplicationException {
+    if (instance == null) {
+      instance = new MpdlXmlRpcDocHandler();
+      instance.init();
+    }
+    return instance;
+  }
+
+  private void init() throws ApplicationException {
+    eXistXmlRpcInterface = MpdlXmlRpcInterfaceImpl.getInstance(SERVER_NAME, SERVER_PORT, ADMIN_USER_NAME, ADMIN_USER_PW);
+  }
+  
+  public boolean documentExists(MpdlDocOperation docOperation) throws ApplicationException {
+    String docFileName = docOperation.getFileName();
+    String docBase = docOperation.getDocBase();
+    String language = docOperation.getLanguage();
+    boolean docExists = documentExists(docBase, language, docFileName);
+    return docExists;
+  }
+  
+  public void saveDocumentFile(MpdlDocOperation docOperation) throws ApplicationException {
+    String docFileName = docOperation.getFileName();
+    String docBase = docOperation.getDocBase();
+    String language = docOperation.getLanguage();
+    saveDocumentFile(docBase, language, docFileName);
+  }
+  
+  public void saveDocumentFile(String localFile, String existIdentifier) throws ApplicationException {
+    int index = existIdentifier.lastIndexOf("/");
+    String collection = existIdentifier.substring(0, index);
+    String docFileName = existIdentifier.substring(index); 
+    String documentCollectionMorph = DOC_ROOT_COLLECTION_MORPH + collection;
+    String documentCollectionStandard = DOC_ROOT_COLLECTION_STANDARD + collection;
+    eXistXmlRpcInterface.saveDocument(documentCollectionMorph, docFileName, localFile);
+    LOGGER.info("MPDL: XML-RPC: Document: \"" + localFile + "\" saved to eXist collection: \"" + documentCollectionMorph + "\"");
+    eXistXmlRpcInterface.saveDocument(documentCollectionStandard, docFileName, localFile);
+    LOGGER.info("MPDL: XML-RPC: Document: \"" + localFile + "\" saved to eXist collection: \"" + documentCollectionStandard + "\"");
+  }
+  
+  public void deleteDocumentFile(MpdlDocOperation docOperation) throws ApplicationException {
+    String docFileName = docOperation.getFileName();
+    String docBase = docOperation.getDocBase();
+    String language = docOperation.getLanguage();
+    deleteDocumentFile(docBase, language, docFileName);
+  }
+  
+  public void deleteDocumentFile(String existIdentifier) throws ApplicationException {
+    int index = existIdentifier.lastIndexOf("/");
+    String collection = existIdentifier.substring(0, index);
+    String docFileName = existIdentifier.substring(index); 
+    String documentCollectionMorph = DOC_ROOT_COLLECTION_MORPH + collection;
+    String documentCollectionStandard = DOC_ROOT_COLLECTION_STANDARD + collection;
+    eXistXmlRpcInterface.deleteDocument(documentCollectionMorph, docFileName);
+    LOGGER.info("MPDL: XML-RPC: Document deleted: \"" + documentCollectionMorph + "/" + docFileName + "\"");
+    eXistXmlRpcInterface.deleteDocument(documentCollectionStandard, docFileName);
+    LOGGER.info("MPDL: XML-RPC: Document deleted: \"" + documentCollectionStandard + "/" + docFileName + "\"");
+  }
+  
+  public void createCollection(String fullCollectionName) throws ApplicationException {
+    eXistXmlRpcInterface.createCollection(fullCollectionName);
+  }
+  
+  public void deleteCollection(String fullCollectionName) throws ApplicationException {
+    eXistXmlRpcInterface.deleteCollection(fullCollectionName);
+  }
+  
+  private boolean documentExists(String docBase, String language, String docFileName) throws ApplicationException {
+    String documentCollection = DOC_ROOT_COLLECTION_STANDARD + "/" + docBase + "/" + language;
+    String fullDocName = documentCollection + "/" + docFileName;
+    String[] fullCollectionDocNames = eXistXmlRpcInterface.getDocumentNames(documentCollection);
+    boolean isAvailable = false;
+    if (fullCollectionDocNames != null) {
+      for (int i=0; i<fullCollectionDocNames.length; i++) {
+        String docName = fullCollectionDocNames[i];
+        if (docName.equals(fullDocName))
+          isAvailable = true;
+      }
+    }
+    return isAvailable;
+  }
+  
+  private void saveDocumentFile(String docBase, String language, String docFileName) throws ApplicationException {
+    String documentCollectionMorph = DOC_ROOT_COLLECTION_MORPH + "/" + docBase + "/" + language;
+    String documentCollectionStandard = DOC_ROOT_COLLECTION_STANDARD + "/" + docBase + "/" + language;
+    String localFile = LOCAL_DOC_DIR + "/" + docBase + "/" + language + "/" + docFileName;
+    eXistXmlRpcInterface.saveDocument(documentCollectionMorph, docFileName, localFile);
+    LOGGER.info("MPDL: XML-RPC: Document: \"" + localFile + "\" saved to eXist collection: \"" + documentCollectionMorph + "\"");
+    eXistXmlRpcInterface.saveDocument(documentCollectionStandard, docFileName, localFile);
+    LOGGER.info("MPDL: XML-RPC: Document: \"" + localFile + "\" saved to eXist collection: \"" + documentCollectionStandard + "\"");
+  }
+  
+  private void deleteDocumentFile(String docBase, String language, String docFileName) throws ApplicationException {
+    String documentCollectionMorph = DOC_ROOT_COLLECTION_MORPH + "/" + docBase + "/" + language;
+    String documentCollectionStandard = DOC_ROOT_COLLECTION_STANDARD + "/" + docBase + "/" + language;
+    eXistXmlRpcInterface.deleteDocument(documentCollectionMorph, docFileName);
+    LOGGER.info("MPDL: XML-RPC: Document deleted: \"" + documentCollectionMorph + "/" + docFileName + "\"");
+    eXistXmlRpcInterface.deleteDocument(documentCollectionStandard, docFileName);
+    LOGGER.info("MPDL: XML-RPC: Document deleted: \"" + documentCollectionStandard + "/" + docFileName + "\"");
+  }
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/MpdlXmlRpcInterface.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,76 @@
+package de.mpg.mpiwg.berlin.mpdl.xmlrpc;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
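+/**
+ * XML-RPC based interface for handling eXist collections and documents.
+ * A minimal usage sketch (server settings are illustrative; the real configuration comes from MpdlConstants):
+ * <pre>
+ *   MpdlXmlRpcInterface existInterface = MpdlXmlRpcInterfaceImpl.getInstance("localhost", 8090, "admin", "");
+ *   existInterface.createCollection("/db/test");
+ *   existInterface.saveDocument("/db/test", "test.xml", "/tmp/test.xml");
+ * </pre>
+ */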
+public interface MpdlXmlRpcInterface {
+  /**
+   * Get all document names of a collection (without subcollections)
+   * @param collection  collection name e.g. /db/test
+   * @return string array of full document names e.g. {"/db/test/test1.xml", "/db/test/test2.xml"}
+   */
+  public String[] getDocumentNames(String collection) throws ApplicationException;
+
+  /**
+   * Read a document from a collection into a local file
+   * @param collection  collection name e.g. /db/test
+   * @param documentName  document name in the collection e.g. test.xml 
+   * @param localFile  file name (with file path) to which the document is written
+   * @return true if the document was read into the local file, otherwise false
+   */
+  public boolean readDocumentIntoLocalFile(String collection, String documentName, String localFile) throws ApplicationException;
+  
+  /**
+   * Save a local file into a collection. If a document with that name already exists in that collection, it is overwritten.
+   * @param collection  collection name e.g. /db/test
+   * @param documentName  document name under which the document is saved in the collection, e.g. test.xml 
+   * @param localFile  file name (with file path) of the local source file to be saved into the collection
+   * @return true if the local file was saved into the collection, otherwise false
+   */
+  public boolean saveDocument(String collection, String documentName, String localFile) throws ApplicationException;
+  
+  /**
+   * Save all local files of a directory (without subdirectories) into a collection. Existing documents with the same name in that collection are overwritten.
+   * @param collection  collection name e.g. /db/test
+   * @param localFileDirectory  directory from which the documents are saved into the eXist collection (subdirectories are not descended into)
+   * @return true if all local files in localFileDirectory were saved into the collection, otherwise false
+   */
+  public boolean saveDocuments(String collection, String localFileDirectory) throws ApplicationException;
+  
+  /**
+   * Save all local files of a directory with the given fileExtension (without subdirectories) into a collection. Existing documents with the same name in that collection are overwritten.
+   * @param collection  collection name e.g. /db/test
+   * @param localFileDirectory  directory from which the documents are saved into the eXist collection (subdirectories are not descended into)
+   * @param fileExtension  file extension name, e.g. xml
+   * @return true if all local files in localFileDirectory were saved into the collection, otherwise false
+   */
+  public boolean saveDocuments(String collection, String localFileDirectory, String fileExtension) throws ApplicationException;
+  
+  /**
+   * Delete all documents of a collection
+   * @param collection  collection name e.g. /db/test
+   * @return true if all documents in that collection could be deleted, otherwise false
+   */
+  public boolean deleteDocuments(String collection) throws ApplicationException;
+  
+  /**
+   * Delete a document from a collection
+   * @param collection  collection name e.g. /db/test
+   * @param documentName  document name in the collection e.g. test.xml 
+   * @return true if the document could be deleted, otherwise false
+   */
+  public boolean deleteDocument(String collection, String documentName) throws ApplicationException;
+  
+  /**
+   * Create the collection
+   * @param collection e.g. /db/test
+   * @return true if the collection could be created, otherwise false
+   */
+  public boolean createCollection(String collection) throws ApplicationException;
+  
+  /**
+   * Delete the collection including all of its documents and subcollections
+   * @param collection  collection name e.g. /db/test
+   * @return true if the collection could be deleted, otherwise false
+   */
+  public boolean deleteCollection(String collection) throws ApplicationException;
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/de/mpg/mpiwg/berlin/mpdl/xmlrpc/MpdlXmlRpcInterfaceImpl.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,434 @@
+package de.mpg.mpiwg.berlin.mpdl.xmlrpc;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+
+import org.apache.log4j.Logger;
+import org.apache.xmlrpc.XmlRpcException;
+import org.apache.xmlrpc.client.XmlRpcClient;
+import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+public class MpdlXmlRpcInterfaceImpl implements MpdlXmlRpcInterface {
+  private static MpdlXmlRpcInterfaceImpl instance;
+  private static Logger LOGGER = Logger.getLogger(MpdlXmlRpcInterfaceImpl.class); // Logs to EXIST_HOME/webapp/WEB-INF/logs/exist.log
+  private String serverName = "localhost";
+  private int port = 8090;
+  private String xmlRpcUri = "http://" + serverName + ":" + port + "/exist/xmlrpc";
+  private XmlRpcClient xmlClient = null;
+  private String userName = "admin";
+  private String pw = "";
+
+  private MpdlXmlRpcInterfaceImpl() {
+  }
+  
+  public static MpdlXmlRpcInterfaceImpl getInstance(String serverName, int port, String userName, String pw) throws ApplicationException {
+    if (instance == null) {
+      instance = new MpdlXmlRpcInterfaceImpl();
+    }
+    instance.serverName = serverName;
+    instance.port = port;
+    instance.userName = userName;
+    instance.pw = pw;
+    instance.xmlRpcUri = "http://" + serverName + ":" + port + "/exist/xmlrpc";
+    instance.init();
+    return instance;
+  }
+
+  public String[] getDocumentNames(String collection) throws ApplicationException {
+    String[] documentNames = null;
+    try {
+      Object[] params = new Object[1];
+      params[0] = collection;
+      HashMap collectionDesc = (HashMap) xmlClient.execute("getCollectionDesc", params);
+      Object[] documents = (Object[]) collectionDesc.get("documents");
+      documentNames = new String[documents.length];
+      for (int i=0; i < documents.length; i++) {
+        HashMap doc = (HashMap) documents[i];
+        String docName = (String) doc.get("name");
+        documentNames[i] = collection + "/" + docName;
+      }
+    } catch (XmlRpcException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }
+    return documentNames;
+  }
+
+  public boolean readDocumentIntoLocalFile(String collection, String documentName, String localFileName) throws ApplicationException {
+    boolean success = false;
+    OutputStream out = null;
+    try {
+      out = new BufferedOutputStream(new FileOutputStream(localFileName));
+      Hashtable options = new Hashtable();
+      options.put("indent", "yes");  
+      options.put("encoding", "UTF-8");  
+      options.put("expand-xincludes", "yes");  
+      options.put("highlight-matches", "elements");  
+      // get the document from eXist 
+      Object[] params = new Object[2];
+      params[0] = collection + "/" + documentName;
+      params[1] = options;  
+      HashMap result = (HashMap) xmlClient.execute("getDocumentData", params);
+      byte[] data = (byte[]) result.get("data");
+      String handle = (String) result.get("handle");
+      Integer offset = (Integer) result.get("offset");
+      out.write(data); // the first chunk of data
+      // if there are more than one chunk of data
+      while (offset != 0) {
+        Object[] paramsGetNextChunk = new Object[2];
+        paramsGetNextChunk[0] = handle;
+        paramsGetNextChunk[1] = offset;  
+        HashMap resultChunk = (HashMap) xmlClient.execute("getNextChunk", paramsGetNextChunk);
+        data = (byte[]) resultChunk.get("data");
+        out.write(data);
+        offset = (Integer) resultChunk.get("offset");
+      }
+      out.flush();
+      success = true;
+    } catch (XmlRpcException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } finally {
+      try { 
+        if (out != null)
+          out.close();
+      } catch (Exception e) {
+        // nothing: always close the stream at the end of the method
+      }
+    }
+    return success;
+  }  
+
+  public boolean saveDocument(String collection, String documentName, String localFile) throws ApplicationException {
+    boolean success = false;
+    String mimeType = getMimeType(localFile);  // e.g. text/xml or application/xml ...
+    try {
+      testFile(localFile);
+      // first: create an empty file on server
+      byte[] dummyChunk = new byte[0];
+      Object[] paramsUpload1 = new Object[2];
+      paramsUpload1[0] = dummyChunk;
+      paramsUpload1[1] = 0;
+      String tmpFileNameOnServer = (String) xmlClient.execute("upload", paramsUpload1);  
+      // second: upload the whole file in chunks to server file; normally needs 3 seconds for 20 MB
+      int chunkSize = 20000 * 1024;  // copies data from a file in 20 MB chunks to server file so that not too much RAM is consumed on server 
+      InputStream localFileInputStream = new BufferedInputStream(new FileInputStream(localFile));
+      byte[] chunk;  // assigned by readBytes() on each loop iteration
+      while ((chunk = readBytes(localFileInputStream, chunkSize)) != null) {
+        int realChunkSize = chunk.length;  // is smaller than chunkSize if last chunk is read
+        Object[] paramsUpload2 = new Object[3];
+        paramsUpload2[0] = tmpFileNameOnServer;
+        paramsUpload2[1] = chunk;
+        paramsUpload2[2] = realChunkSize;
+        String uploadResult = (String) xmlClient.execute("upload", paramsUpload2);
+      }
+      // third: parseLocal
+      String docName = collection + "/" + documentName;
+      Object[] paramsParseLocal = new Object[4];
+      paramsParseLocal[0] = tmpFileNameOnServer;
+      paramsParseLocal[1] = docName;
+      paramsParseLocal[2] = true;  // overwrites an existing file of the same name if it exists
+      paramsParseLocal[3] = mimeType; // mime type of the document   
+      success = (Boolean) xmlClient.execute("parseLocal", paramsParseLocal);
+    } catch (XmlRpcException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } catch (FileNotFoundException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }
+    return success;
+  }
+
+  public boolean saveDocuments(String collection, String localFileDirectory) throws ApplicationException {
+    boolean success = true;
+    testFile(localFileDirectory); // check that the directory exists
+    File localFileDir = new File(localFileDirectory);
+    File[] files = localFileDir.listFiles();
+    for (int i=0; i < files.length; i++) {
+      File f = files[i];
+      String fileNameWithoutPath = f.getName();
+      String localFileName = f.getPath();
+      boolean localSuccess = saveDocument(collection, fileNameWithoutPath, localFileName);
+      if (! localSuccess)
+        success = false;
+    }
+    return success;
+  }
+
+  public boolean saveDocuments(String collection, String localFileDirectory, String fileExtension) throws ApplicationException {
+    boolean success = true;
+    testFile(localFileDirectory); // check that the directory exists
+    File localFileDir = new File(localFileDirectory);
+    FilenameFilter filter = new FilenameFilterExtension(fileExtension);
+    File[] files = localFileDir.listFiles(filter);
+    for (int i=0; i < files.length; i++) {
+      File f = files[i];
+      String fileNameWithoutPath = f.getName();
+      String localFileName = f.getPath();
+      boolean localSuccess = saveDocument(collection, fileNameWithoutPath, localFileName);
+      if (! localSuccess)
+        success = false;
+    }
+    return success;
+  }
+
+  public boolean deleteDocument(String collection, String documentName) throws ApplicationException {
+    boolean success = false;
+    String[] docDesc = getDocumentDescription(collection, documentName);
+    if (docDesc == null) {  // document does not exist
+      System.out.println("Warning: Could not delete: " + collection + "/" + documentName + " because it does not exist");
+    } else {
+      success = deleteDocument(collection + "/" + documentName);
+    }
+    return success;
+  }
+
+  public boolean deleteDocuments(String collection) throws ApplicationException {
+    boolean success = true;
+    String[] documentNames = getDocumentNames(collection);
+    for (int i=0; i < documentNames.length; i++) {
+      String documentName = documentNames[i];
+      boolean localSuccess = deleteDocument(documentName);
+      if (! localSuccess)
+        success = false;
+    }
+    return success;
+  }
+
+  public boolean createCollection(String collection) throws ApplicationException {
+    boolean success = false;
+    try {
+      Object[] params = new Object[1];
+      params[0] = collection;
+      success = (Boolean) xmlClient.execute("createCollection", params);  
+    } catch (XmlRpcException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }
+    return success;
+  }
+
+  public boolean deleteCollection(String collection) throws ApplicationException {
+    boolean success = false;
+    try {
+      Object[] params = new Object[1];
+      params[0] = collection;
+      success = (Boolean) xmlClient.execute("removeCollection", params);  
+    } catch (XmlRpcException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }
+    return success;
+  }
+
+  private void init() throws ApplicationException {
+    try {
+      XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
+      config.setServerURL(new URL(xmlRpcUri));
+      config.setBasicUserName(userName);
+      config.setBasicPassword(pw);
+      xmlClient = new XmlRpcClient();
+      xmlClient.setConfig(config);
+    } catch (MalformedURLException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }    
+  }
+  
+  private void logError(Exception e) {
+    LOGGER.error(e.getMessage(), e);  // log the message together with the full stack trace (e.getStackTrace() would only log the array reference)
+  }
+  
+  private String[] getDocumentDescription(String collection, String documentName) throws ApplicationException {
+    String[] documentDesc = new String[3];
+    try {
+      Object[] params = new Object[1];
+      params[0] = collection + "/" + documentName;
+      HashMap docDesc = (HashMap) xmlClient.execute("describeResource", params);
+      if (docDesc == null || docDesc.isEmpty())
+        return null;
+      String name = (String) docDesc.get("name");
+      String owner = (String) docDesc.get("owner");
+      String group = (String) docDesc.get("group");
+      documentDesc[0] = name;
+      documentDesc[1] = owner;
+      documentDesc[2] = group;
+    } catch (XmlRpcException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }
+    return documentDesc;
+  }
+  
+  private boolean deleteDocument(String fullDocumentName) throws ApplicationException {
+    boolean success = false;
+    try {
+      Object[] params = new Object[1];
+      params[0] = fullDocumentName;
+      success = (Boolean) xmlClient.execute("remove", params);  
+    } catch (XmlRpcException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }
+    return success;
+  }
+
+  private void testFile(String fileName) throws ApplicationException {
+    File file = new File(fileName);
+    boolean fileExists = file.exists();
+    if (! fileExists) {
+      throw new ApplicationException("File: " + fileName + " does not exist");
+    }
+  }
+  
+  /**
+   *  Reads a chunk of data from an input stream.
+   *  Does not close the stream until the last bytes are read.
+   *  @param in the input stream to be read
+   *  @param chunkSize length of the chunk to be read
+   *  @return the bytes read, or null if the end of the stream was reached
+   */
+  private byte[] readBytes(InputStream in, int chunkSize) throws ApplicationException {
+    byte[] resultBytes = new byte[chunkSize];
+    try {
+      int len = in.read(resultBytes, 0, chunkSize);
+      if (len == -1) {
+        try { in.close(); } catch (Exception e) { }  // close the stream if end of file is reached
+        resultBytes = null;
+      } else if (len < chunkSize) {  // fewer bytes than requested were read (e.g. the last chunk of the file): shrink the array to the actual length
+        byte[] tmp = new byte[len];
+        System.arraycopy(resultBytes, 0, tmp, 0, len);
+        resultBytes = tmp;
+      }
+    } catch (FileNotFoundException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } 
+    return resultBytes;  
+  }
+
+  /**
+   *  Reads a whole file into a byte array, growing the array as data is read.
+   *  @param file the file to be read
+   *  @return the file content as a byte array
+   */
+  private byte[] readAllBytes(String file) throws ApplicationException {
+    InputStream in = null;
+    byte[] out = new byte[0]; 
+    try {
+      in = new BufferedInputStream(new FileInputStream(file));
+      // the length of a buffer can vary
+      int bufLen = 20000*1024;
+      byte[] buf = new byte[bufLen];
+      byte[] tmp = null;
+      int len = 0;
+      while((len = in.read(buf, 0, bufLen)) != -1) {
+        // extend array
+        tmp = new byte[out.length + len];
+        System.arraycopy(out, 0, tmp, 0, out.length);
+        System.arraycopy(buf, 0, tmp, out.length, len);
+        out = tmp;
+        tmp = null;            
+      }
+    } catch (FileNotFoundException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } finally {
+      // always close the stream 
+      if (in != null) try { in.close(); } catch (Exception e) { }
+    }
+    return out;  
+  }
+
+  private String getMimeType(String fileName) throws ApplicationException {
+    String mimeType = null;
+    File file = new File(fileName);
+    try {
+      URI uri = file.toURI();
+      URL url = uri.toURL();
+      URLConnection urlConnection = url.openConnection();
+      mimeType = urlConnection.getContentType();
+    } catch (MalformedURLException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    }
+    return mimeType;
+  }
+  
+  /**
+   *  Reads a whole file into a byte array, collecting the chunks in a list first.
+   *  @param file the file to be read
+   *  @return the file content as a byte array
+   *  TODO: test whether this method is really faster than readAllBytes
+   */
+  private byte[] readAllBytesFast(String file) throws ApplicationException {
+    InputStream in = null;
+    byte[] buf = null; 
+    int bufLen = 20000*1024;
+    try {
+      in = new BufferedInputStream(new FileInputStream(file));
+      buf = new byte[bufLen];
+      byte[] tmp = null;
+      int len    = 0;
+      List data  = new ArrayList(24); // keeps the pieces of data
+      while((len = in.read(buf, 0, bufLen)) != -1){
+        tmp = new byte[len];
+        System.arraycopy(buf, 0, tmp, 0, len); // copy only the bytes actually read 
+        data.add(tmp);
+      }
+      /* This part is optional: the method could also return the List
+         for further processing, etc. */
+      len = 0;
+      if (data.size() == 1) return (byte[]) data.get(0);
+      for (int i=0;i<data.size();i++) len += ((byte[]) data.get(i)).length;
+      buf = new byte[len]; // final output buffer 
+      len = 0;
+      for (int i=0;i<data.size();i++){ // fill with data 
+        tmp = (byte[]) data.get(i);
+        System.arraycopy(tmp,0,buf,len,tmp.length);
+        len += tmp.length;
+      } 
+    } catch (FileNotFoundException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } catch (IOException e) {
+      logError(e);
+      throw new ApplicationException(e);
+    } finally {
+      if (in != null) try { in.close(); } catch (Exception e) {}
+    }
+    return buf;  
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/document/DocumentModule.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,58 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: DocumentModule.java $
+ */
+package org.exist.xquery.modules.document;
+
+import org.exist.xquery.AbstractInternalModule;
+import org.exist.xquery.FunctionDef;
+
+/**
+ * eXist Document Module Extension
+ * 
+ * @author Josef Willenborg, Max Planck Institute for the history of science,
+ * http://www.mpiwg-berlin.mpg.de, jwillenborg@mpiwg-berlin.mpg.de 
+ */
+public class DocumentModule extends AbstractInternalModule {
+  public final static String NAMESPACE_URI = "http://exist-db.org/xquery/document";
+  public final static String PREFIX = "doc";
+  
+  private final static FunctionDef[] functions = {
+    new FunctionDef(GetFragmentBetweenFunctionByFileSearch.signature, GetFragmentBetweenFunctionByFileSearch.class)
+  };
+  
+  public DocumentModule() {
+    super(functions);
+  }
+  
+  public String getNamespaceURI() {
+    return NAMESPACE_URI;
+  }
+  
+  public String getDefaultPrefix() {
+    return PREFIX;
+  }
+  
+  public String getDescription() {
+    return "A module for document functions";
+  }
+  
+  public String getReleaseVersion() {
+    return "1.0";  // TODO: replace with the actual release version
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/document/GetFragmentBetweenFunctionByFileSearch.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,284 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2009 Max Planck Institute for the history of science.
+ *  Josef Willenborg, jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: GetFragmentBetweenFunction.java $
+ */
+package org.exist.xquery.modules.document;
+
+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.ArrayList;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.CompiledXQuery;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQuery;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Item;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+/**
+ * Delivers the fragment between two milestones in an XML document.
+ * For most XML documents this is faster because the fragment is
+ * determined directly by a file search on the XML file on
+ * the file system.
+ * Precondition of this function is that the whole XML document can be
+ * read from the file system cache (see FS_DOC_CACHE_PATH as the root path
+ * for the XML documents).
+ * Static XML documents can simply be copied to that file system path.
+ * See also the extension of the eXist class RpcConnection, which
+ * maintains the FS_DOC_CACHE for all important operations on XML documents.
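+ *
+ * XQuery usage sketch (illustrative arguments; the prefix "doc" is DocumentModule.PREFIX):
+ * <pre>
+ *   doc:getFragmentBetween("/db/shakespeare/hamlet.xml", "pb", 10, 11)
+ * </pre>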
+ */
+public class GetFragmentBetweenFunctionByFileSearch extends BasicFunction {
+  private final static String FS_DOC_CACHE_PATH = "/webapp/WEB-INF/data/fs-doc-cache";
+  
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+	      new QName("getFragmentBetween", DocumentModule.NAMESPACE_URI, DocumentModule.PREFIX),
+	      "A function which delivers the XML fragment between two milestones",
+        new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ONE), 
+                             new SequenceType(Type.STRING, Cardinality.ONE), 
+                             new SequenceType(Type.INTEGER, Cardinality.ONE),
+                             new SequenceType(Type.INTEGER, Cardinality.ONE) },
+	      new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
+
+	public GetFragmentBetweenFunctionByFileSearch(XQueryContext context) {
+		super(context, signature);
+	}
+
+	/*
+	 * Get the fragment between two milestones by position
+   * @param args 1. docUriStr document URI (e.g. /db/shakespeare/hamlet.xml), 
+   * 2. msName milestone name (e.g.: pb), 3. msPositionFrom first milestone (e.g.: 10), 
+   * 4. msPositionTo second milestone (e.g.: 11)
+	 */
+  public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence docUri = args[0];
+    Sequence milestoneName = args[1];
+    Sequence milestonePositionFrom = args[2];
+    Sequence milestonePositionTo = args[3];
+    String docUriStr = docUri.getStringValue();
+    String milestoneNameStr = milestoneName.getStringValue();
+    int milestonePositionFromInt = Integer.parseInt(milestonePositionFrom.getStringValue());
+    int milestonePositionToInt = Integer.parseInt(milestonePositionTo.getStringValue());
+    ValueSequence resultFragment = new ValueSequence();
+    int countMs = getCountMs(docUriStr, milestoneNameStr); 
+    // test milestone positions within document: return an empty string if not valid
+    if (milestonePositionFromInt < 1 || milestonePositionToInt <= milestonePositionFromInt || milestonePositionFromInt > countMs || milestonePositionToInt > countMs+1) {
+      resultFragment.add(new StringValue(""));
+      return resultFragment;
+    }
+    String msFromPathName = getNodePath(docUriStr, milestoneNameStr, milestonePositionFromInt);
+    String msToPathName = getNodePath(docUriStr, milestoneNameStr, milestonePositionToInt);
+    String openElementsOfMsFrom = pathName2XmlTags(msFromPathName, "open");    
+    String closingElementsOfMsTo = pathName2XmlTags(msToPathName, "close");  
+    // fetch the fragment between the two milestones
+    String fragment = getFragmentBetween(docUriStr, milestoneNameStr, milestonePositionFromInt, milestonePositionToInt);
+    fragment = openElementsOfMsFrom + fragment + closingElementsOfMsTo; 
+    StringValue strValFragment = new StringValue(fragment);
+    resultFragment.add(strValFragment);
+    return resultFragment;
+  }
+
+  /**
+	 * Fetch the fragment between two milestones in an XML document
+	 * bufferSize is important for better performance: each chunk in this size is 
+	 * matched against the regular expression, if it is too small or too high then 
+	 * performance could be bad
+	 * @param docUriStr document URI (e.g. /db/shakespeare/hamlet.xml)
+	 * @param msName milestone name (e.g.: pb)
+	 * @param msPositionFrom first milestone (e.g.: 10) 
+	 * @param msPositionTo second milestone (e.g.: 11)
+	 * @return fragment between the two milestones with msPositionFrom and msPositionTo
+	 * @throws XPathException
+	 */
+  private String getFragmentBetween(String docUriStr, String msName, int msPositionFrom, int msPositionTo) throws XPathException {
+    int bufferSize = 16384; // performance: buffer size 4096 is 25% slower
+    String existHomeFilePath = getExistHomeFilePath();
+    String docLocalFileName = existHomeFilePath + FS_DOC_CACHE_PATH + docUriStr;
+    /*
+     * find milestones: <pb n=7 />
+     * find milestones explicitly closed: <pb n=5>blabla</pb>
+     * find milestones in multilines:
+     *   <pb
+     *    n=10
+     *   />
+     * find case insensitive and in multilines: Pattern.CASE_INSENSITIVE | Pattern.MULTILINE
+     */
+    String regExprMsInternClosed = "<" + msName + "[^>]*?/>";
+    String regExprMsExternClosed = "<" + msName + "[^>]*?>[^>]*?</" + msName + "\\s*>";
+    String regExprMilestone = regExprMsInternClosed + "|" + regExprMsExternClosed;
+    Pattern p = Pattern.compile(regExprMilestone, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); // both flags enabled
+    String readBufferStr = "";
+    char[] readBuffer = new char[bufferSize];
+    String msFragmentBuffer = "";
+    int msCount = 0;
+    String result = "";
+    boolean eof = false;
+    String ms = "";
+    try {
+      // read the cached XML file explicitly as UTF-8 (assumption: the file system cache stores UTF-8) instead of the platform default encoding
+      BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(docLocalFileName), "UTF-8"));
+      while (!eof && !(msCount >= msPositionTo)) {
+        int countReadChars = in.read(readBuffer, 0, bufferSize);
+        // last page: delivers all characters to the end in the document 
+        if (countReadChars == -1) {
+          eof = true;
+          in.close();
+          return result + ms + msFragmentBuffer;  // include what was already collected plus the tail after the last milestone
+        }
+        readBufferStr = new String(readBuffer, 0, countReadChars);
+        msFragmentBuffer = msFragmentBuffer + readBufferStr;
+        Matcher m = p.matcher(msFragmentBuffer);
+        int fragmentBeginPos = 0;
+        while (m.find()) {
+          int msBeginPos = m.start();
+          int msEndPos = m.end();
+          // we have the milestone fragments (milestone end could be matched) one by one
+          // milestone end: cut the part in the last line before the milestone
+          String msFragment = ms + msFragmentBuffer.substring(fragmentBeginPos, msBeginPos);
+          // add result milestone fragments which are between msPositionFrom and msPositionTo
+          // last fragment in document (last page): is not added
+          if (msCount >= msPositionFrom && msCount < msPositionTo) {
+            result = result + msFragment;
+          }
+          fragmentBeginPos = msEndPos;
+          ms = msFragmentBuffer.substring(msBeginPos, msEndPos);
+          msCount++; // each found milestone increments the count of milestones
+        }
+        // delivers the portion after the last found milestone; this is used for the next msFragmentBuffer for matching
+        msFragmentBuffer = msFragmentBuffer.substring(fragmentBeginPos, msFragmentBuffer.length());
+      }
+      in.close();
+    } catch (IOException e) {
+      throw new XPathException(e);
+    }
+    return result;
+  }
+
+  private String getNodePath(String docPath, String msName, int position) throws XPathException {
+    String query = 
+      "let $ms := doc('" + docPath + "')//" + msName + "[" + position + "]/.. \n" + 
+      "let $result := " +
+      "  if ($ms) " +
+      "  then util:node-xpath($ms)" +
+      "  else (\"\") \n" +
+      "return $result"; 
+    String nodePath = executeXQuery(query);
+    return nodePath;
+  }
+
+  private int getCountMs(String docPath, String msName) throws XPathException {
+    int count = -1;
+    String query = "let $result := count(doc('" + docPath + "')//" + msName + ")" + "\n" + "return $result"; 
+    String resultStr = executeXQuery(query);
+    count = Integer.parseInt(resultStr);
+    return count;
+  }
+
+  /**
+   * A path name delivered by function xnode-path (with special strings such as 
+   * "@", "[", "]", " eq ") is converted to an XML String with xml tags, 
+   * opened or closed such as the mode says
+   * @param pathName delivered by function xnode-path: Example: /archimedes[@xmlns:xlink eq "http://www.w3.org/1999/xlink"]/text/body/chap/p[@type eq "main"]/s/foreign[@lang eq "en"]
+   * @param mode open or close
+   * @return xml tags opened or closed
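+   * <p>Worked example (mode "open"): for the pathName above the result is, one tag per line:
+   * <pre>
+   *   <archimedes xmlns:xlink="http://www.w3.org/1999/xlink">
+   *   <text>
+   *   <body>
+   *   <chap>
+   *   <p type="main">
+   *   <s>
+   *   <foreign lang="en">
+   * </pre>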
+   */
+  private String pathName2XmlTags(String pathName, String mode) {
+    String result = "";
+    ArrayList<String> elements = pathName2ElementsWithAttributes(pathName);
+    if (mode.equals("open")) {
+      for (int i=0; i < elements.size(); i++) {
+        String element = elements.get(i);
+        element = element.replaceAll("\\[", " ");  // opening element: replace open bracket with space
+        element = element.replaceAll(" eq ", "=");  // opening element: replace " eq " by "="
+        element = element.replaceAll("@", "");  // opening element: remove @ character 
+        element = element.replaceAll("\\]", "");  // opening element: remove closing bracket
+        if (! (element.length() == 0))
+          result += "<" + element + ">\n";
+      }
+    } else if (mode.equals("close")) {
+      for (int i=elements.size()-1; i >= 0; i--) {
+        String element = elements.get(i);
+        element = element.replaceAll("\\[[^\\]]*\\]", "");  // closing element: remove brackets with attributes
+        if (! (element.length() == 0))
+          result += "</" + element + ">\n";
+      }
+    }
+    return result;
+  }
+  
+  private ArrayList<String> pathName2ElementsWithAttributes(String pathName) {
+    ArrayList<String> result = new ArrayList<String>();
+    String regExpr = "/[^/]+\\[[^\\]]*\\]" + "|" + "/[^/\\[]+"; // pathName example: "/archimedes[@xmlns:xlink eq "http://www.w3.org/1999/xlink"]/text/body/chap/p[@type eq "main"]/s/foreign[@lang eq "en"]"
+    Pattern p = Pattern.compile(regExpr, Pattern.CASE_INSENSITIVE | Pattern.MULTILINE); // both flags enabled
+    Matcher m = p.matcher(pathName);
+    while (m.find()) {
+      int msBeginPos = m.start();
+      int msEndPos = m.end();
+      String elementName = pathName.substring(msBeginPos+1, msEndPos);  // without first "/" character
+      result.add(elementName);
+    }
+    return result;
+  }
+  
+  private String getExistHomeFilePath() throws XPathException {
+    return context.getBroker().getConfiguration().getExistHome().getAbsolutePath();
+  }
+
+  private String executeXQuery(String xQueryStr) throws XPathException {
+    XQuery xQuery = context.getBroker().getXQueryService();
+    CompiledXQuery compiledXQuery = xQuery.compile(context, xQueryStr);
+    Sequence sequence = compiledXQuery.eval(null); // without context
+    Item item = sequence.itemAt(0);
+    String nodeValueStr = item.getStringValue();
+    return nodeValueStr;
+  }
+
+  /**
+   * Not yet used, but may be useful in the future
+   * @param docPath
+   * @return
+   * @throws XPathException
+   */
+  private String getNamespaceString(String docPath) throws XPathException {
+    String query = 
+      "let $elem := doc('" + docPath + "')/*" + "\n" + 
+      "let $prefs := in-scope-prefixes($elem)" + "\n" + 
+      "for $pref in $prefs" + "\n" + 
+      "  let $uri := namespace-uri-for-prefix($pref, $elem)" + "\n" + 
+      "  let $result := " +
+      "    if ($pref = \"xml\") " +
+      "    then ()" + "\n" +
+      "    else  concat(\"xmlns:\", $pref, \"=&quot;\", $uri, \"&quot;\") \n" +
+      "return $result"; 
+      String resultStr = executeXQuery(query);
+      return resultStr;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/CheckUri.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,91 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id:  $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import java.io.IOException;
+import org.apache.commons.httpclient.HttpClient;
+import org.apache.commons.httpclient.methods.GetMethod;
+import org.apache.commons.httpclient.params.HttpClientParams;
+import org.apache.commons.httpclient.params.HttpMethodParams;
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.BooleanValue;
+import org.exist.xquery.value.NumericValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class CheckUri extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("check-uri", MPDLDocModule.NAMESPACE_URI, MPDLDocModule.PREFIX),
+			"A function which checks whether the given URI is reachable within the given timeout value (in ms).",
+			new SequenceType[] { 
+			  new SequenceType(Type.STRING, Cardinality.ZERO_OR_ONE),
+        new SequenceType(Type.INTEGER, Cardinality.ZERO_OR_ONE)
+			  },
+			new SequenceType(Type.BOOLEAN, Cardinality.EXACTLY_ONE));
+
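+	// XQuery usage sketch (the module prefix comes from MPDLDocModule.PREFIX; "mpdldoc" below is only an illustrative guess):
+	//   mpdldoc:check-uri("http://www.mpiwg-berlin.mpg.de/", 2000)
+	// delivers true() if the URI answers within 2000 ms, otherwise false().
+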
+	public CheckUri(XQueryContext context) {
+		super(context, signature);
+	}
+
+  public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence isOk = BooleanValue.TRUE;
+    HttpClient httpClient = new HttpClient();
+    GetMethod method = null;
+    try {
+      Sequence firstSeq = args[0];
+      Sequence secondSeq = args[1];
+      if (firstSeq.isEmpty())
+        return isOk;
+      String uriStr = firstSeq.getStringValue();
+      int milliseconds = 2000; // default value
+      if (! secondSeq.isEmpty()) {
+        NumericValue value = (NumericValue) secondSeq.convertTo(Type.NUMBER);
+        milliseconds = value.getInt();
+      }
+      httpClient.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, new Integer(milliseconds));
+      httpClient.getParams().setParameter(HttpClientParams.CONNECTION_MANAGER_TIMEOUT, new Long(milliseconds));
+      method = new GetMethod(uriStr);
+      method.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, new Integer(milliseconds));
+      method.setFollowRedirects(true); 
+      httpClient.executeMethod(method); 
+    } catch (IOException e) {
+      isOk = BooleanValue.FALSE;  // e.g. a timeout or another connection error occurred
+    } finally {
+      if (method != null) {
+        method.releaseConnection();
+      }
+    }
+    return isOk;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/ESciDocLogin.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,79 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocRestSession;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class ESciDocLogin extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("escidoc-login", MPDLDocModule.NAMESPACE_URI, MPDLDocModule.PREFIX),
+			"A function which delivers an eSciDoc cookie id for the given login name and password",
+			new SequenceType[] { 
+			  new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)
+			  },
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public ESciDocLogin(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  try {
+      Sequence firstSeq = args[0];
+      Sequence secondSeq = args[1];
+      if (firstSeq.isEmpty() || secondSeq.isEmpty())
+        return Sequence.EMPTY_SEQUENCE;
+      String userName = firstSeq.getStringValue();
+      String pw = secondSeq.getStringValue();
+      String eSciDocCookieId = ESciDocRestSession.login(userName, pw);
+      ValueSequence resultSequence = new ValueSequence();
+      StringValue strValCookieId = new StringValue("");
+      if (eSciDocCookieId != null)
+        strValCookieId = new StringValue(eSciDocCookieId);
+      resultSequence.add(strValCookieId);
+      return resultSequence;
+	  } catch (ApplicationException e) {
+	    throw new XPathException(e);
+	  }
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/GetESciDocContainerIdByExistId.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,88 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocRestSession;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetESciDocContainerIdByExistId extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("escidoc-get-containerid", MPDLDocModule.NAMESPACE_URI, MPDLDocModule.PREFIX),
+			"A function which delivers the eSciDoc container id for the given eXist id (first argument). " +
+			"The second argument is the cookie id.",
+			new SequenceType[] { 
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+			  new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)
+			  },
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public GetESciDocContainerIdByExistId(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  try {
+      Sequence firstSeq = args[0];
+      Sequence secondSeq = args[1];
+      if (firstSeq.isEmpty() || secondSeq.isEmpty())
+        return Sequence.EMPTY_SEQUENCE;
+      String firstSeqStrValue = firstSeq.getStringValue();
+      String existId = null;
+      if (! firstSeqStrValue.equals(""))
+        existId = firstSeqStrValue;
+      String secondSeqStrValue = secondSeq.getStringValue();
+      String eSciDocCookieId = null;
+      if (! secondSeqStrValue.equals(""))
+        eSciDocCookieId = secondSeqStrValue;
+      ESciDocRestSession eSciDocSession = ESciDocRestSession.getInstance(eSciDocCookieId);
+      String eSciDocContainerId = eSciDocSession.getContainerIdByEXistId(existId);
+      ValueSequence result = new ValueSequence();
+      if (eSciDocContainerId != null) {
+        result.add(new StringValue(eSciDocContainerId));
+      } else {
+        result.add(new StringValue(""));
+      }
+      return result;
+	  } catch (ApplicationException e) {
+	    throw new XPathException(e);
+	  }
+	}
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/GetESciDocs.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,119 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import de.mpg.mpiwg.berlin.mpdl.escidoc.ESciDocRestSession;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetESciDocs extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("escidoc-get-docs", MPDLDocModule.NAMESPACE_URI, MPDLDocModule.PREFIX),
+			"A function which delivers all eSciDoc documents of the given document base (first argument: docbase). " +
+			"The second argument is the cookie id.",
+			new SequenceType[] { 
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+			  new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)
+			  },
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetESciDocs(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  try {
+      Sequence firstSeq = args[0];
+      Sequence secondSeq = args[1];
+      if (firstSeq.isEmpty() || secondSeq.isEmpty())
+        return Sequence.EMPTY_SEQUENCE;
+      String firstSeqStrValue = firstSeq.getStringValue();
+      String docBase = null;
+      if (! firstSeqStrValue.equals(""))
+        docBase = firstSeqStrValue;
+      String docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ECHO_CONTAINER_ID;
+      if (docBase != null && docBase.equals("archimedes"))
+        docBaseContainerId = MpdlConstants.MPDL_ESCIDOC_ARCHIMEDES_CONTAINER_ID;
+      String secondSeqStrValue = secondSeq.getStringValue();
+      String eSciDocCookieId = null;
+      if (! secondSeqStrValue.equals(""))
+        eSciDocCookieId = secondSeqStrValue;
+      ESciDocRestSession eSciDocSession = ESciDocRestSession.getInstance(eSciDocCookieId);
+      String containerXmlStr = eSciDocSession.getContainer(docBaseContainerId);
+      ArrayList<String> containerIdsOfDocBaseContainer = eSciDocSession.getContainerIds(containerXmlStr);
+      ArrayList<String> containerTitlesOfDocBaseContainer = eSciDocSession.getContainerTitles(containerXmlStr);
+      DocumentImpl doc = null;
+      if (containerIdsOfDocBaseContainer != null) {
+        MemTreeBuilder builder = context.getDocumentBuilder();
+        builder.startElement("", "documents", "documents", null);
+        for (int i=0; i<containerIdsOfDocBaseContainer.size(); i++) {
+          builder.startElement("", "doc", "doc", null);
+          builder.startElement("", "container-id", "container-id", null);
+          String containerId = containerIdsOfDocBaseContainer.get(i);
+          builder.characters(containerId);
+          builder.endElement();
+          builder.startElement("", "exist-id", "exist-id", null);
+          String containerTitle = containerTitlesOfDocBaseContainer.get(i);
+          String existId = "";
+          if (containerTitle != null) {
+            int beginIndex = containerTitle.indexOf("document-id:");
+            int endIndex = containerTitle.indexOf(".xml", beginIndex);
+            if (beginIndex > 0 && endIndex > 0) {
+              existId = containerTitle.substring(beginIndex + 13, endIndex + 4);
+              builder.characters(existId);
+            }
+          }
+          builder.endElement();
+          builder.endElement();
+        }
+        builder.endElement();
+        doc = ((DocumentImpl)builder.getDocument());
+      } else {
+        return Sequence.EMPTY_SEQUENCE;
+      }
+      return doc;
+	  } catch (ApplicationException e) {
+	    throw new XPathException(e);
+	  }
+	}
+	
+}
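
For reference, a minimal standalone sketch of the exist-id extraction above: GetESciDocs assumes the eSciDoc container title embeds the eXist file name after a "document-id:" label (the +13 offset skips the 12-character label plus one separator character) and that the file name ends in ".xml". The example title below is made up.

// Sketch only: mirrors the substring logic in GetESciDocs.eval with a fabricated title.
public class ExistIdFromTitleSketch {

  static String extractExistId(String containerTitle) {
    if (containerTitle == null)
      return "";
    int beginIndex = containerTitle.indexOf("document-id:");
    int endIndex = containerTitle.indexOf(".xml", beginIndex);
    if (beginIndex > 0 && endIndex > 0)
      // +13 = length of "document-id:" plus one separator character; +4 keeps the ".xml" suffix
      return containerTitle.substring(beginIndex + 13, endIndex + 4);
    return "";
  }

  public static void main(String[] args) {
    System.out.println(extractExistId("echo container, document-id: some_echo_doc.xml"));
  }
}
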
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/GetJobs.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,248 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import java.util.ArrayList;
+import java.util.Date;
+
+import org.exist.dom.QName;
+import org.exist.http.servlets.RequestWrapper;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.Variable;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.functions.request.RequestModule;
+import org.exist.xquery.value.JavaObjectValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.general.MpdlConstants;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlChainScheduler;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetJobs extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-jobs", MPDLDocModule.NAMESPACE_URI, MPDLDocModule.PREFIX),
+			"A function which delivers all jobs or the job given by an id.",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)},
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetJobs(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  try {
+      Sequence firstSeq = args[0];
+      if (firstSeq.isEmpty())
+        return Sequence.EMPTY_SEQUENCE;
+      String firstSeqStrValue = firstSeq.getStringValue();
+      boolean getAllJobs = false;
+      if (firstSeqStrValue.equals("all"))
+        getAllJobs = true;
+      MpdlChainScheduler scheduler = MpdlChainScheduler.getInstance();
+      ArrayList<MpdlDocOperation> docOperations = new ArrayList<MpdlDocOperation>();
+      if (getAllJobs) {
+        docOperations = scheduler.getDocOperations();
+      } else {
+        String jobIdStr = firstSeq.getStringValue();
+        int jobId = Integer.parseInt(jobIdStr);
+        MpdlDocOperation docOperation = scheduler.getDocOperation(jobId);
+        if (docOperation != null)
+          docOperations.add(docOperation);
+      }
+      DocumentImpl doc = null;
+      if (! docOperations.isEmpty()) {
+        MemTreeBuilder builder = context.getDocumentBuilder();
+        builder.startElement("", "mpdl-doc-operations", "mpdl-doc-operations", null);
+        for (int i=0; i<docOperations.size(); i++) {
+          MpdlDocOperation docOperation = docOperations.get(i);
+          builder.startElement("", "mpdl-doc-operation", "mpdl-doc-operation", null);
+          builder.startElement("", "name", "name", null);
+          builder.characters(docOperation.getName());
+          builder.endElement();
+          builder.startElement("", "job-id", "job-id", null);
+          int jobId = docOperation.getOrderId();
+          builder.characters(String.valueOf(jobId));
+          builder.endElement();
+          builder.startElement("", "status", "status", null);
+          builder.startElement("", "started", "started", null);
+          Date start = docOperation.getStart();
+          String startStr = "No start time available because the job is still queued behind jobs that were started earlier";
+          if (start != null)
+            startStr = start.toString();
+          builder.characters(startStr);
+          builder.endElement();
+          builder.startElement("", "finished", "finished", null);
+          Date end = docOperation.getEnd();
+          String endStr = "No end time available because the job has not finished yet";
+          if (end != null)
+            endStr = end.toString();
+          builder.characters(endStr);
+          builder.endElement();
+          builder.startElement("", "description", "description", null);
+          String status = docOperation.getStatus();
+          if (status != null)
+            builder.characters(status);
+          builder.endElement();
+          String eXistBaseUrlStr = getEXistFullBaseUrlStr();
+          builder.startElement("", "url", "url", null);
+          String statusUrl = eXistBaseUrlStr + "/" + MpdlConstants.MPDL_PROJECT_NAME + "/scheduler/" + "get-jobs.xql?id=" + jobId;
+          builder.characters(statusUrl);
+          builder.endElement();
+          builder.startElement("", "error-message", "error-message", null);
+          String errorMessage = docOperation.getErrorMessage();
+          if (errorMessage == null)
+            builder.characters("no error");
+          else
+            builder.characters(errorMessage);
+          builder.endElement();
+          builder.endElement();
+          String escidocDestUrl = null;
+          String existDestUrl = null;
+          if (docOperation.isFinished() && ! docOperation.isError()) {
+            escidocDestUrl = docOperation.getESciDocDestUrl();
+            existDestUrl = eXistBaseUrlStr + "/" + MpdlConstants.MPDL_PROJECT_NAME + "/" + "page-query-result.xql?document=" + docOperation.getDestUrl();
+          }
+          if (docOperation.getName().equals("delete")  || docOperation.getName().equals("deleteExist")) {
+            builder.startElement("", "dest", "dest", null);
+            builder.startElement("", "doc-base", "doc-base", null);
+            builder.characters(docOperation.getDocBase());
+            builder.endElement();
+            builder.startElement("", "language", "language", null);
+            builder.characters(docOperation.getLanguage());
+            builder.endElement();
+            builder.startElement("", "file-name", "file-name", null);
+            builder.characters(docOperation.getFileName());
+            builder.endElement();
+            if (! docOperation.getName().equals("deleteExist")) {
+              builder.startElement("", "escidoc-url", "escidoc-url", null);
+              if (escidocDestUrl != null)
+                builder.characters(escidocDestUrl);
+              builder.endElement();
+            }
+            builder.startElement("", "exist-url", "exist-url", null);
+            if (existDestUrl != null)
+              builder.characters(existDestUrl);
+            builder.endElement();
+            builder.endElement();
+          } else if (docOperation.getName().equals("create") || docOperation.getName().equals("update") || docOperation.getName().equals("updateExist")) {
+            builder.startElement("", "src", "src", null);
+            builder.startElement("", "url", "url", null);
+            builder.characters(docOperation.getSrcUrl());
+            builder.endElement();
+            builder.startElement("", "upload-file-name", "upload-file-name", null);
+            builder.characters(docOperation.getUploadFileName());
+            builder.endElement();
+            builder.endElement();
+            builder.startElement("", "dest", "dest", null);
+            builder.startElement("", "doc-base", "doc-base", null);
+            builder.characters(docOperation.getDocBase());
+            builder.endElement();
+            builder.startElement("", "language", "language", null);
+            builder.characters(docOperation.getLanguage());
+            builder.endElement();
+            builder.startElement("", "file-name", "file-name", null);
+            builder.characters(docOperation.getFileName());
+            builder.endElement();
+            if (! docOperation.getName().equals("updateExist")) {
+              builder.startElement("", "escidoc-url", "escidoc-url", null);
+              if (escidocDestUrl != null)
+                builder.characters(escidocDestUrl);
+              builder.endElement();
+            }
+            builder.startElement("", "exist-url", "exist-url", null);
+            if (existDestUrl != null)
+              builder.characters(existDestUrl);
+            builder.endElement();
+            builder.endElement();
+          }
+          builder.startElement("", "description", "description", null);
+          builder.characters("Document operations are maintained on the server asynchronously. Each operation is scheduled into a server job queue " + 
+              "and is executed once all previously started jobs in the queue have been worked off. The execution time of each operation depends " + 
+              "on the size and number of pages of the document, the speed of the network connection and the performance of the " +
+              "eSciDoc and eXist servers.");
+          builder.endElement();
+          builder.endElement();
+        }
+        builder.endElement();
+        doc = ((DocumentImpl) builder.getDocument());
+      } else if (getAllJobs && docOperations.isEmpty()) {
+        MemTreeBuilder builder = context.getDocumentBuilder();
+        builder.startElement("", "message", "message", null);
+        builder.characters("there are no scheduled jobs (neither finished, queued nor currently executing)");
+        builder.endElement();
+        doc = ((DocumentImpl) builder.getDocument());
+      } else {
+        MemTreeBuilder builder = context.getDocumentBuilder();
+        builder.startElement("", "error", "error", null);
+        builder.startElement("", "message", "message", null);
+        String jobIdStr = firstSeq.getStringValue();
+        int jobId = Integer.parseInt(jobIdStr);
+        builder.characters("job with id: " + jobId + " was not scheduled and could therefore not be listed");
+        builder.endElement();
+        builder.endElement();
+        doc = ((DocumentImpl) builder.getDocument());
+      }
+      if (doc == null)
+        return Sequence.EMPTY_SEQUENCE;
+      else 
+        return doc;
+	  } catch (ApplicationException e) {
+	    throw new XPathException(e);
+	  }
+	}
+	
+  private String getEXistFullBaseUrlStr() {
+    return "http://" + MpdlConstants.MPDL_FULL_EXIST_HOST_NAME;
+  }
+  
+  private String getBaseUrlStr() {
+    String baseUrlStr = "";
+    try {
+      RequestModule myModule = (RequestModule) context.getModule(RequestModule.NAMESPACE_URI);
+      // request object is read from global variable $request
+      Variable var = myModule.resolveVariable(RequestModule.REQUEST_VAR);
+      JavaObjectValue value = (JavaObjectValue) var.getValue().itemAt(0);
+      RequestWrapper requestWrapper = (RequestWrapper) value.getObject();
+      // String protocol = requestWrapper.getProtocol();
+      String hostName = requestWrapper.getServerName();
+      int port = requestWrapper.getServerPort();
+      baseUrlStr = "http" + "://" + hostName + ":" + port;    
+    } catch (XPathException e) {
+      // ignored: fall back to the empty base URL when no request object is bound to $request
+    }
+    return baseUrlStr;
+  }
+}
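
A standalone sketch of how the per-job status URL above is composed; the host and project name below are placeholders standing in for MpdlConstants.MPDL_FULL_EXIST_HOST_NAME and MpdlConstants.MPDL_PROJECT_NAME.

// Sketch only: placeholder host/project instead of the MpdlConstants values.
public class JobStatusUrlSketch {
  public static void main(String[] args) {
    String eXistBaseUrlStr = "http://" + "localhost:8080"; // placeholder for MPDL_FULL_EXIST_HOST_NAME
    String projectName = "mpdl";                           // placeholder for MPDL_PROJECT_NAME
    int jobId = 42;
    String statusUrl = eXistBaseUrlStr + "/" + projectName + "/scheduler/" + "get-jobs.xql?id=" + jobId;
    System.out.println(statusUrl); // http://localhost:8080/mpdl/scheduler/get-jobs.xql?id=42
  }
}
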
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/Html2Pdf.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,102 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id:  $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import org.exist.dom.QName;
+import org.exist.storage.serializers.Serializer;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Base64Binary;
+import org.exist.xquery.value.NodeValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+import org.xml.sax.SAXException;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.util.MpdlITextRenderer;
+import de.mpg.mpiwg.berlin.mpdl.util.StringUtilEscapeChars;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class Html2Pdf extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("html2pdf", MPDLDocModule.NAMESPACE_URI, MPDLDocModule.PREFIX),
+			"A function which converts the input HTML fragment to PDF",
+			new SequenceType[] { 
+			  new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE),
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+        new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)
+			  },
+			new SequenceType(Type.BYTE, Cardinality.EXACTLY_ONE));
+
+	public Html2Pdf(XQueryContext context) {
+		super(context, signature);
+	}
+
+  public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    try {
+      Sequence firstSeq = args[0];
+      Sequence secondSeq = args[1];
+      Sequence thirdSeq = args[2];
+      Sequence fourthSeq = args[3];
+      Sequence fifthSeq = args[4];
+      Sequence sixthSeq = args[5];
+      if (firstSeq.isEmpty() || secondSeq.isEmpty() || thirdSeq.isEmpty() || fourthSeq.isEmpty() || fifthSeq.isEmpty() || sixthSeq.isEmpty())
+        return Sequence.EMPTY_SEQUENCE;
+      NodeValue nodeValue= (NodeValue) firstSeq.itemAt(0);
+      Serializer serializer = context.getBroker().getSerializer();
+      serializer.reset();
+      String nodeValueStr = serializer.serialize(nodeValue);
+
+      String language = secondSeq.getStringValue();
+      String topLeftStrTmp = thirdSeq.getStringValue();
+      String topRightStrTmp = fourthSeq.getStringValue();
+      String bottomLeftStrTmp = fifthSeq.getStringValue();
+      String bottomRightStrTmp = sixthSeq.getStringValue();
+      String topLeftStr = "&quot;" + StringUtilEscapeChars.deresolveXmlEntities(topLeftStrTmp) + "&quot;";
+      String topRightStr = "&quot;" + StringUtilEscapeChars.deresolveXmlEntities(topRightStrTmp)  + "&quot;";
+      String bottomLeftStr = "&quot;" + StringUtilEscapeChars.deresolveXmlEntities(bottomLeftStrTmp)  + "&quot;";
+      String bottomRightStr = "&quot;" + StringUtilEscapeChars.deresolveXmlEntities(bottomRightStrTmp)  + "&quot;";
+      
+      MpdlITextRenderer mpdlRenderer = MpdlITextRenderer.getInstance();
+      String singlePageStr = nodeValueStr.replaceAll("class=\"page\">", "class=\"singlePage\">");
+      byte[] pdfBytes = mpdlRenderer.createPdf(singlePageStr, language, topLeftStr, topRightStr, bottomLeftStr, bottomRightStr);    
+
+      return new Base64Binary(pdfBytes);
+    } catch (ApplicationException e) {
+      throw new XPathException(e.getMessage());
+    } catch (SAXException e) {
+      throw new XPathException(e);
+    }
+  }
+}
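
A minimal sketch of the HTML preprocessing step in Html2Pdf.eval: every element that carries class="page" is rewritten to class="singlePage" before the fragment is handed to the iText-based renderer.

// Sketch only: shows the replaceAll step in isolation.
public class SinglePageRewriteSketch {
  public static void main(String[] args) {
    String htmlFragment = "<div class=\"page\"><p>Lorem ipsum</p></div>";
    String singlePageStr = htmlFragment.replaceAll("class=\"page\">", "class=\"singlePage\">");
    System.out.println(singlePageStr); // <div class="singlePage"><p>Lorem ipsum</p></div>
  }
}
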
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/MPDLDocModule.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,64 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import org.exist.xquery.AbstractInternalModule;
+import org.exist.xquery.FunctionDef;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class MPDLDocModule extends AbstractInternalModule {
+	public final static String NAMESPACE_URI = "http://exist-db.org/xquery/mpdldoc";
+	public final static String PREFIX = "mpdldoc";
+	
+	private final static FunctionDef[] functions = {
+    new FunctionDef(CheckUri.signature, CheckUri.class),
+    new FunctionDef(MpdlDocOperationStarter.signature, MpdlDocOperationStarter.class),
+    new FunctionDef(ESciDocLogin.signature, ESciDocLogin.class),
+    new FunctionDef(GetESciDocs.signature, GetESciDocs.class),
+    new FunctionDef(GetJobs.signature, GetJobs.class),
+    new FunctionDef(GetESciDocContainerIdByExistId.signature, GetESciDocContainerIdByExistId.class),
+    new FunctionDef(Html2Pdf.signature, Html2Pdf.class)
+	};
+	
+	public MPDLDocModule() {
+		super(functions);
+	}
+
+	public String getNamespaceURI() {
+		return NAMESPACE_URI;
+	}
+
+	public String getDefaultPrefix() {
+		return PREFIX;
+	}
+
+	public String getDescription() {
+		return "A module for document functions";
+	}
+  
+  public String getReleaseVersion() {
+    return "A module for document functions";
+  }
+}
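
To make the functions of this module callable from XQuery, the class has to be registered as a built-in module in eXist's conf.xml (inside the xquery element). A sketch of the entry, with element and attribute names as used by eXist 1.x; check them against the installed version:

  <builtin-modules>
    <module class="org.exist.xquery.modules.mpdldoc.MPDLDocModule"
            uri="http://exist-db.org/xquery/mpdldoc"/>
  </builtin-modules>
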
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdldoc/MpdlDocOperationStarter.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,97 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdldoc;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlChainScheduler;
+import de.mpg.mpiwg.berlin.mpdl.schedule.MpdlDocOperation;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class MpdlDocOperationStarter extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("do", MPDLDocModule.NAMESPACE_URI, MPDLDocModule.PREFIX),
+      "A function which performs/schedules an operation (create, update or delete) for the given source file (given by a local file path) " +
+      "to the destination (given by document base, language and fileName), both in eSciDoc and eXist. " +
+      "It returns a node which contains information about the scheduled operation (jobId etc.).",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE), 
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+			                     new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE) },
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public MpdlDocOperationStarter(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence firstSeq = args[0];
+    Sequence secondSeq = args[1];
+	  Sequence thirdSeq = args[2];
+    Sequence fourthSeq = args[3];
+    Sequence fifthSeq = args[4];
+    Sequence sixthSeq = args[5];
+    Sequence seventhSeq = args[6];
+    if (firstSeq.isEmpty() || secondSeq.isEmpty() || thirdSeq.isEmpty() || fourthSeq.isEmpty() || fifthSeq.isEmpty() || sixthSeq.isEmpty() || seventhSeq.isEmpty())
+      return Sequence.EMPTY_SEQUENCE;
+    String operationName = firstSeq.getStringValue();
+    String srcUrl = secondSeq.getStringValue();
+    String uploadFileName = thirdSeq.getStringValue();
+    String docBase = fourthSeq.getStringValue();
+    String language = fifthSeq.getStringValue();
+    String fileName = sixthSeq.getStringValue();
+    String eSciDocCookieId = seventhSeq.getStringValue();
+		MpdlDocOperation docOperation = new MpdlDocOperation(operationName, srcUrl, uploadFileName, docBase, language, fileName);	
+    docOperation.setESciDocCookieId(eSciDocCookieId);
+    try { 
+  		MpdlChainScheduler scheduler = MpdlChainScheduler.getInstance();
+  		docOperation = scheduler.doOperation(docOperation);
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    int jobId = docOperation.getOrderId();
+    ValueSequence resultSequence = new ValueSequence();
+    StringValue strValItems = new StringValue(String.valueOf(jobId));
+    resultSequence.add(strValItems);
+    return resultSequence;
+	}
+	
+}
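
Since mpdldoc:do only returns the job id of the scheduled operation, a client has to poll the scheduler's status page (served by GetJobs) to find out when the operation has finished. A hedged sketch of that usage pattern; the URL is a placeholder that depends on the installation.

// Sketch only: polls the get-jobs.xql status page for a previously returned job id.
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;

public class PollJobStatusSketch {
  public static void main(String[] args) throws Exception {
    int jobId = 42; // id returned by the mpdldoc:do(...) call
    URL statusUrl = new URL("http://localhost:8080/mpdl/scheduler/get-jobs.xql?id=" + jobId);
    BufferedReader in = new BufferedReader(new InputStreamReader(statusUrl.openStream(), "UTF-8"));
    String line;
    while ((line = in.readLine()) != null)
      System.out.println(line); // the returned XML contains <started>, <finished> and <error-message>
    in.close();
  }
}
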
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/Dictionarize.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,97 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.DictionarizerContentHandler;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class Dictionarize extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("dictionarize", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which dictionarizes the given XML fragment string in the given language. " +
+			"The result is an XML fragment which contains the original fragment enriched by " +
+			"a word tag for each word, carrying attributes for the lemma and the pollux dictionary.",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE), 
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)},
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public Dictionarize(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence xmlFragmentSeq = args[0];
+    Sequence languageSeq = args[1];
+    String xmlFragment = "";
+    String language = "";
+		if (xmlFragmentSeq.isEmpty() || languageSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		xmlFragment = xmlFragmentSeq.getStringValue();
+		language = languageSeq.getStringValue();
+		String outputXmlFragment = null;
+		try { 
+		  DictionarizerContentHandler dictContentHandler = new DictionarizerContentHandler(language);
+      XMLReader xmlParser = new SAXParser();
+      xmlParser.setContentHandler(dictContentHandler);
+      Reader stringReaderXmlFragment = new StringReader(xmlFragment);
+      InputSource input = new InputSource(stringReaderXmlFragment);
+      xmlParser.parse(input);
+      outputXmlFragment = dictContentHandler.getXmlFragment();
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    } catch (IOException e) {
+      throw new XPathException(e);
+    } catch (SAXException e) {
+      throw new XPathException(e);
+    }
+    ValueSequence result = new ValueSequence();
+    result.add(new StringValue(outputXmlFragment));
+		return result;
+	}
+}
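
A self-contained sketch of the SAX round trip used above: the XML fragment is wrapped in a StringReader/InputSource and pushed through a ContentHandler. The real DictionarizerContentHandler wraps each word in a tag with lemma and dictionary attributes; the handler below only counts words so the example stays runnable without the MPDL classes, and it uses the standard JAXP SAXParserFactory instead of the internal Xerces parser.

// Sketch only: standard SAX parsing of a string fragment with a trivial handler.
import java.io.StringReader;

import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

public class SaxFragmentSketch {
  public static void main(String[] args) throws Exception {
    String xmlFragment = "<s>Aqua et ignis</s>";
    XMLReader xmlParser = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
    xmlParser.setContentHandler(new DefaultHandler() {
      public void characters(char[] ch, int start, int length) {
        String text = new String(ch, start, length).trim();
        if (!text.isEmpty())
          System.out.println(text.split("\\s+").length + " words in: " + text);
      }
    });
    xmlParser.parse(new InputSource(new StringReader(xmlFragment)));
  }
}
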
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/EncodeBig5.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,81 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.io.UnsupportedEncodingException;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+public class EncodeBig5 extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("encode-big5", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the percent-encoded Big5 byte representation of the input string",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
+
+	public EncodeBig5(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence seqBig5InputStr = args[0];
+    String big5InputStr = "";
+    String charset = "big5";
+		if (seqBig5InputStr.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		else
+		  big5InputStr = seqBig5InputStr.getStringValue();
+		ValueSequence result = null;
+    String resultStr = "";
+    try {
+      byte[] resultBytes = big5InputStr.getBytes(charset);
+      for (int i=0; i < resultBytes.length; i++) {
+        byte b = resultBytes[i];
+        int unsigned = unsignedByteToInt(b);
+        String hexStr = Integer.toHexString(unsigned);
+        resultStr = resultStr + "%" + hexStr;
+      }
+      result = new ValueSequence();
+      result.add(new StringValue(resultStr));
+    } catch (UnsupportedEncodingException e) {
+      throw new XPathException(e);
+    }
+		return result;
+	}
+
+  private int unsignedByteToInt(byte b) {
+    return (int) b & 0xFF;
+  }
+}
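
A minimal standalone sketch of the encoding above: the input is converted to its Big5 bytes and each byte is emitted as "%" followed by its unsigned hex value (as in the original, single-digit values are not zero-padded). The sample string is arbitrary.

// Sketch only: percent-hex encoding of the Big5 bytes of a string.
public class Big5EncodeSketch {
  public static void main(String[] args) throws java.io.UnsupportedEncodingException {
    String input = "\u4e2d\u6587"; // two CJK characters
    byte[] resultBytes = input.getBytes("big5");
    StringBuilder resultStr = new StringBuilder();
    for (byte b : resultBytes) {
      int unsigned = b & 0xFF;
      resultStr.append('%').append(Integer.toHexString(unsigned));
    }
    System.out.println(resultStr); // one %hh group per Big5 byte
  }
}
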
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetBig5EncodedTerms.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,118 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.io.UnsupportedEncodingException;
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetBig5EncodedTerms extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-big5-encoded-terms", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the Big5 percent-encoded mapping for each plain term of the given Lucene query string",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetBig5EncodedTerms(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence luceneQueryStringSeq = args[0];
+    String luceneQueryString = "";
+		if (luceneQueryStringSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		luceneQueryString = luceneQueryStringSeq.getStringValue();
+		ArrayList<String> queryTerms = getTermsFromLuceneQuery(luceneQueryString);
+    int size = queryTerms.size();
+    MemTreeBuilder builder = context.getDocumentBuilder();
+    builder.startElement("", "big5-mappings", "big5-mappings", null);
+    for (int i=0; i<size; i++) {
+      String queryTerm = queryTerms.get(i);
+      String big5EncodedQueryTerm = encodeBig5(queryTerm);
+      builder.startElement("", "big5-mapping", "big5-mapping", null);
+      builder.startElement("", "term", "term", null);
+      builder.characters(queryTerm);
+      builder.endElement();
+      builder.startElement("", "mapping", "mapping", null);
+      builder.characters(big5EncodedQueryTerm);
+      builder.endElement();
+      builder.endElement();
+    }
+    builder.endElement();
+    DocumentImpl doc = ((DocumentImpl)builder.getDocument());
+
+		return doc;
+	}
+	
+  // TODO this method is simplistic: verify it against all Lucene query syntax cases
+  private ArrayList<String> getTermsFromLuceneQuery(String queryString) {
+    ArrayList<String> terms = new ArrayList<String>();
+    String[] variantTokens = queryString.split(" ");  // TODO throw the phrases away (e.g.: "bla bla bla")
+    for (int i = 0; i < variantTokens.length; i++) {
+      String token = variantTokens[i];
+      if (! (token.contains("*") || token.contains("?") || token.contains("~") || token.contains("-") || token.contains("+") || token.contains("^") || token.contains("OR") || token.contains("AND") || token.contains("NOT"))) {
+        terms.add(token);
+      }
+    }
+    return terms;
+  }
+
+  private String encodeBig5(String inputStr) {
+    String resultStr = "";
+    String charset = "big5";
+    try {
+      byte[] resultBytes = inputStr.getBytes(charset);
+      for (int i=0; i < resultBytes.length; i++) {
+        byte b = resultBytes[i];
+        int unsigned = unsignedByteToInt(b);
+        String hexStr = Integer.toHexString(unsigned);
+        resultStr = resultStr + "%" + hexStr;
+      }
+    } catch (UnsupportedEncodingException e) {
+      // should not happen for big5; the term is then mapped to the empty string
+    }
+    return resultStr;
+  }
+
+  private int unsignedByteToInt(byte b) {
+    return (int) b & 0xFF;
+  }
+
+}
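
A standalone sketch of the (deliberately simple) term filter above: the query string is split on spaces and every token that contains a Lucene operator or wildcard character is dropped, so only plain terms are Big5-encoded.

// Sketch only: mirrors getTermsFromLuceneQuery with a small example query.
import java.util.ArrayList;

public class LuceneTermFilterSketch {

  static ArrayList<String> termsOf(String queryString) {
    ArrayList<String> terms = new ArrayList<String>();
    for (String token : queryString.split(" ")) {
      boolean operatorOrWildcard =
          token.contains("*") || token.contains("?") || token.contains("~") ||
          token.contains("-") || token.contains("+") || token.contains("^") ||
          token.contains("OR") || token.contains("AND") || token.contains("NOT");
      if (!operatorOrWildcard)
        terms.add(token);
    }
    return terms;
  }

  public static void main(String[] args) {
    System.out.println(termsOf("aqua AND ignis terra*")); // prints [aqua, ignis]
  }
}
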
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetDonatusQueryVariants.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,92 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusCache;
+import de.mpg.mpiwg.berlin.mpdl.donatus.xmlrpc.DonatusVariant;
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetDonatusQueryVariants extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-donatus-query-variants", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the morphological variants (separated by |) of a given Lucene query string in a given " +
+			"language, expanded over the lemma of each variant by the Donatus language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
+
+	public GetDonatusQueryVariants(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence luceneQueryStringSeq = args[1];
+    String language = "";
+    String luceneQueryString = "";
+		if (langSeq.isEmpty() || luceneQueryStringSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		luceneQueryString = luceneQueryStringSeq.getStringValue();
+		ArrayList<DonatusVariant> resultVariants = null;
+		try { 
+	    DonatusCache donatusCache = DonatusCache.getInstance();
+	    resultVariants = donatusCache.getQueryVariants(language, luceneQueryString);
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    ValueSequence result = new ValueSequence();
+    String resultStr = "";
+    int size = resultVariants.size();
+    for (int i=0; i<size; i++) {
+      String variantStr = resultVariants.get(i).getForm();
+      resultStr = resultStr + variantStr + "|";
+    }
+    if (size == 0) {
+      result.add(new StringValue(""));
+    } else {
+      resultStr = resultStr.substring(0, resultStr.length() - 1); // without last | character
+      result.add(new StringValue(resultStr));
+    }
+		return result;
+	}
+}
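
A standalone sketch of how the variant list above is flattened into a single "|"-separated string, including the removal of the trailing separator; the variant values are made up.

// Sketch only: joins variant forms with "|" and drops the trailing separator.
import java.util.Arrays;
import java.util.List;

public class VariantJoinSketch {
  public static void main(String[] args) {
    List<String> variants = Arrays.asList("aqua", "aquam", "aquae");
    String resultStr = "";
    for (String variant : variants)
      resultStr = resultStr + variant + "|";
    if (!resultStr.isEmpty())
      resultStr = resultStr.substring(0, resultStr.length() - 1); // without last | character
    System.out.println(resultStr); // aqua|aquam|aquae
  }
}
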
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemma.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,131 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetLemma extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-lemma", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the lemma of a given lemma name and language " +
+			"by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetLemma(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence formNameSeq = args[1];
+    String language = "";
+    String formName = "";
+		if (langSeq.isEmpty() || formNameSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		formName = formNameSeq.getStringValue();
+    ArrayList<Lemma> lemmas = null;
+		try { 
+	    MorphologyCache morphologyCache = MorphologyCache.getInstance();
+	    Lemma lemma = morphologyCache.getLemma(language, formName, true);
+	    if (lemma == null) {
+       lemma = new Lemma("standard analyzer", language, formName); // delivers at least one lemma with one form (with the formName)
+	    }
+      lemmas = new ArrayList<Lemma>();
+      lemmas.add(lemma);
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    DocumentImpl doc = null;
+    if (lemmas != null) {
+      MemTreeBuilder builder = context.getDocumentBuilder();
+      builder.startElement("", "lemmas", "lemmas", null);
+      for (int i=0; i<lemmas.size(); i++) {
+        Lemma lemma = lemmas.get(i);
+        builder.startElement("", "lemma", "lemma", null);
+        builder.startElement("", "provider", "provider", null);
+        builder.characters(lemma.getProvider());
+        builder.endElement();
+        builder.startElement("", "language", "language", null);
+        builder.characters(lemma.getLanguage());
+        builder.endElement();
+        builder.startElement("", "lemma-name", "lemma-name", null);
+        builder.characters(lemma.getLemmaName());
+        builder.endElement();
+        ArrayList<Form> forms = lemma.getFormsList();
+        builder.startElement("", "forms-size", "forms-size", null);
+        builder.characters(String.valueOf(forms.size()));
+        builder.endElement();
+        builder.startElement("", "forms", "forms", null);
+        for (int j=0; j<forms.size(); j++) {
+          Form form = forms.get(j);
+          builder.startElement("", "form", "form", null);
+          builder.startElement("", "provider", "provider", null);
+          builder.characters(form.getProvider());
+          builder.endElement();
+          builder.startElement("", "language", "language", null);
+          builder.characters(form.getLanguage());
+          builder.endElement();
+          builder.startElement("", "form-name", "form-name", null);
+          builder.characters(form.getFormName());
+          builder.endElement();
+          builder.startElement("", "lemma-name", "lemma-name", null);
+          builder.characters(lemma.getLemmaName());
+          builder.endElement();
+          builder.endElement();
+        }
+        builder.endElement();
+        builder.endElement();
+      }
+      builder.endElement();
+      doc = ((DocumentImpl)builder.getDocument());
+    } else {
+      return Sequence.EMPTY_SEQUENCE;
+    }
+		return doc;
+	}
+}
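
For readers who want to see the shape of the result without running eXist, here is a slightly abbreviated, self-contained reconstruction of the element structure that GetLemma builds with the MemTreeBuilder, written with the standard StAX writer; the lemma and form values are made up.

// Sketch only: reproduces the lemmas/lemma/forms/form element structure with StAX.
import java.io.StringWriter;

import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamWriter;

public class LemmaXmlShapeSketch {

  public static void main(String[] args) throws Exception {
    StringWriter out = new StringWriter();
    XMLStreamWriter w = XMLOutputFactory.newInstance().createXMLStreamWriter(out);
    w.writeStartElement("lemmas");
    w.writeStartElement("lemma");
    textElement(w, "provider", "standard analyzer");
    textElement(w, "language", "la");
    textElement(w, "lemma-name", "aqua");
    textElement(w, "forms-size", "1");
    w.writeStartElement("forms");
    w.writeStartElement("form");
    textElement(w, "form-name", "aquam");
    textElement(w, "lemma-name", "aqua");
    w.writeEndElement(); // form
    w.writeEndElement(); // forms
    w.writeEndElement(); // lemma
    w.writeEndElement(); // lemmas
    w.flush();
    System.out.println(out);
  }

  private static void textElement(XMLStreamWriter w, String name, String text) throws Exception {
    w.writeStartElement(name);
    w.writeCharacters(text);
    w.writeEndElement();
  }
}
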
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasByFormName.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,131 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetLemmasByFormName extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-lemmas-by-form-name", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the lemmas of a given form name and language " +
+			"by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetLemmasByFormName(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence formNameSeq = args[1];
+    String language = "";
+    String formName = "";
+		if (langSeq.isEmpty() || formNameSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		formName = formNameSeq.getStringValue();
+    ArrayList<Lemma> lemmas = null;
+		try { 
+	    MorphologyCache morphologyCache = MorphologyCache.getInstance();
+	    lemmas = morphologyCache.getLemmasByFormName(language, formName, true);
+	    if (lemmas == null || lemmas.isEmpty()) {
+	      Lemma lemma = new Lemma("standard analyzer", language, formName); // delivers at least one lemma with one form (with the formName)
+	      lemmas = new ArrayList<Lemma>();
+	      lemmas.add(lemma);
+	    }
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    DocumentImpl doc = null;
+    if (lemmas != null) {
+      MemTreeBuilder builder = context.getDocumentBuilder();
+      builder.startElement("", "lemmas", "lemmas", null);
+      for (int i=0; i<lemmas.size(); i++) {
+        Lemma lemma = lemmas.get(i);
+        builder.startElement("", "lemma", "lemma", null);
+        builder.startElement("", "provider", "provider", null);
+        builder.characters(lemma.getProvider());
+        builder.endElement();
+        builder.startElement("", "language", "language", null);
+        builder.characters(lemma.getLanguage());
+        builder.endElement();
+        builder.startElement("", "lemma-name", "lemma-name", null);
+        builder.characters(lemma.getLemmaName());
+        builder.endElement();
+        ArrayList<Form> forms = lemma.getFormsList();
+        builder.startElement("", "forms-size", "forms-size", null);
+        builder.characters(String.valueOf(forms.size()));
+        builder.endElement();
+        builder.startElement("", "forms", "forms", null);
+        for (int j=0; j<forms.size(); j++) {
+          Form form = forms.get(j);
+          builder.startElement("", "form", "form", null);
+          builder.startElement("", "provider", "provider", null);
+          builder.characters(form.getProvider());
+          builder.endElement();
+          builder.startElement("", "language", "language", null);
+          builder.characters(form.getLanguage());
+          builder.endElement();
+          builder.startElement("", "form-name", "form-name", null);
+          builder.characters(form.getFormName());
+          builder.endElement();
+          builder.startElement("", "lemma-name", "lemma-name", null);
+          builder.characters(lemma.getLemmaName());
+          builder.endElement();
+          builder.endElement();
+        }
+        builder.endElement();
+        builder.endElement();
+      }
+      builder.endElement();
+      doc = ((DocumentImpl)builder.getDocument());
+    } else {
+      return Sequence.EMPTY_SEQUENCE;
+    }
+		return doc;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasByLuceneQuery.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,126 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+/**
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetLemmasByLuceneQuery extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-lemmas-by-lucene-query", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the lemmas of a given Lucene query and language " +
+			"by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetLemmasByLuceneQuery(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence luceneQuerySeq = args[1];
+    String language = "";
+    String luceneQueryStr = "";
+		if (langSeq.isEmpty() || luceneQuerySeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		luceneQueryStr = luceneQuerySeq.getStringValue();
+		ArrayList<Lemma> lemmas = null;
+		try { 
+	    MorphologyCache morphologyCache = MorphologyCache.getInstance();
+	    lemmas = morphologyCache.getLemmasByLuceneQuery(language, luceneQueryStr, true);
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    DocumentImpl doc = null;
+    if (lemmas != null) {
+      MemTreeBuilder builder = context.getDocumentBuilder();
+      builder.startElement("", "lemmas", "lemmas", null);
+      for (int i=0; i<lemmas.size(); i++) {
+        Lemma lemma = lemmas.get(i);
+        builder.startElement("", "lemma", "lemma", null);
+        builder.startElement("", "provider", "provider", null);
+        builder.characters(lemma.getProvider());
+        builder.endElement();
+        builder.startElement("", "language", "language", null);
+        builder.characters(lemma.getLanguage());
+        builder.endElement();
+        builder.startElement("", "lemma-name", "lemma-name", null);
+        builder.characters(lemma.getLemmaName());
+        builder.endElement();
+        ArrayList<Form> forms = lemma.getFormsList();
+        builder.startElement("", "forms-size", "forms-size", null);
+        builder.characters(String.valueOf(forms.size()));
+        builder.endElement();
+        builder.startElement("", "forms", "forms", null);
+        for (int j=0; j<forms.size(); j++) {
+          Form form = forms.get(j);
+          builder.startElement("", "form", "form", null);
+          builder.startElement("", "provider", "provider", null);
+          builder.characters(form.getProvider());
+          builder.endElement();
+          builder.startElement("", "language", "language", null);
+          builder.characters(form.getLanguage());
+          builder.endElement();
+          builder.startElement("", "form-name", "form-name", null);
+          builder.characters(form.getFormName());
+          builder.endElement();
+          builder.startElement("", "lemma-name", "lemma-name", null);
+          builder.characters(lemma.getLemmaName());
+          builder.endElement();
+          builder.endElement();
+        }
+        builder.endElement();
+        builder.endElement();
+      }
+      builder.endElement();
+      doc = ((DocumentImpl)builder.getDocument());
+    } else {
+      return Sequence.EMPTY_SEQUENCE;
+    }
+		return doc;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasStrByFormName.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,94 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+/**
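+ * Implements the XQuery extension function mpdltext:get-lemmasstr-by-form-name.
+ * It expects a language code and a form name; an illustrative call (argument
+ * values are hypothetical) would be
+ *   mpdltext:get-lemmasstr-by-form-name("de", "bewegte").
+ * The result is a single string with the matching lemma names joined by " OR ";
+ * the empty sequence is returned if no lemma is found.
+ *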
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetLemmasStrByFormName extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-lemmasstr-by-form-name", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the lemmas string (lemma names joined by ' OR ') " +
+			"for a given form name and language by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public GetLemmasStrByFormName(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence formNameSeq = args[1];
+    String language = "";
+    String formName = "";
+		if (langSeq.isEmpty() || formNameSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		formName = formNameSeq.getStringValue();
+    String lemmasStr = null;
+		try { 
+	    MorphologyCache morphologyCache = MorphologyCache.getInstance();
+	    ArrayList<Lemma> lemmas = morphologyCache.getLemmasByFormName(language, formName, true);
+      if (lemmas != null && ! lemmas.isEmpty()) {
+        lemmasStr = "";
+        for (int j=0; j<lemmas.size(); j++) {
+          Lemma lemma = lemmas.get(j);
+          String lemmaName = lemma.getLemmaName();
+          lemmasStr = lemmasStr + lemmaName + " OR ";
+        }
+        lemmasStr = lemmasStr.substring(0, lemmasStr.length() - 4); // strip the trailing " OR "
+      }
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    if (lemmasStr != null) {
+      ValueSequence result = new ValueSequence();
+      result.add(new StringValue(lemmasStr));
+      return result;
+    } else {
+      return Sequence.EMPTY_SEQUENCE;
+    }
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLexEntriesByFormName.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,140 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexica;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexicon;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.LexiconEntry;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.db.LexHandler;
+
+/**
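+ * Implements the XQuery extension function mpdltext:get-lex-entries-by-form-name.
+ * It expects a language code and a form name; an illustrative call (argument
+ * values are hypothetical) would be
+ *   mpdltext:get-lex-entries-by-form-name("lat", "natura").
+ * The result is a lexica element with one lexicon child per dictionary that has an
+ * entry for the form; each entry carries the form, an xml-valid flag and the
+ * original and repaired entry text. The empty sequence is returned if nothing is found.
+ *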
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetLexEntriesByFormName extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-lex-entries-by-form-name", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the lexicon entries for a given form name and language " +
+			"by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetLexEntriesByFormName(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence formNameSeq = args[1];
+    String language = "";
+    String formName = "";
+		if (langSeq.isEmpty() || formNameSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		formName = formNameSeq.getStringValue();
+    ArrayList<Lexicon> lexicons = null;
+		try { 
+		  ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLexicons(language);
+		  if (statLexicons != null) {
+  	    LexHandler lexHandler = LexHandler.getInstance();
+        for (int i=0; i<statLexicons.size(); i++) {
+          Lexicon lexicon = statLexicons.get(i).clone(); // clone without lexicon entries
+  	      LexiconEntry lexEntry = lexHandler.readEntry(lexicon.getName(), formName);
+  	      if (lexEntry != null) {
+    	      lexicon.addEntry(lexEntry); // add entries to the cloned object
+            if (lexicons == null)
+              lexicons = new ArrayList<Lexicon>();
+            lexicons.add(lexicon);
+  	      }
+        }
+		  }
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    DocumentImpl doc = null;
+    if (lexicons != null) {
+      MemTreeBuilder builder = context.getDocumentBuilder();
+      builder.startElement("", "lexica", "lexica", null);
+      for (int i=0; i<lexicons.size(); i++) {
+        Lexicon lexicon = lexicons.get(i);
+        builder.startElement("", "lexicon", "lexicon", null);
+        builder.startElement("", "name", "name", null);
+        builder.characters(lexicon.getName());
+        builder.endElement();
+        builder.startElement("", "description", "description", null);
+        builder.characters(lexicon.getDescription());
+        builder.endElement();
+        builder.startElement("", "entries", "entries", null);
+        ArrayList<LexiconEntry> entries = lexicon.getEntries();
+        for (int j=0; j<entries.size(); j++) {
+          builder.startElement("", "entry", "entry", null);
+          LexiconEntry entry = entries.get(j);
+          builder.startElement("", "form", "form", null);
+          builder.characters(entry.getFormName());
+          builder.endElement();
+          builder.startElement("", "content", "content", null);
+          builder.startElement("", "xml-valid", "xml-valid", null);
+          String xmlValid = "false";
+          if (entry.isXmlValid())
+            xmlValid = "true";
+          builder.characters(xmlValid);
+          builder.endElement();
+          builder.startElement("", "original-entry", "original-entry", null);
+          builder.characters(entry.getOriginalEntry()); 
+          builder.endElement();
+          builder.startElement("", "repaired-entry", "repaired-entry", null);
+          builder.characters(entry.getRepairedEntry()); 
+          builder.endElement();
+          builder.endElement();
+          builder.endElement();
+        }
+        builder.endElement();
+        builder.endElement();
+      }
+      builder.endElement();
+      doc = ((DocumentImpl)builder.getDocument());
+    } else {
+      return Sequence.EMPTY_SEQUENCE;
+    }
+		return doc;
+	}
+	
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLexEntriesByLuceneQuery.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,147 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexica;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexicon;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.LexiconEntry;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.db.LexHandler;
+import de.mpg.mpiwg.berlin.mpdl.lucene.LuceneUtil;
+
+/**
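+ * Implements the XQuery extension function mpdltext:get-lex-entries-by-lucene-query.
+ * It behaves like get-lex-entries-by-form-name, but first expands the given Lucene
+ * query into its term variants and looks up each variant in every lexicon of the
+ * language. An illustrative call (argument values are hypothetical) would be
+ *   mpdltext:get-lex-entries-by-lucene-query("lat", "natura corpus").
+ * The empty sequence is returned if no entry is found.
+ *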
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetLexEntriesByLuceneQuery extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-lex-entries-by-lucene-query", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the lexicon entries for a given Lucene query and language " +
+			"by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));
+
+	public GetLexEntriesByLuceneQuery(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence langSeq = args[0];
+    Sequence luceneQuerySeq = args[1];
+    String language = "";
+    String luceneQueryStr = "";
+    if (langSeq.isEmpty() || luceneQuerySeq.isEmpty())
+      return Sequence.EMPTY_SEQUENCE;
+    language = langSeq.getStringValue();
+    luceneQueryStr = luceneQuerySeq.getStringValue();
+    ArrayList<Lexicon> lexicons = null;
+		try { 
+		  ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLexicons(language);
+		  if (statLexicons != null) {
+  	    LexHandler lexHandler = LexHandler.getInstance();
+        for (int i=0; i<statLexicons.size(); i++) {
+          Lexicon lexicon = statLexicons.get(i).clone(); // clone without lexicon entries
+          LuceneUtil luceneUtil = LuceneUtil.getInstance();
+          ArrayList<String> lexQueryVariants = luceneUtil.getVariantsFromLuceneQuery(luceneQueryStr);
+          for (int j=0; j<lexQueryVariants.size(); j++) {
+            String lexForm = lexQueryVariants.get(j);
+            LexiconEntry lexEntry = lexHandler.readEntry(lexicon.getName(), lexForm);
+            if (lexEntry != null) {
+              lexicon.addEntry(lexEntry); // add entries to the cloned object
+              if (lexicons == null)
+                lexicons = new ArrayList<Lexicon>();
+            }
+          }
+          if (! lexicon.isEmpty())
+            lexicons.add(lexicon);
+        }
+		  }
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    DocumentImpl doc = null;
+    if (lexicons != null) {
+      MemTreeBuilder builder = context.getDocumentBuilder();
+      builder.startElement("", "lexica", "lexica", null);
+      for (int i=0; i<lexicons.size(); i++) {
+        Lexicon lexicon = lexicons.get(i);
+        builder.startElement("", "lexicon", "lexicon", null);
+        builder.startElement("", "name", "name", null);
+        builder.characters(lexicon.getName());
+        builder.endElement();
+        builder.startElement("", "description", "description", null);
+        builder.characters(lexicon.getDescription());
+        builder.endElement();
+        builder.startElement("", "entries", "entries", null);
+        ArrayList<LexiconEntry> entries = lexicon.getEntries();
+        for (int j=0; j<entries.size(); j++) {
+          builder.startElement("", "entry", "entry", null);
+          LexiconEntry entry = entries.get(j);
+          builder.startElement("", "form", "form", null);
+          builder.characters(entry.getFormName());
+          builder.endElement();
+          builder.startElement("", "content", "content", null);
+          builder.startElement("", "xml-valid", "xml-valid", null);
+          String xmlValid = "false";
+          if (entry.isXmlValid())
+            xmlValid = "true";
+          builder.characters(xmlValid);
+          builder.endElement();
+          builder.startElement("", "original-entry", "original-entry", null);
+          builder.characters(entry.getOriginalEntry()); 
+          builder.endElement();
+          builder.startElement("", "repaired-entry", "repaired-entry", null);
+          builder.characters(entry.getRepairedEntry()); 
+          builder.endElement();
+          builder.endElement();
+          builder.endElement();
+        }
+        builder.endElement();
+        builder.endElement();
+      }
+      builder.endElement();
+      doc = ((DocumentImpl)builder.getDocument());
+    } else {
+      return Sequence.EMPTY_SEQUENCE;
+    }
+		return doc;
+	}
+	
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLexEntryKeysByFormName.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,94 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.db.LexHandler;
+
+/**
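+ * Implements the XQuery extension function mpdltext:get-lex-entry-keys-by-form-name.
+ * It expects a language code and a form name; an illustrative call (argument
+ * values are hypothetical) would be
+ *   mpdltext:get-lex-entry-keys-by-form-name("lat", "natura").
+ * The result is a single string with the matching lexicon entry keys separated by
+ * spaces; the empty sequence is returned if no key is found.
+ *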
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetLexEntryKeysByFormName extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-lex-entry-keys-by-form-name", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the lexicon entry keys for a given form name and language " +
+			"by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public GetLexEntryKeysByFormName(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence formNameSeq = args[1];
+    String language = "";
+    String formName = "";
+		if (langSeq.isEmpty() || formNameSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		formName = formNameSeq.getStringValue();
+    String lexEntryKeysStr = null;
+		try { 
+      LexHandler lexHandler = LexHandler.getInstance();
+      ArrayList<String> lexEntryKeys = lexHandler.getLexEntryKeys(formName, language, true);
+      if (lexEntryKeys != null && ! lexEntryKeys.isEmpty()) {
+        lexEntryKeysStr = "";
+        for (int j=0; j<lexEntryKeys.size(); j++) {
+          String lexEntryKey = lexEntryKeys.get(j);
+          lexEntryKeysStr = lexEntryKeysStr + lexEntryKey + " ";
+        }
+        lexEntryKeysStr = lexEntryKeysStr.substring(0, lexEntryKeysStr.length() - 1); // strip the trailing space
+      }
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    if (lexEntryKeysStr != null) {
+      ValueSequence result = new ValueSequence();
+      result.add(new StringValue(lexEntryKeysStr));
+      return result;
+    } else {
+      return Sequence.EMPTY_SEQUENCE;
+    }
+	}
+	
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetQueryMorphForms.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,96 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlNormalizer;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+/**
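+ * Implements the XQuery extension function mpdltext:get-query-morph-forms.
+ * It expects a language code and a Lucene query string; an illustrative call
+ * (argument values are hypothetical) would be
+ *   mpdltext:get-query-morph-forms("la", "movere").
+ * Every morphological form found for the query terms is de-normalized to a regular
+ * expression and the results are joined by "|"; an empty string is returned when
+ * no form is found.
+ *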
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetQueryMorphForms extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-query-morph-forms", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the morphological forms (separated by |) of a given Lucene query string " +
+			"in a given language, expanded over the lemmas of those forms, by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
+
+	public GetQueryMorphForms(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence luceneQueryStringSeq = args[1];
+    String language = "";
+    String luceneQueryString = "";
+		if (langSeq.isEmpty() || luceneQueryStringSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		luceneQueryString = luceneQueryStringSeq.getStringValue();
+		ArrayList<Form> resultVariants = null;
+		try { 
+	    MorphologyCache morphologyCache = MorphologyCache.getInstance();
+	    resultVariants = morphologyCache.getFormsByLuceneQuery(language, luceneQueryString, true);
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    ValueSequence result = new ValueSequence();
+    String resultStr = "";
+    int size = resultVariants.size();
+    MpdlNormalizer normalizer = new MpdlNormalizer(language);
+    for (int i=0; i<size; i++) {
+      Form form = resultVariants.get(i);
+      String formName = form.getFormName();
+      String formNameNormalizedToRegExpr = normalizer.deNormalizeToRegExpr(formName);
+      resultStr = resultStr + formNameNormalizedToRegExpr + "|";
+    }
+    if (size == 0) {
+      result.add(new StringValue(""));
+    } else {
+      resultStr = resultStr.substring(0, resultStr.length() - 1); // without last | character
+      result.add(new StringValue(resultStr));
+    }
+		return result;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetQueryRegularizations.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,110 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.ArrayList;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
+import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
+
+/**
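+ * Implements the XQuery extension function mpdltext:get-query-regularizations.
+ * It expects a language code and a Lucene query string; an illustrative call
+ * (argument values are hypothetical) would be
+ *   mpdltext:get-query-regularizations("la", "motus").
+ * The result joins, separated by "|", the original (unregularized) spellings of the
+ * query terms and of their morphological forms; an empty string is returned when
+ * nothing is found.
+ *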
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetQueryRegularizations extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("get-query-regularizations", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers regularizations (separated by |) of a given Lucene query string " +
+			"in a given language by the MPDL language technology",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
+
+	public GetQueryRegularizations(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence langSeq = args[0];
+    Sequence luceneQueryStringSeq = args[1];
+    String language = "";
+    String luceneQueryString = "";
+		if (langSeq.isEmpty() || luceneQueryStringSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		language = langSeq.getStringValue();
+		luceneQueryString = luceneQueryStringSeq.getStringValue();
+    ValueSequence result = new ValueSequence();
+    String resultStr = "";
+    // add orig regularizations of all lucene query forms
+    boolean regOrigFormsFound = false;
+    try { 
+      RegularizationManager regManager = RegularizationManager.getInstance();
+      ArrayList<String> regOrigForms = regManager.getRegOrigsByNormLuceneQueryString(language, luceneQueryString);
+      if (regOrigForms != null && regOrigForms.size() > 0) {
+        regOrigFormsFound = true;
+        for (int i=0; i<regOrigForms.size(); i++) {
+          String regOrigForm = regOrigForms.get(i);
+          resultStr = resultStr + regOrigForm + "|";
+        }
+      }
+      MorphologyCache morphologyCache = MorphologyCache.getInstance();
+      ArrayList<Form> resultVariants = morphologyCache.getFormsByLuceneQuery(language, luceneQueryString, true);
+      for (int i=0; i<resultVariants.size(); i++) {
+        Form form = resultVariants.get(i);
+        String formName = form.getFormName();
+        regOrigForms = regManager.getRegOrigsByNormLuceneQueryString(language, formName);
+        if (regOrigForms != null && regOrigForms.size() > 0) {
+          regOrigFormsFound = true;
+          for (int j=0; j<regOrigForms.size(); j++) {
+            String regOrigForm = regOrigForms.get(j);
+            resultStr = resultStr + regOrigForm + "|";
+          }
+        }
+      }
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    if (! regOrigFormsFound) {
+      result.add(new StringValue(""));
+    } else {
+      resultStr = resultStr.substring(0, resultStr.length() - 1); // without last | character
+      result.add(new StringValue(resultStr));
+    }
+		return result;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetTranscodedText.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,84 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.lex.db.LexHandler;
+
+/**
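+ * Implements the XQuery extension function mpdltext:transcode.
+ * An illustrative call (the input text is hypothetical) would be
+ *   mpdltext:transcode("buckwalter", "unicode", "kitAb").
+ * The transcoding itself is delegated to the LexHandler of the MPDL language
+ * technology layer.
+ *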
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetTranscodedText extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("transcode", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which transcodes the given string from one encoding to another. The first parameter " +
+			"is the source encoding (e.g. buckwalter), the second parameter is the target encoding (e.g. unicode) " +
+			"and the third parameter is the text to be transcoded.",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE), 
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+			                     new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE) },
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public GetTranscodedText(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence fromEncodingSeq = args[0];
+    Sequence toEncodingSeq = args[1];
+    Sequence inputStrSeq = args[2];
+    String fromEncoding = "";
+    String toEncoding = "";
+    String inputStr = "";
+		if (fromEncodingSeq.isEmpty() || toEncodingSeq.isEmpty() || inputStrSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		fromEncoding = fromEncodingSeq.getStringValue();
+		toEncoding = toEncodingSeq.getStringValue();
+		inputStr = inputStrSeq.getStringValue();
+		String transcodedStr = null;
+		try {
+	    LexHandler lexHandler = LexHandler.getInstance();
+	    transcodedStr = lexHandler.transcode(fromEncoding, toEncoding, inputStr);
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    }
+    ValueSequence result = new ValueSequence();
+    result.add(new StringValue(transcodedStr));
+		return result;
+	}
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetWordsFunction.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,110 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+/**
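+ * Implements the XQuery extension function mpdltext:getWords.
+ * The text is split with java.util.regex.Pattern; the flags string accepts the
+ * letters m, i, x and s, analogous to fn:tokenize. An illustrative call (argument
+ * values are hypothetical) would be
+ *   mpdltext:getWords("one two  three", "\s+", "")
+ * which returns the sequence ("one", "two", "three").
+ *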
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetWordsFunction extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("getWords", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers the words of a given text, split by a regular expression pattern and a flags string (see also fn:tokenize)",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
+			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
+
+	public GetWordsFunction(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+	  Sequence seqText = args[0];
+    Sequence seqPattern = args[1];
+    Sequence seqFlags = args[2];
+    String text = "";
+    String pattern = " ";  // default pattern
+    int flags = 0;
+		if (seqText.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		else
+		  text = seqText.getStringValue();
+		if (!seqPattern.isEmpty())
+		  pattern = seqPattern.getStringValue();
+		if (!seqFlags.isEmpty())
+		  flags = parseFlags(seqFlags.getStringValue());
+		ValueSequence result = null;
+    try {
+      Pattern pat = Pattern.compile(pattern, flags);
+      String[] tokens = pat.split(text, -1);
+      result = new ValueSequence();
+      for (int i = 0; i < tokens.length; i++)
+        result.add(new StringValue(tokens[i]));             
+    } catch (PatternSyntaxException e) {
+      throw new XPathException(e);
+    }
+		return result;
+	}
+
+  private int parseFlags(String s) throws XPathException {
+    int flags = 0;
+    for(int i = 0; i < s.length(); i++) {
+      char ch = s.charAt(i);
+      switch(ch) {
+        case 'm':
+          flags |= Pattern.MULTILINE;
+          break;
+        case 'i':
+          flags = flags | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+          break;
+        case 'x':
+          flags |= Pattern.COMMENTS;
+          break;
+        case 's':
+          flags |= Pattern.DOTALL;
+          break;
+        default:
+          throw new XPathException("Invalid regular expression flag: " + ch);
+      }
+    }
+    return flags;
+  }
+ 
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/LuceneQueryParser.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,81 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+import de.mpg.mpiwg.berlin.mpdl.analysis.MpdlStandardAnalyzer;
+
+/**
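+ * Implements the XQuery extension function mpdltext:lucene-parse-query.
+ * The query is parsed with the MpdlStandardAnalyzer; an illustrative call
+ * (the query value is hypothetical) would be
+ *   mpdltext:lucene-parse-query("natura AND (corpus OR motus)").
+ * The result is the parser's error message, or an empty string when the query
+ * parses without errors.
+ *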
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class LuceneQueryParser extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("lucene-parse-query", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which parses the given Lucene query and returns the parse error message (an empty string if the query is valid)",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE) },
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public LuceneQueryParser(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    String retMessage = "";
+    ValueSequence result = new ValueSequence();
+    Sequence luceneQueryStringSeq = args[0];
+		if (luceneQueryStringSeq.isEmpty()) {
+	    result.add(new StringValue(""));
+			return result;
+		}
+		String luceneQueryString = luceneQueryStringSeq.getStringValue();
+    if (luceneQueryString == null || luceneQueryString.equals("")) {
+      result.add(new StringValue(""));
+      return result;
+    }
+		try { 
+      MpdlStandardAnalyzer analyzer = new MpdlStandardAnalyzer();
+      QueryParser parser = new QueryParser("", analyzer);
+      parser.parse(luceneQueryString);
+    } catch (ParseException e) {
+      retMessage = e.getMessage();
+    }
+    result.add(new StringValue(retMessage));
+		return result;
+	}
+	
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/MPDLTextModule.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,75 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import org.exist.xquery.AbstractInternalModule;
+import org.exist.xquery.FunctionDef;
+
+/**
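+ * Registers the mpdltext extension functions with eXist under the namespace
+ * http://exist-db.org/xquery/mpdltext (default prefix mpdltext). In an XQuery the
+ * namespace can be bound explicitly, e.g.
+ *   declare namespace mpdltext = "http://exist-db.org/xquery/mpdltext";
+ * before calling functions such as mpdltext:getWords or mpdltext:transcode.
+ *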
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class MPDLTextModule extends AbstractInternalModule {
+	public final static String NAMESPACE_URI = "http://exist-db.org/xquery/mpdltext";
+	public final static String PREFIX = "mpdltext";
+	
+	private final static FunctionDef[] functions = {
+    new FunctionDef(Dictionarize.signature, Dictionarize.class),
+    new FunctionDef(NormalizeChars.signature, NormalizeChars.class),
+    new FunctionDef(GetDonatusQueryVariants.signature, GetDonatusQueryVariants.class),
+    new FunctionDef(GetQueryMorphForms.signature, GetQueryMorphForms.class),
+    new FunctionDef(GetQueryRegularizations.signature, GetQueryRegularizations.class),
+    new FunctionDef(GetLemma.signature, GetLemma.class),
+    new FunctionDef(GetLemmasByFormName.signature, GetLemmasByFormName.class),
+    new FunctionDef(GetLemmasByLuceneQuery.signature, GetLemmasByLuceneQuery.class),
+    new FunctionDef(GetLemmasStrByFormName.signature, GetLemmasStrByFormName.class),
+    new FunctionDef(GetLexEntriesByFormName.signature, GetLexEntriesByFormName.class),
+    new FunctionDef(GetLexEntriesByLuceneQuery.signature, GetLexEntriesByLuceneQuery.class),
+    new FunctionDef(GetLexEntryKeysByFormName.signature, GetLexEntryKeysByFormName.class),
+    new FunctionDef(GetTranscodedText.signature, GetTranscodedText.class),
+		new FunctionDef(GetWordsFunction.signature, GetWordsFunction.class),
+    new FunctionDef(GetBig5EncodedTerms.signature, GetBig5EncodedTerms.class),
+    new FunctionDef(EncodeBig5.signature, EncodeBig5.class),
+    new FunctionDef(LuceneQueryParser.signature, LuceneQueryParser.class),
+    new FunctionDef(ToCLevelGenerator.signature, ToCLevelGenerator.class)
+	};
+	
+	public MPDLTextModule() {
+		super(functions);
+	}
+
+	public String getNamespaceURI() {
+		return NAMESPACE_URI;
+	}
+
+	public String getDefaultPrefix() {
+		return PREFIX;
+	}
+
+	public String getDescription() {
+		return "A module for MPDL text and language technology functions";
+	}
+  
+  public String getReleaseVersion() {
+    return "1.0";
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/NormalizeChars.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,101 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.NormalizeCharsContentHandler;
+
+/**
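+ * Implements the XQuery extension function mpdltext:normalizeChars.
+ * It expects a comma-separated list of normalization function names, a language
+ * code and an XML fragment string; an illustrative call (function name, language
+ * and fragment are hypothetical) would be
+ *   mpdltext:normalizeChars("reg", "de", "<s>Auffzug</s>").
+ * The fragment is parsed with SAX and returned with its text content replaced by
+ * the output of the NormalizeCharsContentHandler.
+ *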
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class NormalizeChars extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("normalizeChars", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which normalizes the characters of the given XML fragment string for the given language and " +
+			"normalization functions. The result is an XML fragment in which all text content is replaced by its normalized form.",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE), 
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+                           new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)},
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	public NormalizeChars(XQueryContext context) {
+		super(context, signature);
+	}
+
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence normalizeFunctionSeq = args[0];
+    Sequence languageSeq = args[1];
+	  Sequence xmlFragmentSeq = args[2];
+    String normalizeFunctions = "";  // default
+    String language = "";
+    String xmlFragment = "";
+		if (xmlFragmentSeq.isEmpty() || languageSeq.isEmpty())
+			return Sequence.EMPTY_SEQUENCE;
+		normalizeFunctions = normalizeFunctionSeq.getStringValue();
+		String[] normalizeFunctionsArray = normalizeFunctions.split(","); 
+    language = languageSeq.getStringValue();
+		xmlFragment = xmlFragmentSeq.getStringValue();
+		String outputXmlFragment = null;
+		try { 
+		  NormalizeCharsContentHandler normCharsContentHandler = new NormalizeCharsContentHandler(normalizeFunctionsArray, language);
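+      // Note: this instantiates the JDK-internal copy of Xerces
+      // (com.sun.org.apache.xerces.internal.parsers.SAXParser); a reader obtained
+      // via javax.xml.parsers.SAXParserFactory would be the portable alternative.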
+      XMLReader xmlParser = new SAXParser();
+      xmlParser.setContentHandler(normCharsContentHandler);
+      Reader stringReaderXmlFragment = new StringReader(xmlFragment);
+      InputSource input = new InputSource(stringReaderXmlFragment);
+      xmlParser.parse(input);
+      outputXmlFragment = normCharsContentHandler.getXmlFragment();
+    } catch (ApplicationException e) {
+      throw new XPathException(e);
+    } catch (IOException e) {
+      throw new XPathException(e);
+    } catch (SAXException e) {
+      throw new XPathException(e);
+    }
+    ValueSequence result = new ValueSequence();
+    result.add(new StringValue(outputXmlFragment));
+		return result;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/ToCLevelGenerator.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,271 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import org.exist.memtree.NodeImpl;
+import org.exist.dom.NodeSet;
+import org.exist.dom.QName;
+import org.exist.memtree.DocumentImpl;
+import org.exist.memtree.MemTreeBuilder;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Item;
+import org.exist.xquery.value.MemoryNodeSet;
+import org.exist.xquery.value.NodeValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.Type;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+
+/**
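+ * Implements the XQuery extension function mpdltext:generate-toc-levels.
+ * Each input item is expected to be a ToC entry node with a child element named
+ * level; an illustrative call would be mpdltext:generate-toc-levels($tocEntries),
+ * where $tocEntries is a hypothetical sequence of such nodes. The function copies
+ * the children of every entry and additionally emits a level-string (e.g. "2.1.3.")
+ * built from per-level counters, plus the resulting real-level depth.
+ *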
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class ToCLevelGenerator extends BasicFunction {
+
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("generate-toc-levels", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which generates hierarchical ToC level numbers (e.g. 1.2.3.) for the given ToC entries " +
+			"based on their level values",
+			new SequenceType[] { new SequenceType(Type.ITEM, Cardinality.ZERO_OR_MORE) },
+			new SequenceType(Type.NODE, Cardinality.ZERO_OR_MORE));
+
+	public ToCLevelGenerator(XQueryContext context) {
+		super(context, signature);
+	}
+
+  public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence tocEntriesSeq = args[0];
+    if (tocEntriesSeq.isEmpty())
+      return Sequence.EMPTY_SEQUENCE;
+    boolean tocEntriesIsPersistent = tocEntriesSeq.isPersistentSet();  // Workaround: when entries are persistent (not in memory) 
+    if (tocEntriesIsPersistent)
+      return evalAsPersistentSeq(args, contextSequence);
+    int level = 0;
+    int level1 = 0;
+    int level2 = 0;
+    int level3 = 0;
+    int level4 = 0;
+    int level5 = 0;
+    int level6 = 0;
+    int level7 = 0;
+    DocumentImpl doc = null;
+    MemTreeBuilder builder = context.getDocumentBuilder();
+    builder.startElement("", "toc-entries", "toc-entries", null);
+    MemoryNodeSet nodeToNodeSet = tocEntriesSeq.toMemNodeSet(); // convert once; the set does not change per iteration
+    for (int i=0; i<tocEntriesSeq.getItemCount(); i++) {
+      Node node = nodeToNodeSet.get(i);
+      NodeList childNodeList = node.getChildNodes();
+      int childsLength = childNodeList.getLength();
+      if (childsLength > 0) {
+        builder.startElement("", "toc-entry", "toc-entry", null);
+        for (int j=0; j<childsLength; j++) {
+          NodeImpl childNode = (NodeImpl)childNodeList.item(j);
+          String childNodeName = childNode.getLocalName();
+          String childNodeValue = childNode.getStringValue(); 
+          builder.startElement("", childNodeName, childNodeName, null);
+          if (childNodeValue != null)
+            builder.characters(childNodeValue);
+          builder.endElement();
+          if (childNodeName.equals("level")) {
+            int nodeLevel = 1;  // default is a level one node
+            try {
+              nodeLevel = new Integer(childNodeValue).intValue();
+            } catch (NumberFormatException e) {
+              // nothing 
+            }
+            if (level > nodeLevel) {
+              if (nodeLevel == 1) {
+                level2 = 0; level3 = 0; level4 = 0; level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 2) {
+                level3 = 0; level4 = 0; level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 3) {
+                level4 = 0; level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 4) {
+                level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 5) {
+                level6 = 0; level7 = 0;
+              } else if (nodeLevel == 6) {
+                level7 = 0;
+              }
+            }
+            level = nodeLevel; 
+            if (level == 1)
+              level1++;
+            else if (level == 2)
+              level2++;
+            else if (level == 3)
+              level3++;
+            else if (level == 4)
+              level4++;
+            else if (level == 5)
+              level5++;
+            else if (level == 6)
+              level6++;
+            else if (level == 7)
+              level7++;
+          }
+        }
+        String levelString =  "";
+        if (level1 != 0)
+          levelString += level1 + ".";
+        if (level2 != 0)
+          levelString += level2 + ".";
+        if (level3 != 0)
+          levelString += level3 + ".";
+        if (level4 != 0)
+          levelString += level4 + ".";
+        if (level5 != 0)
+          levelString += level5 + ".";
+        if (level6 != 0)
+          levelString += level6 + ".";
+        if (level7 != 0)
+          levelString += level7 + ".";
+        
+        builder.startElement("", "level-string", "level-string", null);
+        builder.characters(levelString);
+        builder.endElement();
+
+        String[] splitStr = levelString.split("\\.");
+        int realLevel = splitStr.length;
+        builder.startElement("", "real-level", "real-level", null);
+        builder.characters("" + realLevel);
+        builder.endElement();
+        builder.endElement();
+      }
+    }
+    builder.endElement();
+    doc = ((DocumentImpl)builder.getDocument());
+    return doc;
+  }
+
+  public Sequence evalAsPersistentSeq(Sequence[] args, Sequence contextSequence) throws XPathException {
+    Sequence tocEntriesSeq = args[0];
+    if (tocEntriesSeq.isEmpty())
+      return Sequence.EMPTY_SEQUENCE;
+    int level = 0;
+    int level1 = 0;
+    int level2 = 0;
+    int level3 = 0;
+    int level4 = 0;
+    int level5 = 0;
+    int level6 = 0;
+    int level7 = 0;
+    DocumentImpl doc = null;
+    MemTreeBuilder builder = context.getDocumentBuilder();
+    builder.startElement("", "toc-entries", "toc-entries", null);
+    for (int i=0; i<tocEntriesSeq.getItemCount(); i++) {
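+      // unwrap each item into a node set and take its first node to get a DOM view of the entry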
+      Item item = tocEntriesSeq.itemAt(i);
+      NodeValue nv = (NodeValue) item;
+      NodeImpl nodeTmp = (NodeImpl) nv;
+      NodeSet nodeToNodeSet = nodeTmp.toNodeSet();
+      Node node = nodeToNodeSet.item(0);
+      NodeList childNodeList = node.getChildNodes();
+      int childsLength = childNodeList.getLength();
+      if (childsLength > 0) {
+        builder.startElement("", "toc-entry", "toc-entry", null);
+        for (int j=0; j<childsLength; j++) {
+          Node childNode = childNodeList.item(j);
+          String childNodeName = childNode.getLocalName();
+          String childNodeValue = childNode.getNodeValue();
+          builder.startElement("", childNodeName, childNodeName, null);
+          if (childNodeValue != null)
+            builder.characters(childNodeValue);
+          builder.endElement();
+          if (childNodeName.equals("level")) {
+            int nodeLevel = 1;  // default is a level one node
+            try {
+              nodeLevel = Integer.parseInt(childNodeValue);
+            } catch (NumberFormatException e) {
+              // non-numeric or missing level value: fall back to level 1
+            }
+            if (level > nodeLevel) {
+              if (nodeLevel == 1) {
+                level2 = 0; level3 = 0; level4 = 0; level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 2) {
+                level3 = 0; level4 = 0; level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 3) {
+                level4 = 0; level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 4) {
+                level5 = 0; level6 = 0; level7 = 0;
+              } else if (nodeLevel == 5) {
+                level6 = 0; level7 = 0;
+              } else if (nodeLevel == 6) {
+                level7 = 0;
+              }
+            }
+            level = nodeLevel; 
+            if (level == 1)
+              level1++;
+            else if (level == 2)
+              level2++;
+            else if (level == 3)
+              level3++;
+            else if (level == 4)
+              level4++;
+            else if (level == 5)
+              level5++;
+            else if (level == 6)
+              level6++;
+            else if (level == 7)
+              level7++;
+          }
+        }
+        String levelString = "";
+        if (level1 != 0)
+          levelString += level1 + ".";
+        if (level2 != 0)
+          levelString += level2 + ".";
+        if (level3 != 0)
+          levelString += level3 + ".";
+        if (level4 != 0)
+          levelString += level4 + ".";
+        if (level5 != 0)
+          levelString += level5 + ".";
+        if (level6 != 0)
+          levelString += level6 + ".";
+        if (level7 != 0)
+          levelString += level7 + ".";
+        
+        builder.startElement("", "level-string", "level-string", null);
+        builder.characters(levelString);
+        builder.endElement();
+
+        String[] splitStr = levelString.split("\\.");
+        int realLevel = splitStr.length;
+        builder.startElement("", "real-level", "real-level", null);
+        builder.characters("" + realLevel);
+        builder.endElement();
+        builder.endElement();
+      }
+    }
+    builder.endElement();
+    doc = ((DocumentImpl)builder.getDocument());
+    return doc;
+  }
+
+}