diff software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/Dictionarize.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children 5df60f24e997
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/Dictionarize.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,97 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: TextModule.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+
+import com.sun.org.apache.xerces.internal.parsers.SAXParser;
+
+import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
+import de.mpg.mpiwg.berlin.mpdl.lt.doc.DictionarizerContentHandler;
+
+/**
+ * XQuery extension function <code>dictionarize</code> of the MPDL text module.
+ *
+ * <p>Takes two string arguments, an XML fragment and a language, feeds the
+ * fragment through a SAX parser that has a {@link DictionarizerContentHandler}
+ * (constructed for that language) attached, and returns the XML string the
+ * handler reports afterwards.</p>
+ *
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class Dictionarize extends BasicFunction {
+
+	/** SAX2 feature id: include external general entities while parsing. */
+	private static final String FEATURE_EXTERNAL_GENERAL_ENTITIES =
+		"http://xml.org/sax/features/external-general-entities";
+
+	/** SAX2 feature id: include external parameter entities while parsing. */
+	private static final String FEATURE_EXTERNAL_PARAMETER_ENTITIES =
+		"http://xml.org/sax/features/external-parameter-entities";
+
+	/** Xerces feature id: load the external DTD even when not validating. */
+	private static final String FEATURE_LOAD_EXTERNAL_DTD =
+		"http://apache.org/xml/features/nonvalidating/load-external-dtd";
+
+	/**
+	 * Signature of the function: two mandatory string arguments (the XML
+	 * fragment and the language), one string result.
+	 */
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("dictionarize", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which dictionarize the given xml fragment string of the given language. " +
+			"Result is xml fragment which contains the original xml fragment enriched by " +
+			"a word tag for each word which contains attributes for the lemma and pollux dictionary.",
+			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
+			                     new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)},
+			new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
+
+	/**
+	 * Creates the function instance for the given query context.
+	 *
+	 * @param context the XQuery context this function is evaluated in
+	 */
+	public Dictionarize(XQueryContext context) {
+		super(context, signature);
+	}
+
+	/**
+	 * Parses the XML fragment in <code>args[0]</code> with a content handler
+	 * created for the language in <code>args[1]</code> and returns the XML
+	 * string produced by that handler.
+	 *
+	 * @param args            <code>args[0]</code>: the XML fragment as a string;
+	 *                        <code>args[1]</code>: the language as a string
+	 * @param contextSequence not used by this function
+	 * @return a one-item sequence holding the handler's XML string, or the
+	 *         empty sequence if either argument is empty
+	 * @throws XPathException if handler creation, parser configuration,
+	 *                        parsing or reading the input fails; the original
+	 *                        exception is passed on as the cause
+	 */
+	@Override
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+		Sequence xmlFragmentSeq = args[0];
+		Sequence languageSeq = args[1];
+		// Defensive guard: without both inputs there is nothing to parse.
+		if (xmlFragmentSeq.isEmpty() || languageSeq.isEmpty()) {
+			return Sequence.EMPTY_SEQUENCE;
+		}
+		String xmlFragment = xmlFragmentSeq.getStringValue();
+		String language = languageSeq.getStringValue();
+		String outputXmlFragment;
+		try {
+			DictionarizerContentHandler dictContentHandler = new DictionarizerContentHandler(language);
+			// NOTE(review): SAXParser here is the JDK-internal Xerces class
+			// (com.sun.org.apache.xerces.internal); consider obtaining the
+			// reader through javax.xml.parsers.SAXParserFactory instead.
+			XMLReader xmlParser = new SAXParser();
+			// The fragment is supplied by the query caller: do not let it pull
+			// in external entities or an external DTD (XXE hardening).
+			xmlParser.setFeature(FEATURE_EXTERNAL_GENERAL_ENTITIES, false);
+			xmlParser.setFeature(FEATURE_EXTERNAL_PARAMETER_ENTITIES, false);
+			xmlParser.setFeature(FEATURE_LOAD_EXTERNAL_DTD, false);
+			xmlParser.setContentHandler(dictContentHandler);
+			Reader stringReaderXmlFragment = new StringReader(xmlFragment);
+			InputSource input = new InputSource(stringReaderXmlFragment);
+			xmlParser.parse(input);
+			outputXmlFragment = dictContentHandler.getXmlFragment();
+		} catch (ApplicationException e) {
+			throw new XPathException(e);
+		} catch (IOException e) {
+			throw new XPathException(e);
+		} catch (SAXException e) {
+			// Also covers SAXNotRecognizedException / SAXNotSupportedException
+			// raised by setFeature above.
+			throw new XPathException(e);
+		}
+		ValueSequence result = new ValueSequence();
+		result.add(new StringValue(outputXmlFragment));
+		return result;
+	}
+}