annotate software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/Dictionarize.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children 5df60f24e997
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
1 /*
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
2 * eXist Open Source Native XML Database: Extension module
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
3 * Copyright (C) 2008 Josef Willenborg
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
4 * jwillenborg@mpiwg-berlin.mpg.de
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
5 * http://www.mpiwg-berlin.mpg.de
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
6 *
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
7 * This program is free software; you can redistribute it and/or
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public License
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
9 * as published by the Free Software Foundation; either version 2
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
10 * of the License, or (at your option) any later version.
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
11 *
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
12 * This program is distributed in the hope that it will be useful,
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
15 * GNU Lesser General Public License for more details.
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
16 *
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public License
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
18 * along with this program; if not, write to the Free Software
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
20 *
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
21 * $Id: TextModule.java $
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
22 */
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
23 package org.exist.xquery.modules.mpdltext;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
24
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
25 import java.io.IOException;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
26 import java.io.Reader;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
27 import java.io.StringReader;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
28
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
29 import org.exist.dom.QName;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
30 import org.exist.xquery.BasicFunction;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
31 import org.exist.xquery.Cardinality;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
32 import org.exist.xquery.FunctionSignature;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
33 import org.exist.xquery.XPathException;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
34 import org.exist.xquery.XQueryContext;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
35 import org.exist.xquery.value.Sequence;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
36 import org.exist.xquery.value.SequenceType;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
37 import org.exist.xquery.value.StringValue;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
38 import org.exist.xquery.value.Type;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
39 import org.exist.xquery.value.ValueSequence;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
40 import org.xml.sax.InputSource;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
41 import org.xml.sax.SAXException;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
42 import org.xml.sax.XMLReader;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
43
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
44 import com.sun.org.apache.xerces.internal.parsers.SAXParser;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
45
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
46 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
47 import de.mpg.mpiwg.berlin.mpdl.lt.doc.DictionarizerContentHandler;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
48
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
49 /**
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
50 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
51 */
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
52 public class Dictionarize extends BasicFunction {
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
53
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
54 public final static FunctionSignature signature =
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
55 new FunctionSignature(
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
56 new QName("dictionarize", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
57 "A function which dictionarize the given xml fragment string of the given language." +
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
58 "Result is xml fragment which contains the original xml fragment enriched by " +
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
59 "a word tag for each word which contains attributes for the lemma and pollux dictionary.",
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
60 new SequenceType[] { new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE),
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
61 new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE)},
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
62 new SequenceType(Type.STRING, Cardinality.EXACTLY_ONE));
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
63
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
64 public Dictionarize(XQueryContext context) {
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
65 super(context, signature);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
66 }
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
67
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
68 public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
69 Sequence xmlFragmentSeq = args[0];
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
70 Sequence languageSeq = args[1];
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
71 String xmlFragment = "";
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
72 String language = "";
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
73 if (xmlFragmentSeq.isEmpty() || languageSeq.isEmpty())
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
74 return Sequence.EMPTY_SEQUENCE;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
75 xmlFragment = xmlFragmentSeq.getStringValue();
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
76 language = languageSeq.getStringValue();
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
77 String outputXmlFragment = null;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
78 try {
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
79 DictionarizerContentHandler dictContentHandler = new DictionarizerContentHandler(language);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
80 XMLReader xmlParser = new SAXParser();
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
81 xmlParser.setContentHandler(dictContentHandler);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
82 Reader stringReaderXmlFragment = new StringReader(xmlFragment);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
83 InputSource input = new InputSource(stringReaderXmlFragment);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
84 xmlParser.parse(input);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
85 outputXmlFragment = dictContentHandler.getXmlFragment();
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
86 } catch (ApplicationException e) {
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
87 throw new XPathException(e);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
88 } catch (IOException e) {
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
89 throw new XPathException(e);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
90 } catch (SAXException e) {
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
91 throw new XPathException(e);
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
92 }
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
93 ValueSequence result = new ValueSequence();
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
94 result.add(new StringValue(outputXmlFragment));
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
95 return result;
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
96 }
408254cf2f1d Erstellung
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff changeset
97 }