Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemma.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 /* | |
2 * eXist Open Source Native XML Database: Extension module | |
3 * Copyright (C) 2008 Josef Willenborg | |
4 * jwillenborg@mpiwg-berlin.mpg.de | |
5 * http://www.mpiwg-berlin.mpg.de | |
6 * | |
7 * This program is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public License | |
9 * as published by the Free Software Foundation; either version 2 | |
10 * of the License, or (at your option) any later version. | |
11 * | |
12 * This program is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public License | |
18 * along with this program; if not, write to the Free Software | |
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
20 * | |
21 * $Id: TextModule.java $ | |
22 */ | |
23 package org.exist.xquery.modules.mpdltext; | |
24 | |
25 import java.util.ArrayList; | |
26 | |
27 import org.exist.dom.QName; | |
28 import org.exist.memtree.DocumentImpl; | |
29 import org.exist.memtree.MemTreeBuilder; | |
30 import org.exist.xquery.BasicFunction; | |
31 import org.exist.xquery.Cardinality; | |
32 import org.exist.xquery.FunctionSignature; | |
33 import org.exist.xquery.XPathException; | |
34 import org.exist.xquery.XQueryContext; | |
35 import org.exist.xquery.value.Sequence; | |
36 import org.exist.xquery.value.SequenceType; | |
37 import org.exist.xquery.value.Type; | |
38 | |
39 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; | |
40 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; | |
41 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; | |
42 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache; | |
43 | |
44 /** | |
45 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de) | |
46 */ | |
47 public class GetLemma extends BasicFunction { | |
48 | |
49 public final static FunctionSignature signature = | |
50 new FunctionSignature( | |
51 new QName("get-lemma", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX), | |
52 "A function which delivers the lemma of a given lemma name and language " + | |
53 "by the MPDL language technology", | |
54 new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), | |
55 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) }, | |
56 new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE)); | |
57 | |
58 public GetLemma(XQueryContext context) { | |
59 super(context, signature); | |
60 } | |
61 | |
62 public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { | |
63 Sequence langSeq = args[0]; | |
64 Sequence formNameSeq = args[1]; | |
65 String language = ""; | |
66 String formName = ""; | |
67 if (langSeq.isEmpty() || formNameSeq.isEmpty()) | |
68 return Sequence.EMPTY_SEQUENCE; | |
69 language = langSeq.getStringValue(); | |
70 formName = formNameSeq.getStringValue(); | |
71 ArrayList<Lemma> lemmas = null; | |
72 try { | |
73 MorphologyCache morphologyCache = MorphologyCache.getInstance(); | |
74 Lemma lemma = morphologyCache.getLemma(language, formName, true); | |
75 if (lemma == null) { | |
76 lemma = new Lemma("standard analyzer", language, formName); // delivers at least one lemma with one form (with the formName) | |
77 } | |
78 lemmas = new ArrayList<Lemma>(); | |
79 lemmas.add(lemma); | |
80 } catch (ApplicationException e) { | |
81 throw new XPathException(e); | |
82 } | |
83 DocumentImpl doc = null; | |
84 if (lemmas != null) { | |
85 MemTreeBuilder builder = context.getDocumentBuilder(); | |
86 builder.startElement("", "lemmas", "lemmas", null); | |
87 for (int i=0; i<lemmas.size(); i++) { | |
88 Lemma lemma = lemmas.get(i); | |
89 builder.startElement("", "lemma", "lemma", null); | |
90 builder.startElement("", "provider", "provider", null); | |
91 builder.characters(lemma.getProvider()); | |
92 builder.endElement(); | |
93 builder.startElement("", "language", "language", null); | |
94 builder.characters(lemma.getLanguage()); | |
95 builder.endElement(); | |
96 builder.startElement("", "lemma-name", "lemma-name", null); | |
97 builder.characters(lemma.getLemmaName()); | |
98 builder.endElement(); | |
99 ArrayList<Form> forms = lemma.getFormsList(); | |
100 builder.startElement("", "forms-size", "forms-size", null); | |
101 builder.characters(String.valueOf(forms.size())); | |
102 builder.endElement(); | |
103 builder.startElement("", "forms", "forms", null); | |
104 for (int j=0; j<forms.size(); j++) { | |
105 Form form = forms.get(j); | |
106 builder.startElement("", "form", "form", null); | |
107 builder.startElement("", "provider", "provider", null); | |
108 builder.characters(form.getProvider()); | |
109 builder.endElement(); | |
110 builder.startElement("", "language", "language", null); | |
111 builder.characters(form.getLanguage()); | |
112 builder.endElement(); | |
113 builder.startElement("", "form-name", "form-name", null); | |
114 builder.characters(form.getFormName()); | |
115 builder.endElement(); | |
116 builder.startElement("", "lemma-name", "lemma-name", null); | |
117 builder.characters(lemma.getLemmaName()); | |
118 builder.endElement(); | |
119 builder.endElement(); | |
120 } | |
121 builder.endElement(); | |
122 builder.endElement(); | |
123 } | |
124 builder.endElement(); | |
125 doc = ((DocumentImpl)builder.getDocument()); | |
126 } else { | |
127 return Sequence.EMPTY_SEQUENCE; | |
128 } | |
129 return doc; | |
130 } | |
131 } |