0
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 /*
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2 * eXist Open Source Native XML Database: Extension module
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 * Copyright (C) 2008 Josef Willenborg
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4 * jwillenborg@mpiwg-berlin.mpg.de
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 * http://www.mpiwg-berlin.mpg.de
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6 *
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 * This program is free software; you can redistribute it and/or
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public License
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 * as published by the Free Software Foundation; either version 2
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10 * of the License, or (at your option) any later version.
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 *
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12 * This program is distributed in the hope that it will be useful,
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15 * GNU Lesser General Public License for more details.
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 *
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public License
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18 * along with this program; if not, write to the Free Software
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20 *
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21 * $Id: TextModule.java $
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22 */
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 package org.exist.xquery.modules.mpdltext;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25 import java.util.ArrayList;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 import org.exist.dom.QName;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28 import org.exist.xquery.BasicFunction;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29 import org.exist.xquery.Cardinality;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30 import org.exist.xquery.FunctionSignature;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31 import org.exist.xquery.XPathException;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 import org.exist.xquery.XQueryContext;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33 import org.exist.xquery.value.Sequence;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 import org.exist.xquery.value.SequenceType;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35 import org.exist.xquery.value.StringValue;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36 import org.exist.xquery.value.Type;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37 import org.exist.xquery.value.ValueSequence;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40 import de.mpg.mpiwg.berlin.mpdl.lt.doc.regularization.RegularizationManager;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42 import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44 /**
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46 */
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47 public class GetQueryRegularizations extends BasicFunction {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49 public final static FunctionSignature signature =
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
50 new FunctionSignature(
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
51 new QName("get-query-regularizations", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
52 "A function which delivers regularizations (seperated by |) of a given Lucene query string of a given " +
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
53 "language by the MPDL language technology",
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
54 new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
55 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
56 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
57
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
58 public GetQueryRegularizations(XQueryContext context) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
59 super(context, signature);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
60 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
61
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
62 public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
63 Sequence langSeq = args[0];
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
64 Sequence luceneQueryStringSeq = args[1];
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
65 String language = "";
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
66 String luceneQueryString = "";
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
67 if (langSeq.isEmpty() || luceneQueryStringSeq.isEmpty())
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
68 return Sequence.EMPTY_SEQUENCE;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
69 language = langSeq.getStringValue();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
70 luceneQueryString = luceneQueryStringSeq.getStringValue();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
71 ValueSequence result = new ValueSequence();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
72 String resultStr = "";
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
73 // add orig regularizations of all lucene query forms
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
74 boolean regOrigFormsFound = false;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
75 try {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
76 RegularizationManager regManager = RegularizationManager.getInstance();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
77 ArrayList<String> regOrigForms = regManager.getRegOrigsByNormLuceneQueryString(language, luceneQueryString);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
78 if (regOrigForms != null && regOrigForms.size() > 0) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
79 regOrigFormsFound = true;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
80 for (int i=0; i<regOrigForms.size(); i++) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
81 String regOrigForm = regOrigForms.get(i);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
82 resultStr = resultStr + regOrigForm + "|";
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
83 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
84 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
85 MorphologyCache morphologyCache = MorphologyCache.getInstance();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
86 ArrayList<Form> resultVariants = morphologyCache.getFormsByLuceneQuery(language, luceneQueryString, true);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
87 for (int i=0; i<resultVariants.size(); i++) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
88 Form form = resultVariants.get(i);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
89 String formName = form.getFormName();
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
90 regOrigForms = regManager.getRegOrigsByNormLuceneQueryString(language, formName);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
91 if (regOrigForms != null && regOrigForms.size() > 0) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
92 regOrigFormsFound = true;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
93 for (int j=0; j<regOrigForms.size(); j++) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
94 String regOrigForm = regOrigForms.get(j);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
95 resultStr = resultStr + regOrigForm + "|";
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
96 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
97 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
98 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
99 } catch (ApplicationException e) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
100 throw new XPathException(e);
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
101 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
102 if (! regOrigFormsFound) {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
103 result.add(new StringValue(""));
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
104 } else {
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
105 resultStr = resultStr.substring(0, resultStr.length() - 1); // without last | character
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
106 result.add(new StringValue(resultStr));
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
107 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
108 return result;
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
109 }
|
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
110 }
|