Mercurial > hg > mpdl-group
diff software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetQueryMorphForms.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetQueryMorphForms.java Wed Nov 24 17:24:23 2010 +0100 @@ -0,0 +1,96 @@ +/* + * eXist Open Source Native XML Database: Extension module + * Copyright (C) 2008 Josef Willenborg + * jwillenborg@mpiwg-berlin.mpg.de + * http://www.mpiwg-berlin.mpg.de + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * $Id: TextModule.java $ + */ +package org.exist.xquery.modules.mpdltext; + +import java.util.ArrayList; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.Cardinality; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.SequenceType; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import de.mpg.mpiwg.berlin.mpdl.lt.analyzer.MpdlNormalizer; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; +import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache; + +/** + * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de) + */ +public class GetQueryMorphForms extends BasicFunction { + + public final static FunctionSignature signature = + new FunctionSignature( + new QName("get-query-morph-forms", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX), + "A function which delivers morphological forms (seperated by |) of a given Lucene query string of a given " + + "language over the lemma of that form by the MPDL language technology", + new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), + new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) }, + new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE)); + + public GetQueryMorphForms(XQueryContext context) { + super(context, signature); + } + + public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { + Sequence langSeq = args[0]; + Sequence luceneQueryStringSeq = args[1]; + String language = ""; + String luceneQueryString = ""; + if (langSeq.isEmpty() || luceneQueryStringSeq.isEmpty()) + return Sequence.EMPTY_SEQUENCE; + language = langSeq.getStringValue(); + luceneQueryString = luceneQueryStringSeq.getStringValue(); + ArrayList<Form> resultVariants = null; + try { + MorphologyCache morphologyCache = MorphologyCache.getInstance(); + resultVariants = morphologyCache.getFormsByLuceneQuery(language, luceneQueryString, true); + } catch (ApplicationException e) { + throw new XPathException(e); + } + ValueSequence result = new ValueSequence(); + String resultStr = ""; + int size = resultVariants.size(); + MpdlNormalizer normalizer = new MpdlNormalizer(language); + for (int i=0; i<size; i++) { + Form form = resultVariants.get(i); + String formName = form.getFormName(); + String formNameNormalizedToRegExpr = normalizer.deNormalizeToRegExpr(formName); + resultStr = resultStr + formNameNormalizedToRegExpr + "|"; + } + if (size == 0) { + result.add(new StringValue("")); + } else { + resultStr = resultStr.substring(0, resultStr.length() - 1); // without last | character + result.add(new StringValue(resultStr)); + } + return result; + } +}