Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasByLuceneQuery.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
/*
 *  eXist Open Source Native XML Database: Extension module
 *  Copyright (C) 2008 Josef Willenborg
 *  jwillenborg@mpiwg-berlin.mpg.de
 *  http://www.mpiwg-berlin.mpg.de
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  $Id: TextModule.java $
 */
package org.exist.xquery.modules.mpdltext;

import java.util.ArrayList;

import org.exist.dom.QName;
import org.exist.memtree.DocumentImpl;
import org.exist.memtree.MemTreeBuilder;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.Type;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma;
import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache;

/**
 * XQuery extension function {@code get-lemmas-by-lucene-query}.
 *
 * <p>Takes a language (first argument) and a Lucene query string (second
 * argument), asks the {@link MorphologyCache} for the matching lemmas and
 * serialises them into an in-memory XML document shaped like this:
 *
 * <pre>{@code
 * <lemmas>
 *   <lemma>
 *     <provider/> <language/> <lemma-name/> <forms-size/>
 *     <forms>
 *       <form>
 *         <provider/> <language/> <form-name/> <lemma-name/>
 *       </form>
 *     </forms>
 *   </lemma>
 * </lemmas>
 * }</pre>
 *
 * <p>NOTE(review): the signature declares the result as exactly one node,
 * yet {@link #eval(Sequence[], Sequence)} returns the empty sequence when
 * an argument is empty or when the cache hands back {@code null} - confirm
 * whether the declared return cardinality should be widened.
 *
 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
 */
public class GetLemmasByLuceneQuery extends BasicFunction {

  /** Name, description, argument types and declared return type of the function. */
  public static final FunctionSignature signature =
    new FunctionSignature(
      new QName("get-lemmas-by-lucene-query", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
      "A function which delivers the lemma of a given lucene query and language "
        + "by the MPDL language technology",
      new SequenceType[] {
        new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
        new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE)
      },
      new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));

  /**
   * Creates the function instance bound to the given query context.
   *
   * @param context the XQuery context handed to the superclass
   */
  public GetLemmasByLuceneQuery(XQueryContext context) {
    super(context, signature);
  }

  /**
   * Evaluates the function.
   *
   * @param args {@code args[0]} is the language, {@code args[1]} the Lucene query
   * @param contextSequence not used by this function
   * @return the {@code <lemmas>} document, or the empty sequence if either
   *         argument is empty or the morphology cache returned {@code null}
   * @throws XPathException if the morphology lookup fails with an
   *         {@link ApplicationException} (wrapped as the cause)
   */
  public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    Sequence languageArg = args[0];
    Sequence queryArg = args[1];
    if (languageArg.isEmpty() || queryArg.isEmpty()) {
      return Sequence.EMPTY_SEQUENCE;
    }

    String language = languageArg.getStringValue();
    String luceneQuery = queryArg.getStringValue();

    ArrayList<Lemma> foundLemmas = lookupLemmas(language, luceneQuery);
    if (foundLemmas == null) {
      return Sequence.EMPTY_SEQUENCE;
    }

    MemTreeBuilder treeBuilder = context.getDocumentBuilder();
    treeBuilder.startElement("", "lemmas", "lemmas", null);
    for (Lemma current : foundLemmas) {
      writeLemma(treeBuilder, current);
    }
    treeBuilder.endElement();
    return (DocumentImpl) treeBuilder.getDocument();
  }

  /**
   * Queries the morphology cache, translating its checked exception into an
   * {@link XPathException}. The meaning of the third argument ({@code true})
   * passed to the cache is not visible from this file.
   */
  private static ArrayList<Lemma> lookupLemmas(String language, String luceneQuery)
      throws XPathException {
    try {
      MorphologyCache cache = MorphologyCache.getInstance();
      return cache.getLemmasByLuceneQuery(language, luceneQuery, true);
    } catch (ApplicationException e) {
      throw new XPathException(e);
    }
  }

  /** Emits one {@code <lemma>} element including its nested {@code <forms>}. */
  private static void writeLemma(MemTreeBuilder treeBuilder, Lemma lemma) {
    treeBuilder.startElement("", "lemma", "lemma", null);
    writeTextElement(treeBuilder, "provider", lemma.getProvider());
    writeTextElement(treeBuilder, "language", lemma.getLanguage());
    writeTextElement(treeBuilder, "lemma-name", lemma.getLemmaName());

    ArrayList<Form> lemmaForms = lemma.getFormsList();
    writeTextElement(treeBuilder, "forms-size", String.valueOf(lemmaForms.size()));

    treeBuilder.startElement("", "forms", "forms", null);
    for (Form current : lemmaForms) {
      writeForm(treeBuilder, current, lemma);
    }
    treeBuilder.endElement(); // </forms>
    treeBuilder.endElement(); // </lemma>
  }

  /**
   * Emits one {@code <form>} element. The nested {@code <lemma-name>} is taken
   * from the owning lemma, not from the form itself.
   */
  private static void writeForm(MemTreeBuilder treeBuilder, Form form, Lemma owner) {
    treeBuilder.startElement("", "form", "form", null);
    writeTextElement(treeBuilder, "provider", form.getProvider());
    writeTextElement(treeBuilder, "language", form.getLanguage());
    writeTextElement(treeBuilder, "form-name", form.getFormName());
    writeTextElement(treeBuilder, "lemma-name", owner.getLemmaName());
    treeBuilder.endElement();
  }

  /** Emits {@code <name>text</name>} with an empty namespace and no attributes. */
  private static void writeTextElement(MemTreeBuilder treeBuilder, String name, String text) {
    treeBuilder.startElement("", name, name, null);
    treeBuilder.characters(text);
    treeBuilder.endElement();
  }
}