Mercurial > hg > mpdl-group
view software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLemmasByFormName.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
line wrap: on
line source
/* * eXist Open Source Native XML Database: Extension module * Copyright (C) 2008 Josef Willenborg * jwillenborg@mpiwg-berlin.mpg.de * http://www.mpiwg-berlin.mpg.de * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * * $Id: TextModule.java $ */ package org.exist.xquery.modules.mpdltext; import java.util.ArrayList; import org.exist.dom.QName; import org.exist.memtree.DocumentImpl; import org.exist.memtree.MemTreeBuilder; import org.exist.xquery.BasicFunction; import org.exist.xquery.Cardinality; import org.exist.xquery.FunctionSignature; import org.exist.xquery.XPathException; import org.exist.xquery.XQueryContext; import org.exist.xquery.value.Sequence; import org.exist.xquery.value.SequenceType; import org.exist.xquery.value.Type; import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Form; import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.Lemma; import de.mpg.mpiwg.berlin.mpdl.lt.morph.app.MorphologyCache; /** * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de) */ public class GetLemmasByFormName extends BasicFunction { public final static FunctionSignature signature = new FunctionSignature( new QName("get-lemmas-by-form-name", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX), "A function which delivers the lemma of a given form name and language " + "by the MPDL language technology", new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) }, new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE)); public GetLemmasByFormName(XQueryContext context) { super(context, signature); } public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { Sequence langSeq = args[0]; Sequence formNameSeq = args[1]; String language = ""; String formName = ""; if (langSeq.isEmpty() || formNameSeq.isEmpty()) return Sequence.EMPTY_SEQUENCE; language = langSeq.getStringValue(); formName = formNameSeq.getStringValue(); ArrayList<Lemma> lemmas = null; try { MorphologyCache morphologyCache = MorphologyCache.getInstance(); lemmas = morphologyCache.getLemmasByFormName(language, formName, true); if (lemmas == null || lemmas.isEmpty()) { Lemma lemma = new Lemma("standard analyzer", language, formName); // delivers at least one lemma with one form (with the formName) lemmas = new ArrayList<Lemma>(); lemmas.add(lemma); } } catch (ApplicationException e) { throw new XPathException(e); } DocumentImpl doc = null; if (lemmas != null) { MemTreeBuilder builder = context.getDocumentBuilder(); builder.startElement("", "lemmas", "lemmas", null); for (int i=0; i<lemmas.size(); i++) { Lemma lemma = lemmas.get(i); builder.startElement("", "lemma", "lemma", null); builder.startElement("", "provider", "provider", null); builder.characters(lemma.getProvider()); builder.endElement(); builder.startElement("", "language", "language", null); builder.characters(lemma.getLanguage()); builder.endElement(); builder.startElement("", "lemma-name", "lemma-name", null); builder.characters(lemma.getLemmaName()); builder.endElement(); ArrayList<Form> forms = lemma.getFormsList(); builder.startElement("", "forms-size", "forms-size", null); builder.characters(String.valueOf(forms.size())); builder.endElement(); builder.startElement("", "forms", "forms", null); for (int j=0; j<forms.size(); j++) { Form form = forms.get(j); builder.startElement("", "form", "form", null); builder.startElement("", "provider", "provider", null); builder.characters(form.getProvider()); builder.endElement(); builder.startElement("", "language", "language", null); builder.characters(form.getLanguage()); builder.endElement(); builder.startElement("", "form-name", "form-name", null); builder.characters(form.getFormName()); builder.endElement(); builder.startElement("", "lemma-name", "lemma-name", null); builder.characters(lemma.getLemmaName()); builder.endElement(); builder.endElement(); } builder.endElement(); builder.endElement(); } builder.endElement(); doc = ((DocumentImpl)builder.getDocument()); } else { return Sequence.EMPTY_SEQUENCE; } return doc; } }