view software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetLexEntriesByLuceneQuery.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line source

/*
 *  eXist Open Source Native XML Database: Extension module
 *  Copyright (C) 2008 Josef Willenborg
 *  jwillenborg@mpiwg-berlin.mpg.de
 *  http://www.mpiwg-berlin.mpg.de
 *  
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *  
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *  
 *  $Id: GetLexEntriesByLuceneQuery.java $
 */
package org.exist.xquery.modules.mpdltext;

import java.util.ArrayList;

import org.exist.dom.QName;
import org.exist.memtree.DocumentImpl;
import org.exist.memtree.MemTreeBuilder;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.Type;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexica;
import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.Lexicon;
import de.mpg.mpiwg.berlin.mpdl.lt.lex.app.LexiconEntry;
import de.mpg.mpiwg.berlin.mpdl.lt.lex.db.LexHandler;
import de.mpg.mpiwg.berlin.mpdl.lucene.LuceneUtil;

/**
 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
 */
public class GetLexEntriesByLuceneQuery extends BasicFunction {

	public final static FunctionSignature signature =
		new FunctionSignature(
			new QName("get-lex-entries-by-lucene-query", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
			"A function which delivers the lex entries of a given lucene query and language " +
			"by the MPDL language technology",
			new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), 
			                     new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
			new SequenceType(Type.NODE, Cardinality.EXACTLY_ONE));

	public GetLexEntriesByLuceneQuery(XQueryContext context) {
		super(context, signature);
	}

	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    Sequence langSeq = args[0];
    Sequence luceneQuerySeq = args[1];
    String language = "";
    String luceneQueryStr = "";
    if (langSeq.isEmpty() || luceneQuerySeq.isEmpty())
      return Sequence.EMPTY_SEQUENCE;
    language = langSeq.getStringValue();
    luceneQueryStr = luceneQuerySeq.getStringValue();
    ArrayList<Lexicon> lexicons = null;
		try { 
		  ArrayList<Lexicon> statLexicons = Lexica.getInstance().getLexicons(language);
		  if (statLexicons != null) {
  	    LexHandler lexHandler = LexHandler.getInstance();
        for (int i=0; i<statLexicons.size(); i++) {
          Lexicon lexicon = statLexicons.get(i).clone(); // clone without lexicon entries
          LuceneUtil luceneUtil = LuceneUtil.getInstance();
          ArrayList<String> lexQueryVariants = luceneUtil.getVariantsFromLuceneQuery(luceneQueryStr);
          for (int j=0; j<lexQueryVariants.size(); j++) {
            String lexForm = lexQueryVariants.get(j);
            LexiconEntry lexEntry = lexHandler.readEntry(lexicon.getName(), lexForm);
            if (lexEntry != null) {
              lexicon.addEntry(lexEntry); // add entries to the cloned object
              if (lexicons == null)
                lexicons = new ArrayList<Lexicon>();
            }
          }
          if (! lexicon.isEmpty())
          lexicons.add(lexicon);
        }
		  }
    } catch (ApplicationException e) {
      throw new XPathException(e);
    }
    DocumentImpl doc = null;
    if (lexicons != null) {
      MemTreeBuilder builder = context.getDocumentBuilder();
      builder.startElement("", "lexica", "lexica", null);
      for (int i=0; i<lexicons.size(); i++) {
        Lexicon lexicon = lexicons.get(i);
        builder.startElement("", "lexicon", "lexicon", null);
        builder.startElement("", "name", "name", null);
        builder.characters(lexicon.getName());
        builder.endElement();
        builder.startElement("", "description", "description", null);
        builder.characters(lexicon.getDescription());
        builder.endElement();
        builder.startElement("", "entries", "entries", null);
        ArrayList<LexiconEntry> entries = lexicon.getEntries();
        for (int j=0; j<entries.size(); j++) {
          builder.startElement("", "entry", "entry", null);
          LexiconEntry entry = entries.get(j);
          builder.startElement("", "form", "form", null);
          builder.characters(entry.getFormName());
          builder.endElement();
          builder.startElement("", "content", "content", null);
          builder.startElement("", "xml-valid", "xml-valid", null);
          String xmlValid = "false";
          if (entry.isXmlValid())
            xmlValid = "true";
          builder.characters(xmlValid);
          builder.endElement();
          builder.startElement("", "original-entry", "original-entry", null);
          builder.characters(entry.getOriginalEntry()); 
          builder.endElement();
          builder.startElement("", "repaired-entry", "repaired-entry", null);
          builder.characters(entry.getRepairedEntry()); 
          builder.endElement();
          builder.endElement();
          builder.endElement();
        }
        builder.endElement();
        builder.endElement();
      }
      builder.endElement();
      doc = ((DocumentImpl)builder.getDocument());
    } else {
      return Sequence.EMPTY_SEQUENCE;
    }
		return doc;
	}
	
	
}