view software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetWordsFunction.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line source

/*
 *  eXist Open Source Native XML Database: Extension module
 *  Copyright (C) 2008 Josef Willenborg
 *  jwillenborg@mpiwg-berlin.mpg.de
 *  http://www.mpiwg-berlin.mpg.de
 *  
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *  
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *  
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *  
 *  $Id: TextModule.java $
 */
package org.exist.xquery.modules.mpdltext;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.exist.dom.QName;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import org.exist.xquery.value.ValueSequence;

/**
 * XQuery extension function {@code getWords}: splits the string value of a
 * text argument into tokens using a regular expression and an optional
 * string of regular expression flags.
 *
 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
 */
public class GetWordsFunction extends BasicFunction {

  /** Split pattern used when the pattern argument is the empty sequence. */
  private static final String DEFAULT_SPLIT_PATTERN = " ";

  /**
   * Signature of {@code getWords}: three string sequences (text, split
   * pattern, flags) in, a sequence of strings out.
   */
  public static final FunctionSignature signature =
      new FunctionSignature(
          new QName("getWords", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
          "A function which delivers words of a given text by a split pattern and a flags string (see also fn:tokenize)",
          new SequenceType[] {
            new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
            new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
            new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE)
          },
          new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));

  /**
   * Creates the function bound to the given query context.
   *
   * @param context the XQuery context handed on to {@link BasicFunction}
   */
  public GetWordsFunction(XQueryContext context) {
    super(context, signature);
  }

  /**
   * Splits the text around matches of the split pattern.
   *
   * @param args {@code args[0]} is the text, {@code args[1]} the split
   *     pattern (a single space if empty), {@code args[2]} the flags string
   *     (no flags if empty); see {@link #parseFlags(String)} for the flags
   * @param contextSequence not used by this function
   * @return the empty sequence if the text argument is empty, otherwise one
   *     string value per token, in order
   * @throws XPathException if the pattern is not a valid regular expression
   *     or the flags string contains an unsupported character
   */
  public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
    final Sequence textArg = args[0];
    final Sequence patternArg = args[1];
    final Sequence flagsArg = args[2];

    // Without a text there is nothing to split; pattern and flags are not inspected.
    if (textArg.isEmpty()) {
      return Sequence.EMPTY_SEQUENCE;
    }

    final String input = textArg.getStringValue();
    final String regex =
        patternArg.isEmpty() ? DEFAULT_SPLIT_PATTERN : patternArg.getStringValue();
    final int mask = flagsArg.isEmpty() ? 0 : parseFlags(flagsArg.getStringValue());

    final Pattern splitter;
    try {
      splitter = Pattern.compile(regex, mask);
    } catch (PatternSyntaxException e) {
      throw new XPathException(e);
    }

    // A negative limit keeps trailing empty tokens instead of discarding them.
    final ValueSequence words = new ValueSequence();
    for (String token : splitter.split(input, -1)) {
      words.add(new StringValue(token));
    }
    return words;
  }

  /**
   * Translates a flags string into a {@link Pattern} flag bit mask.
   *
   * <p>Supported characters: {@code m} (multiline), {@code i} (case
   * insensitive, including Unicode case folding), {@code x} (comments) and
   * {@code s} (dot matches all).
   *
   * @param s the flags string, one character per flag
   * @return the combined bit mask, {@code 0} for an empty string
   * @throws XPathException on the first character that is not a supported flag
   */
  private int parseFlags(String s) throws XPathException {
    int mask = 0;
    for (char flag : s.toCharArray()) {
      if (flag == 'm') {
        mask |= Pattern.MULTILINE;
      } else if (flag == 'i') {
        mask |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
      } else if (flag == 'x') {
        mask |= Pattern.COMMENTS;
      } else if (flag == 's') {
        mask |= Pattern.DOTALL;
      } else {
        throw new XPathException("Invalid regular expression flag: " + flag);
      }
    }
    return mask;
  }

}