diff software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetWordsFunction.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetWordsFunction.java	Wed Nov 24 17:24:23 2010 +0100
@@ -0,0 +1,110 @@
+/*
+ *  eXist Open Source Native XML Database: Extension module
+ *  Copyright (C) 2008 Josef Willenborg
+ *  jwillenborg@mpiwg-berlin.mpg.de
+ *  http://www.mpiwg-berlin.mpg.de
+ *  
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public License
+ *  as published by the Free Software Foundation; either version 2
+ *  of the License, or (at your option) any later version.
+ *  
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU Lesser General Public License for more details.
+ *  
+ *  You should have received a copy of the GNU Lesser General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *  
+ *  $Id: GetWordsFunction.java $
+ */
+package org.exist.xquery.modules.mpdltext;
+
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.Cardinality;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.SequenceType;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+import org.exist.xquery.value.ValueSequence;
+
+/**
+ * XQuery extension function <code>getWords</code> of the MPDL text module.
+ *
+ * <p>Splits a text around the matches of a regular expression and returns the
+ * resulting pieces as a sequence of strings (compare <code>fn:tokenize</code>).
+ * The function takes three arguments: the text, the split pattern and a flags
+ * string. An empty pattern argument falls back to a single space; an empty
+ * flags argument means "no flags".</p>
+ *
+ * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
+ */
+public class GetWordsFunction extends BasicFunction {
+
+	/** Signature: getWords(text, pattern, flags), every argument and the result being xs:string*. */
+	public final static FunctionSignature signature =
+		new FunctionSignature(
+			new QName("getWords", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
+			"A function which delivers words of a given text by a split pattern and a flags string (see also fn:tokenize)",
+			new SequenceType[] {
+				new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
+				new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
+				new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE)
+			},
+			new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
+
+	/**
+	 * Creates the function instance bound to the given query context.
+	 *
+	 * @param context the XQuery context handed on to the base class
+	 */
+	public GetWordsFunction(XQueryContext context) {
+		super(context, signature);
+	}
+
+	/**
+	 * Splits the text argument around matches of the pattern argument.
+	 *
+	 * @param args            args[0] = text, args[1] = split pattern, args[2] = flags string
+	 * @param contextSequence not used by this function
+	 * @return the empty sequence if the text argument is empty, otherwise one
+	 *         string value per piece produced by the split
+	 * @throws XPathException if the pattern is not a valid regular expression
+	 *                        or the flags string contains an unsupported character
+	 */
+	@Override
+	public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
+		final Sequence textArg = args[0];
+		final Sequence patternArg = args[1];
+		final Sequence flagsArg = args[2];
+
+		// Nothing to split: answer with the empty sequence right away.
+		if (textArg.isEmpty()) {
+			return Sequence.EMPTY_SEQUENCE;
+		}
+
+		final String input = textArg.getStringValue();
+		// A single space is the default split pattern.
+		final String regex = patternArg.isEmpty() ? " " : patternArg.getStringValue();
+		final int regexFlags = flagsArg.isEmpty() ? 0 : parseFlags(flagsArg.getStringValue());
+
+		try {
+			final Pattern splitter = Pattern.compile(regex, regexFlags);
+			// A negative limit keeps trailing empty strings in the result.
+			final String[] pieces = splitter.split(input, -1);
+			final ValueSequence words = new ValueSequence();
+			for (String piece : pieces) {
+				words.add(new StringValue(piece));
+			}
+			return words;
+		} catch (PatternSyntaxException e) {
+			throw new XPathException(e);
+		}
+	}
+
+	/**
+	 * Translates a flags string into the matching java.util.regex.Pattern bit mask.
+	 *
+	 * <p>Supported characters: <code>m</code> (MULTILINE), <code>i</code>
+	 * (CASE_INSENSITIVE together with UNICODE_CASE), <code>x</code> (COMMENTS)
+	 * and <code>s</code> (DOTALL).</p>
+	 *
+	 * @param s the flags string; each character is interpreted on its own
+	 * @return the combined Pattern flags, 0 for an empty string
+	 * @throws XPathException if a character other than m, i, x or s is found
+	 */
+	private int parseFlags(String s) throws XPathException {
+		int mask = 0;
+		for (char flag : s.toCharArray()) {
+			if (flag == 'm') {
+				mask |= Pattern.MULTILINE;
+			} else if (flag == 'i') {
+				mask |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+			} else if (flag == 'x') {
+				mask |= Pattern.COMMENTS;
+			} else if (flag == 's') {
+				mask |= Pattern.DOTALL;
+			} else {
+				throw new XPathException("Invalid regular expression flag: " + flag);
+			}
+		}
+		return mask;
+	}
+
+}