comparison software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetWordsFunction.java @ 0:408254cf2f1d

Erstellung
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Wed, 24 Nov 2010 17:24:23 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:408254cf2f1d
1 /*
2 * eXist Open Source Native XML Database: Extension module
3 * Copyright (C) 2008 Josef Willenborg
4 * jwillenborg@mpiwg-berlin.mpg.de
5 * http://www.mpiwg-berlin.mpg.de
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 * $Id: TextModule.java $
22 */
23 package org.exist.xquery.modules.mpdltext;
24
25 import java.util.regex.Pattern;
26 import java.util.regex.PatternSyntaxException;
27
28 import org.exist.dom.QName;
29 import org.exist.xquery.BasicFunction;
30 import org.exist.xquery.Cardinality;
31 import org.exist.xquery.FunctionSignature;
32 import org.exist.xquery.XPathException;
33 import org.exist.xquery.XQueryContext;
34 import org.exist.xquery.value.Sequence;
35 import org.exist.xquery.value.SequenceType;
36 import org.exist.xquery.value.StringValue;
37 import org.exist.xquery.value.Type;
38 import org.exist.xquery.value.ValueSequence;
39
40 /**
41 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de)
42 */
43 public class GetWordsFunction extends BasicFunction {
44
45 public final static FunctionSignature signature =
46 new FunctionSignature(
47 new QName("getWords", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX),
48 "A function which delivers words of a given text by a split pattern and a flags string (see also fn:tokenize)",
49 new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
50 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE),
51 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) },
52 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE));
53
54 public GetWordsFunction(XQueryContext context) {
55 super(context, signature);
56 }
57
58 public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
59 Sequence seqText = args[0];
60 Sequence seqPattern = args[1];
61 Sequence seqFlags = args[2];
62 String text = "";
63 String pattern = " "; // default pattern
64 int flags = 0;
65 if (seqText.isEmpty())
66 return Sequence.EMPTY_SEQUENCE;
67 else
68 text = seqText.getStringValue();
69 if (!seqPattern.isEmpty())
70 pattern = seqPattern.getStringValue();
71 if (!seqFlags.isEmpty())
72 flags = parseFlags(seqFlags.getStringValue());
73 ValueSequence result = null;
74 try {
75 Pattern pat = Pattern.compile(pattern, flags);
76 String[] tokens = pat.split(text, -1);
77 result = new ValueSequence();
78 for (int i = 0; i < tokens.length; i++)
79 result.add(new StringValue(tokens[i]));
80 } catch (PatternSyntaxException e) {
81 throw new XPathException(e);
82 }
83 return result;
84 }
85
86 private int parseFlags(String s) throws XPathException {
87 int flags = 0;
88 for(int i = 0; i < s.length(); i++) {
89 char ch = s.charAt(i);
90 switch(ch) {
91 case 'm':
92 flags |= Pattern.MULTILINE;
93 break;
94 case 'i':
95 flags = flags | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
96 break;
97 case 'x':
98 flags |= Pattern.COMMENTS;
99 break;
100 case 's':
101 flags |= Pattern.DOTALL;
102 break;
103 default:
104 throw new XPathException("Invalid regular expression flag: " + ch);
105 }
106 }
107 return flags;
108 }
109
110 }