Mercurial > hg > mpdl-group
comparison software/eXist/mpdl-modules/src/org/exist/xquery/modules/mpdltext/GetWordsFunction.java @ 0:408254cf2f1d
Erstellung
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Wed, 24 Nov 2010 17:24:23 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:408254cf2f1d |
---|---|
1 /* | |
2 * eXist Open Source Native XML Database: Extension module | |
3 * Copyright (C) 2008 Josef Willenborg | |
4 * jwillenborg@mpiwg-berlin.mpg.de | |
5 * http://www.mpiwg-berlin.mpg.de | |
6 * | |
7 * This program is free software; you can redistribute it and/or | |
8 * modify it under the terms of the GNU Lesser General Public License | |
9 * as published by the Free Software Foundation; either version 2 | |
10 * of the License, or (at your option) any later version. | |
11 * | |
12 * This program is distributed in the hope that it will be useful, | |
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 * GNU Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public License | |
18 * along with this program; if not, write to the Free Software | |
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | |
20 * | |
21 * $Id: TextModule.java $ | |
22 */ | |
23 package org.exist.xquery.modules.mpdltext; | |
24 | |
25 import java.util.regex.Pattern; | |
26 import java.util.regex.PatternSyntaxException; | |
27 | |
28 import org.exist.dom.QName; | |
29 import org.exist.xquery.BasicFunction; | |
30 import org.exist.xquery.Cardinality; | |
31 import org.exist.xquery.FunctionSignature; | |
32 import org.exist.xquery.XPathException; | |
33 import org.exist.xquery.XQueryContext; | |
34 import org.exist.xquery.value.Sequence; | |
35 import org.exist.xquery.value.SequenceType; | |
36 import org.exist.xquery.value.StringValue; | |
37 import org.exist.xquery.value.Type; | |
38 import org.exist.xquery.value.ValueSequence; | |
39 | |
40 /** | |
41 * @author Josef Willenborg (jwillenborg@mpiwg-berlin.mpg.de) | |
42 */ | |
43 public class GetWordsFunction extends BasicFunction { | |
44 | |
45 public final static FunctionSignature signature = | |
46 new FunctionSignature( | |
47 new QName("getWords", MPDLTextModule.NAMESPACE_URI, MPDLTextModule.PREFIX), | |
48 "A function which delivers words of a given text by a split pattern and a flags string (see also fn:tokenize)", | |
49 new SequenceType[] { new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), | |
50 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE), | |
51 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE) }, | |
52 new SequenceType(Type.STRING, Cardinality.ZERO_OR_MORE)); | |
53 | |
54 public GetWordsFunction(XQueryContext context) { | |
55 super(context, signature); | |
56 } | |
57 | |
58 public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException { | |
59 Sequence seqText = args[0]; | |
60 Sequence seqPattern = args[1]; | |
61 Sequence seqFlags = args[2]; | |
62 String text = ""; | |
63 String pattern = " "; // default pattern | |
64 int flags = 0; | |
65 if (seqText.isEmpty()) | |
66 return Sequence.EMPTY_SEQUENCE; | |
67 else | |
68 text = seqText.getStringValue(); | |
69 if (!seqPattern.isEmpty()) | |
70 pattern = seqPattern.getStringValue(); | |
71 if (!seqFlags.isEmpty()) | |
72 flags = parseFlags(seqFlags.getStringValue()); | |
73 ValueSequence result = null; | |
74 try { | |
75 Pattern pat = Pattern.compile(pattern, flags); | |
76 String[] tokens = pat.split(text, -1); | |
77 result = new ValueSequence(); | |
78 for (int i = 0; i < tokens.length; i++) | |
79 result.add(new StringValue(tokens[i])); | |
80 } catch (PatternSyntaxException e) { | |
81 throw new XPathException(e); | |
82 } | |
83 return result; | |
84 } | |
85 | |
86 private int parseFlags(String s) throws XPathException { | |
87 int flags = 0; | |
88 for(int i = 0; i < s.length(); i++) { | |
89 char ch = s.charAt(i); | |
90 switch(ch) { | |
91 case 'm': | |
92 flags |= Pattern.MULTILINE; | |
93 break; | |
94 case 'i': | |
95 flags = flags | Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE; | |
96 break; | |
97 case 'x': | |
98 flags |= Pattern.COMMENTS; | |
99 break; | |
100 case 's': | |
101 flags |= Pattern.DOTALL; | |
102 break; | |
103 default: | |
104 throw new XPathException("Invalid regular expression flag: " + ch); | |
105 } | |
106 } | |
107 return flags; | |
108 } | |
109 | |
110 } |