Mercurial > hg > mpdl-group
annotate software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | dc5e9fcb3fdc |
children |
rev | line source |
---|---|
18 | 1 package de.mpg.mpiwg.berlin.mpdl.xml.xquery; |
2 | |
3 import java.io.StringReader; | |
4 import java.io.StringWriter; | |
5 import java.net.URL; | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
6 import java.util.ArrayList; |
18 | 7 |
8 import javax.xml.transform.stream.StreamSource; | |
9 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
10 import org.xml.sax.SAXParseException; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
11 |
18 | 12 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; |
13 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
14 import net.sf.saxon.s9api.Axis; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
15 import net.sf.saxon.s9api.DocumentBuilder; |
18 | 16 import net.sf.saxon.s9api.Processor; |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
17 import net.sf.saxon.s9api.QName; |
18 | 18 import net.sf.saxon.s9api.SaxonApiException; |
19 import net.sf.saxon.s9api.Serializer; | |
20 import net.sf.saxon.s9api.XQueryCompiler; | |
21 import net.sf.saxon.s9api.XQueryExecutable; | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
22 import net.sf.saxon.s9api.XdmAtomicValue; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
23 import net.sf.saxon.s9api.XdmEmptySequence; |
18 | 24 import net.sf.saxon.s9api.XdmItem; |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
25 import net.sf.saxon.s9api.XdmNode; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
26 import net.sf.saxon.s9api.XdmNodeKind; |
18 | 27 import net.sf.saxon.s9api.XdmSequenceIterator; |
28 import net.sf.saxon.s9api.XdmValue; | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
29 import net.sf.saxon.trans.XPathException; |
18 | 30 |
31 public class XQueryEvaluator { | |
32 private Processor processor; | |
33 private XQueryCompiler xQueryCompiler; | |
34 | |
35 public XQueryEvaluator() { | |
36 processor = new Processor(false); | |
37 xQueryCompiler = processor.newXQueryCompiler(); | |
38 } | |
39 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
40 public XdmNode parse(URL srcUrl) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
41 try { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
42 DocumentBuilder docBuilder = processor.newDocumentBuilder(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
43 StreamSource xmlDoc = new StreamSource(srcUrl.toString()); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
44 XdmNode docNode = docBuilder.build(xmlDoc); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
45 return docNode; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
46 } catch (SaxonApiException e) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
47 String message = e.getMessage(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
48 Throwable t = e.getCause(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
49 if (t instanceof XPathException) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
50 XPathException xpathException = (XPathException) t; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
51 Throwable t2 = xpathException.getException(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
52 if (t2 instanceof SAXParseException) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
53 SAXParseException saxParseException = (SAXParseException) t2; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
54 int lineNumber = saxParseException.getLineNumber(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
55 int columnNumber = saxParseException.getColumnNumber(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
56 message = "Line: " + lineNumber + ", Column: " + columnNumber + ": " + e.getMessage(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
57 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
58 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
59 throw new ApplicationException("Your source file (" + srcUrl.toString() + ") is not valid: " + message); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
60 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
61 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
62 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
63 public Hits evaluate(URL srcUrl, String xqueryStr, int from, int to) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
64 Hits result = (Hits) evaluate(srcUrl, xqueryStr, from, to, "hits"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
65 return result; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
66 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
67 |
18 | 68 public XdmValue evaluate(String srcXmlStr, String xqueryStr) throws ApplicationException { |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
69 XdmValue result = (XdmValue) evaluate(srcXmlStr, xqueryStr, 0, 9, "XdmValue"); |
18 | 70 return result; |
71 } | |
72 | |
73 public XdmValue evaluate(URL srcUrl, String xqueryStr) throws ApplicationException { | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
74 XdmValue result = (XdmValue) evaluate(srcUrl, xqueryStr, 0, 9, "XdmValue"); |
18 | 75 return result; |
76 } | |
77 | |
78 public String evaluateAsString(String srcXmlStr, String xqueryStr) throws ApplicationException { | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
79 Object result = evaluate(srcXmlStr, xqueryStr, 0, 9, "string"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
80 if (result == null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
81 return null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
82 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
83 return (String) result; |
18 | 84 } |
85 | |
86 public String evaluateAsString(URL srcUrl, String xqueryStr) throws ApplicationException { | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
87 Object result = evaluate(srcUrl, xqueryStr, 0, 9, "string"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
88 if (result == null) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
89 return null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
90 else |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
91 return (String) result; |
18 | 92 } |
93 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
94 public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
95 return evaluateAsStringValueJoined(srcXmlStr, xqueryStr, " "); |
18 | 96 } |
97 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
98 public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
99 return evaluateAsStringValueJoined(srcUrl, xqueryStr, " "); |
18 | 100 } |
101 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
102 public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr, String separator) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
103 XdmValue val = (XdmValue) evaluate(srcXmlStr, xqueryStr, 0, 9, "XdmValue"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
104 return stringJoin(val, separator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
105 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
106 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
107 public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr, String separator) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
108 XdmValue val = (XdmValue) evaluate(srcUrl, xqueryStr, 0, 9, "XdmValue"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
109 return stringJoin(val, separator); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
110 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
111 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
112 public Object evaluate(String srcXmlStr, String xqueryStr, int from, int to, String resultType) throws ApplicationException { |
18 | 113 try { |
114 StringReader srcXmlStrReader = new StringReader(srcXmlStr); | |
115 StreamSource xmlDoc = new StreamSource(srcXmlStrReader); | |
116 XQueryExecutable xQueryExecutable = xQueryCompiler.compile(xqueryStr); | |
117 Serializer serializer = new Serializer(); | |
118 serializer.setOutputWriter(new StringWriter()); | |
119 net.sf.saxon.s9api.XQueryEvaluator xQueryEvaluator = xQueryExecutable.load(); | |
120 xQueryEvaluator.setSource(xmlDoc); | |
121 XdmValue val = xQueryEvaluator.evaluate(); | |
122 Object result = val; | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
123 if (resultType != null && resultType.equals("string")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
124 result = toString(val); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
125 } else if (resultType != null && resultType.equals("XdmValue")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
126 result = val; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
127 } else if (resultType != null && resultType.equals("hits")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
128 result = toHits(val, from, to); |
18 | 129 } |
130 return result; | |
131 } catch (SaxonApiException e) { | |
132 throw new ApplicationException(e); | |
133 } | |
134 } | |
135 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
136 public Object evaluate(URL srcUrl, String xqueryStr, int from, int to, String resultType) throws ApplicationException { |
18 | 137 try { |
138 StreamSource xmlDoc = new StreamSource(srcUrl.toString()); | |
139 XQueryExecutable xQueryExecutable = xQueryCompiler.compile(xqueryStr); | |
140 Serializer serializer = new Serializer(); | |
141 serializer.setOutputWriter(new StringWriter()); | |
142 net.sf.saxon.s9api.XQueryEvaluator xQueryEvaluator = xQueryExecutable.load(); | |
143 xQueryEvaluator.setSource(xmlDoc); | |
144 XdmValue val = xQueryEvaluator.evaluate(); | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
145 Object result = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
146 if (resultType != null && resultType.equals("string")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
147 result = toString(val); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
148 } else if (resultType != null && resultType.equals("XdmValue")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
149 result = val; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
150 } else if (resultType != null && resultType.equals("hits")) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
151 result = toHits(val, from, to); |
18 | 152 } |
153 return result; | |
154 } catch (SaxonApiException e) { | |
155 throw new ApplicationException(e); | |
156 } | |
157 } | |
158 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
159 private Hits toHits(XdmValue xdmValue, int from, int to) throws ApplicationException { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
160 Hits result = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
161 if (xdmValue instanceof XdmAtomicValue) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
162 XdmAtomicValue av = (XdmAtomicValue) xdmValue; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
163 String avStr = av.getStringValue(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
164 ArrayList<Hit> hits = new ArrayList<Hit>(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
165 Hit hit = new Hit(avStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
166 hit.setType(Hit.TYPE_ATOMIC_VALUE); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
167 hits.add(hit); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
168 result = new Hits(hits, from, to); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
169 result.setSize(1); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
170 return result; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
171 } |
18 | 172 XdmSequenceIterator iter = xdmValue.iterator(); |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
173 int size = xdmValue.size(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
174 if (size > 0) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
175 int counter = 0; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
176 ArrayList<Hit> hits = new ArrayList<Hit>(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
177 while (iter.hasNext() && counter <= to) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
178 XdmItem item = iter.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
179 if (counter >= from) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
180 XdmNode n = (XdmNode) item; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
181 ArrayList<XdmNode> precNodes = getPreceding(n, "pb"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
182 int page = precNodes.size(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
183 String nodeName = n.getNodeName().getLocalName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
184 String nodeKindName = n.getNodeKind().name(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
185 ArrayList<XdmNode> precNodesUntilPB = getPrecedingUntil(n, nodeName, "pb"); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
186 int hitPagePosition = precNodesUntilPB.size() + 1; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
187 String itemStr = item.toString(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
188 // itemStr = itemStr.replaceAll("[ \n\t]+xmlns.*?\".*?\"", ""); // remove the namespace |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
189 itemStr = itemStr.replaceAll("[ \n\t]+xmlns", " xmlns"); // remove the blanks before the namespace |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
190 Hit hit = new Hit(itemStr); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
191 hit.setPage(page); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
192 hit.setName(nodeName); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
193 if (nodeKindName != null && nodeKindName.equals("ELEMENT")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
194 hit.setType(Hit.TYPE_ELEMENT); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
195 else if (nodeKindName != null && nodeKindName.equals("ATTRIBUTE")) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
196 hit.setType(Hit.TYPE_ATTRIBUTE); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
197 if (hit.getType() == Hit.TYPE_ELEMENT) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
198 hit.setHitPagePosition(hitPagePosition); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
199 hits.add(hit); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
200 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
201 counter++; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
202 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
203 result = new Hits(hits, from, to); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
204 result.setSize(size); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
205 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
206 return result; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
207 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
208 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
209 private String toString(XdmValue xdmValue) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
210 if (xdmValue instanceof XdmAtomicValue) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
211 XdmAtomicValue av = (XdmAtomicValue) xdmValue; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
212 String avStr = av.getStringValue(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
213 return avStr; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
214 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
215 String result = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
216 XdmSequenceIterator iter = xdmValue.iterator(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
217 int size = xdmValue.size(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
218 if (size > 0) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
219 result = ""; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
220 while (iter.hasNext()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
221 XdmItem item = iter.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
222 String itemStr = item.toString(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
223 result += itemStr; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
224 } |
18 | 225 } |
226 return result; | |
227 } | |
228 | |
23
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
229 private String stringJoin(XdmValue xdmValue, String separator) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
230 if (xdmValue == null || xdmValue instanceof XdmEmptySequence) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
231 return null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
232 if (xdmValue instanceof XdmAtomicValue) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
233 XdmAtomicValue av = (XdmAtomicValue) xdmValue; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
234 String avStr = av.getStringValue(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
235 return avStr; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
236 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
237 String result = ""; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
238 XdmSequenceIterator iter = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
239 if (xdmValue instanceof XdmNode) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
240 XdmNode xdmNode = (XdmNode) xdmValue; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
241 iter = xdmNode.axisIterator(Axis.CHILD); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
242 } else if (xdmValue instanceof XdmValue) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
243 iter = xdmValue.iterator(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
244 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
245 while (iter.hasNext()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
246 XdmNode node = (XdmNode) iter.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
247 XdmNodeKind nodeKind = node.getNodeKind(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
248 if (nodeKind == XdmNodeKind.TEXT) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
249 String nodeStr = node.getStringValue(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
250 String trimmedNodeStr = nodeStr.trim(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
251 result = result + trimmedNodeStr; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
252 } else if (nodeKind == XdmNodeKind.ELEMENT) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
253 result = result + stringJoin(node, separator) + separator; // put a separator between child element nodes |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
254 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
255 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
256 return result; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
257 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
258 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
259 private ArrayList<XdmNode> getPreceding(XdmNode startNode, String ofNodeName) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
260 ArrayList<XdmNode> retNodes = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
261 XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
262 if (iter != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
263 retNodes = new ArrayList<XdmNode>(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
264 while (iter.hasNext()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
265 XdmNode n = (XdmNode) iter.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
266 QName qName = n.getNodeName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
267 if (qName != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
268 String name = qName.getLocalName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
269 if (name != null && name.equals(ofNodeName)) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
270 retNodes.add(n); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
271 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
272 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
273 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
274 return retNodes; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
275 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
276 |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
277 private ArrayList<XdmNode> getPrecedingUntil(XdmNode startNode, String ofNodeName, String untilNodeName) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
278 ArrayList<XdmNode> retNodes = null; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
279 XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
280 if (iter != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
281 retNodes = new ArrayList<XdmNode>(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
282 while (iter.hasNext()) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
283 XdmNode n = (XdmNode) iter.next(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
284 QName qName = n.getNodeName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
285 if (qName != null) { |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
286 String name = qName.getLocalName(); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
287 if (name != null && name.equals(ofNodeName)) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
288 retNodes.add(n); |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
289 if (name != null && name.equals(untilNodeName)) |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
290 return retNodes; |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
291 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
292 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
293 } |
e845310098ba
diverse Korrekturen
Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
parents:
18
diff
changeset
|
294 return retNodes; |
18 | 295 } |
296 } |