Mercurial > hg > mpdl-group
diff software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | dc5e9fcb3fdc |
children |
line wrap: on
line diff
--- a/software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java Wed Dec 14 13:57:09 2011 +0100 +++ b/software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java Tue Nov 27 12:35:19 2012 +0100 @@ -3,19 +3,30 @@ import java.io.StringReader; import java.io.StringWriter; import java.net.URL; +import java.util.ArrayList; import javax.xml.transform.stream.StreamSource; +import org.xml.sax.SAXParseException; + import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException; +import net.sf.saxon.s9api.Axis; +import net.sf.saxon.s9api.DocumentBuilder; import net.sf.saxon.s9api.Processor; +import net.sf.saxon.s9api.QName; import net.sf.saxon.s9api.SaxonApiException; import net.sf.saxon.s9api.Serializer; import net.sf.saxon.s9api.XQueryCompiler; import net.sf.saxon.s9api.XQueryExecutable; +import net.sf.saxon.s9api.XdmAtomicValue; +import net.sf.saxon.s9api.XdmEmptySequence; import net.sf.saxon.s9api.XdmItem; +import net.sf.saxon.s9api.XdmNode; +import net.sf.saxon.s9api.XdmNodeKind; import net.sf.saxon.s9api.XdmSequenceIterator; import net.sf.saxon.s9api.XdmValue; +import net.sf.saxon.trans.XPathException; public class XQueryEvaluator { private Processor processor; @@ -26,37 +37,79 @@ xQueryCompiler = processor.newXQueryCompiler(); } + public XdmNode parse(URL srcUrl) throws ApplicationException { + try { + DocumentBuilder docBuilder = processor.newDocumentBuilder(); + StreamSource xmlDoc = new StreamSource(srcUrl.toString()); + XdmNode docNode = docBuilder.build(xmlDoc); + return docNode; + } catch (SaxonApiException e) { + String message = e.getMessage(); + Throwable t = e.getCause(); + if (t instanceof XPathException) { + XPathException xpathException = (XPathException) t; + Throwable t2 = xpathException.getException(); + if (t2 instanceof SAXParseException) { + SAXParseException saxParseException = (SAXParseException) t2; + int lineNumber = saxParseException.getLineNumber(); + int columnNumber = saxParseException.getColumnNumber(); + message = "Line: " + lineNumber + ", Column: " + columnNumber + ": " + e.getMessage(); + } + } + throw new ApplicationException("Your source file (" + srcUrl.toString() + ") is not valid: " + message); + } + } + + public Hits evaluate(URL srcUrl, String xqueryStr, int from, int to) throws ApplicationException { + Hits result = (Hits) evaluate(srcUrl, xqueryStr, from, to, "hits"); + return result; + } + public XdmValue evaluate(String srcXmlStr, String xqueryStr) throws ApplicationException { - XdmValue result = (XdmValue) evaluate(srcXmlStr, xqueryStr, null); + XdmValue result = (XdmValue) evaluate(srcXmlStr, xqueryStr, 0, 9, "XdmValue"); return result; } public XdmValue evaluate(URL srcUrl, String xqueryStr) throws ApplicationException { - XdmValue result = (XdmValue) evaluate(srcUrl, xqueryStr, null); + XdmValue result = (XdmValue) evaluate(srcUrl, xqueryStr, 0, 9, "XdmValue"); return result; } public String evaluateAsString(String srcXmlStr, String xqueryStr) throws ApplicationException { - Object result = evaluate(srcXmlStr, xqueryStr, "asString"); - return (String) result; + Object result = evaluate(srcXmlStr, xqueryStr, 0, 9, "string"); + if (result == null) + return null; + else + return (String) result; } public String evaluateAsString(URL srcUrl, String xqueryStr) throws ApplicationException { - Object result = evaluate(srcUrl, xqueryStr, "asString"); - return (String) result; + Object result = evaluate(srcUrl, xqueryStr, 0, 9, "string"); + if (result == null) + return null; + else + return (String) result; } - public String evaluateAsStringValue(String srcXmlStr, String xqueryStr) throws ApplicationException { - XdmValue val = (XdmValue) evaluate(srcXmlStr, xqueryStr, null); - return toStringValue(val); + public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr) throws ApplicationException { + return evaluateAsStringValueJoined(srcXmlStr, xqueryStr, " "); } - public String evaluateAsStringValue(URL srcUrl, String xqueryStr) throws ApplicationException { - XdmValue val = (XdmValue) evaluate(srcUrl, xqueryStr, null); - return toStringValue(val); + public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr) throws ApplicationException { + return evaluateAsStringValueJoined(srcUrl, xqueryStr, " "); } - public Object evaluate(String srcXmlStr, String xqueryStr, String optionsStr) throws ApplicationException { + public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr, String separator) throws ApplicationException { + XdmValue val = (XdmValue) evaluate(srcXmlStr, xqueryStr, 0, 9, "XdmValue"); + return stringJoin(val, separator); + } + + public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr, String separator) throws ApplicationException { + XdmValue val = (XdmValue) evaluate(srcUrl, xqueryStr, 0, 9, "XdmValue"); + return stringJoin(val, separator); + } + + public Object evaluate(String srcXmlStr, String xqueryStr, int from, int to, String resultType) throws ApplicationException { try { StringReader srcXmlStrReader = new StringReader(srcXmlStr); StreamSource xmlDoc = new StreamSource(srcXmlStrReader); @@ -67,12 +120,12 @@ xQueryEvaluator.setSource(xmlDoc); XdmValue val = xQueryEvaluator.evaluate(); Object result = val; - if (optionsStr != null && optionsStr.contains("asString")) { - int size = val.size(); - if (size <= 0) - result = ""; - else - result = toString(val); + if (resultType != null && resultType.equals("string")) { + result = toString(val); + } else if (resultType != null && resultType.equals("XdmValue")) { + result = val; + } else if (resultType != null && resultType.equals("hits")) { + result = toHits(val, from, to); } return result; } catch (SaxonApiException e) { @@ -80,7 +133,7 @@ } } - public Object evaluate(URL srcUrl, String xqueryStr, String optionsStr) throws ApplicationException { + public Object evaluate(URL srcUrl, String xqueryStr, int from, int to, String resultType) throws ApplicationException { try { StreamSource xmlDoc = new StreamSource(srcUrl.toString()); XQueryExecutable xQueryExecutable = xQueryCompiler.compile(xqueryStr); @@ -89,13 +142,13 @@ net.sf.saxon.s9api.XQueryEvaluator xQueryEvaluator = xQueryExecutable.load(); xQueryEvaluator.setSource(xmlDoc); XdmValue val = xQueryEvaluator.evaluate(); - Object result = val; - if (optionsStr != null && optionsStr.contains("asString")) { - int size = val.size(); - if (size <= 0) - result = ""; - else - result = toString(val); + Object result = null; + if (resultType != null && resultType.equals("string")) { + result = toString(val); + } else if (resultType != null && resultType.equals("XdmValue")) { + result = val; + } else if (resultType != null && resultType.equals("hits")) { + result = toHits(val, from, to); } return result; } catch (SaxonApiException e) { @@ -103,18 +156,141 @@ } } - private String toString(XdmValue xdmValue) { - String result = ""; + private Hits toHits(XdmValue xdmValue, int from, int to) throws ApplicationException { + Hits result = null; + if (xdmValue instanceof XdmAtomicValue) { + XdmAtomicValue av = (XdmAtomicValue) xdmValue; + String avStr = av.getStringValue(); + ArrayList<Hit> hits = new ArrayList<Hit>(); + Hit hit = new Hit(avStr); + hit.setType(Hit.TYPE_ATOMIC_VALUE); + hits.add(hit); + result = new Hits(hits, from, to); + result.setSize(1); + return result; + } XdmSequenceIterator iter = xdmValue.iterator(); - while (iter.hasNext()) { - XdmItem item = iter.next(); - String itemStr = item.toString(); - result += itemStr; + int size = xdmValue.size(); + if (size > 0) { + int counter = 0; + ArrayList<Hit> hits = new ArrayList<Hit>(); + while (iter.hasNext() && counter <= to) { + XdmItem item = iter.next(); + if (counter >= from) { + XdmNode n = (XdmNode) item; + ArrayList<XdmNode> precNodes = getPreceding(n, "pb"); + int page = precNodes.size(); + String nodeName = n.getNodeName().getLocalName(); + String nodeKindName = n.getNodeKind().name(); + ArrayList<XdmNode> precNodesUntilPB = getPrecedingUntil(n, nodeName, "pb"); + int hitPagePosition = precNodesUntilPB.size() + 1; + String itemStr = item.toString(); + // itemStr = itemStr.replaceAll("[ \n\t]+xmlns.*?\".*?\"", ""); // remove the namespace + itemStr = itemStr.replaceAll("[ \n\t]+xmlns", " xmlns"); // remove the blanks before the namespace + Hit hit = new Hit(itemStr); + hit.setPage(page); + hit.setName(nodeName); + if (nodeKindName != null && nodeKindName.equals("ELEMENT")) + hit.setType(Hit.TYPE_ELEMENT); + else if (nodeKindName != null && nodeKindName.equals("ATTRIBUTE")) + hit.setType(Hit.TYPE_ATTRIBUTE); + if (hit.getType() == Hit.TYPE_ELEMENT) + hit.setHitPagePosition(hitPagePosition); + hits.add(hit); + } + counter++; + } + result = new Hits(hits, from, to); + result.setSize(size); + } + return result; + } + + private String toString(XdmValue xdmValue) { + if (xdmValue instanceof XdmAtomicValue) { + XdmAtomicValue av = (XdmAtomicValue) xdmValue; + String avStr = av.getStringValue(); + return avStr; + } + String result = null; + XdmSequenceIterator iter = xdmValue.iterator(); + int size = xdmValue.size(); + if (size > 0) { + result = ""; + while (iter.hasNext()) { + XdmItem item = iter.next(); + String itemStr = item.toString(); + result += itemStr; + } } return result; } - private String toStringValue(XdmValue xdmValue) { - return xdmValue.itemAt(0).getStringValue(); + private String stringJoin(XdmValue xdmValue, String separator) { + if (xdmValue == null || xdmValue instanceof XdmEmptySequence) + return null; + if (xdmValue instanceof XdmAtomicValue) { + XdmAtomicValue av = (XdmAtomicValue) xdmValue; + String avStr = av.getStringValue(); + return avStr; + } + String result = ""; + XdmSequenceIterator iter = null; + if (xdmValue instanceof XdmNode) { + XdmNode xdmNode = (XdmNode) xdmValue; + iter = xdmNode.axisIterator(Axis.CHILD); + } else if (xdmValue instanceof XdmValue) { + iter = xdmValue.iterator(); + } + while (iter.hasNext()) { + XdmNode node = (XdmNode) iter.next(); + XdmNodeKind nodeKind = node.getNodeKind(); + if (nodeKind == XdmNodeKind.TEXT) { + String nodeStr = node.getStringValue(); + String trimmedNodeStr = nodeStr.trim(); + result = result + trimmedNodeStr; + } else if (nodeKind == XdmNodeKind.ELEMENT) { + result = result + stringJoin(node, separator) + separator; // put a separator between child element nodes + } + } + return result; + } + + private ArrayList<XdmNode> getPreceding(XdmNode startNode, String ofNodeName) { + ArrayList<XdmNode> retNodes = null; + XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING); + if (iter != null) { + retNodes = new ArrayList<XdmNode>(); + while (iter.hasNext()) { + XdmNode n = (XdmNode) iter.next(); + QName qName = n.getNodeName(); + if (qName != null) { + String name = qName.getLocalName(); + if (name != null && name.equals(ofNodeName)) + retNodes.add(n); + } + } + } + return retNodes; + } + + private ArrayList<XdmNode> getPrecedingUntil(XdmNode startNode, String ofNodeName, String untilNodeName) { + ArrayList<XdmNode> retNodes = null; + XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING); + if (iter != null) { + retNodes = new ArrayList<XdmNode>(); + while (iter.hasNext()) { + XdmNode n = (XdmNode) iter.next(); + QName qName = n.getNodeName(); + if (qName != null) { + String name = qName.getLocalName(); + if (name != null && name.equals(ofNodeName)) + retNodes.add(n); + if (name != null && name.equals(untilNodeName)) + return retNodes; + } + } + } + return retNodes; } }