Mercurial > hg > mpdl-group
view software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java @ 23:e845310098ba
diverse Korrekturen
author | Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 27 Nov 2012 12:35:19 +0100 |
parents | dc5e9fcb3fdc |
children |
line wrap: on
line source
package de.mpg.mpiwg.berlin.mpdl.xml.xquery;

import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.Pattern;

import javax.xml.transform.stream.StreamSource;

import org.xml.sax.SAXParseException;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;

import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmEmptySequence;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmNodeKind;
import net.sf.saxon.s9api.XdmSequenceIterator;
import net.sf.saxon.s9api.XdmValue;
import net.sf.saxon.trans.XPathException;

/**
 * Evaluates XQuery expressions against an XML document (given as a string or as a URL)
 * using the Saxon s9api, and converts the result into one of three shapes: a serialized
 * string, the raw {@link XdmValue}, or a {@code Hits} page of {@code Hit} objects.
 *
 * <p>One {@link Processor} and one {@link XQueryCompiler} are created per instance and
 * reused for every call.
 */
public class XQueryEvaluator {
  /** Result type selector: concatenation of the serialized result items. */
  private static final String RESULT_TYPE_STRING = "string";
  /** Result type selector: the unconverted Saxon value. */
  private static final String RESULT_TYPE_XDM_VALUE = "XdmValue";
  /** Result type selector: a {@code Hits} page. */
  private static final String RESULT_TYPE_HITS = "hits";

  /** Range passed by the convenience overloads; it is only consulted for "hits" results. */
  private static final int DEFAULT_FROM = 0;
  private static final int DEFAULT_TO = 9;

  /** Local name of the element counted to derive a hit's page number. */
  private static final String PAGE_BREAK_NAME = "pb";

  /** Whitespace run (blank, newline, tab) directly in front of an {@code xmlns} declaration. */
  private static final Pattern WHITESPACE_BEFORE_XMLNS = Pattern.compile("[ \n\t]+xmlns");

  private final Processor processor;
  private final XQueryCompiler xQueryCompiler;

  /** Creates an evaluator backed by a {@code new Processor(false)} and its XQuery compiler. */
  public XQueryEvaluator() {
    processor = new Processor(false);
    xQueryCompiler = processor.newXQueryCompiler();
  }

  /**
   * Parses the document at the given URL into a Saxon document node.
   *
   * @param srcUrl location of the XML document
   * @return the built document node
   * @throws ApplicationException if Saxon cannot build the document; when the failure is
   *     caused by a {@link SAXParseException}, the message is prefixed with the line and
   *     column number reported by the parser
   */
  public XdmNode parse(URL srcUrl) throws ApplicationException {
    try {
      DocumentBuilder docBuilder = processor.newDocumentBuilder();
      StreamSource xmlDoc = new StreamSource(srcUrl.toString());
      return docBuilder.build(xmlDoc);
    } catch (SaxonApiException e) {
      String message = e.getMessage();
      Throwable cause = e.getCause();
      if (cause instanceof XPathException) {
        Throwable nested = ((XPathException) cause).getException();
        if (nested instanceof SAXParseException) {
          SAXParseException saxParseException = (SAXParseException) nested;
          message = "Line: " + saxParseException.getLineNumber()
              + ", Column: " + saxParseException.getColumnNumber()
              + ": " + e.getMessage();
        }
      }
      // NOTE(review): the cause is not chained because this file only shows the
      // ApplicationException(String) and ApplicationException(Throwable) constructors.
      throw new ApplicationException(
          "Your source file (" + srcUrl.toString() + ") is not valid: " + message);
    }
  }

  /**
   * Evaluates the query against the document at {@code srcUrl} and returns the result
   * items at positions {@code from..to} (both inclusive, zero-based) as hits.
   *
   * @return the hits, or {@code null} if the query result is an empty sequence
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public Hits evaluate(URL srcUrl, String xqueryStr, int from, int to) throws ApplicationException {
    return (Hits) evaluate(srcUrl, xqueryStr, from, to, RESULT_TYPE_HITS);
  }

  /**
   * Evaluates the query against the given XML string and returns the raw Saxon value.
   *
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public XdmValue evaluate(String srcXmlStr, String xqueryStr) throws ApplicationException {
    return (XdmValue) evaluate(srcXmlStr, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
  }

  /**
   * Evaluates the query against the document at {@code srcUrl} and returns the raw Saxon value.
   *
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public XdmValue evaluate(URL srcUrl, String xqueryStr) throws ApplicationException {
    return (XdmValue) evaluate(srcUrl, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
  }

  /**
   * Evaluates the query against the given XML string and returns the concatenated string
   * form of all result items.
   *
   * @return the concatenated result, or {@code null} if the result is an empty sequence
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public String evaluateAsString(String srcXmlStr, String xqueryStr) throws ApplicationException {
    return (String) evaluate(srcXmlStr, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_STRING);
  }

  /**
   * Evaluates the query against the document at {@code srcUrl} and returns the concatenated
   * string form of all result items.
   *
   * @return the concatenated result, or {@code null} if the result is an empty sequence
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public String evaluateAsString(URL srcUrl, String xqueryStr) throws ApplicationException {
    return (String) evaluate(srcUrl, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_STRING);
  }

  /** Same as the three-argument overload with a single blank as separator. */
  public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr) throws ApplicationException {
    return evaluateAsStringValueJoined(srcXmlStr, xqueryStr, " ");
  }

  /** Same as the three-argument overload with a single blank as separator. */
  public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr) throws ApplicationException {
    return evaluateAsStringValueJoined(srcUrl, xqueryStr, " ");
  }

  /**
   * Evaluates the query against the given XML string and joins the text content of the
   * result; see {@code stringJoin} for the joining rules.
   *
   * @return the joined text, or {@code null} if the result is an empty sequence
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr, String separator)
      throws ApplicationException {
    XdmValue val = (XdmValue) evaluate(srcXmlStr, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
    return stringJoin(val, separator);
  }

  /**
   * Evaluates the query against the document at {@code srcUrl} and joins the text content
   * of the result; see {@code stringJoin} for the joining rules.
   *
   * @return the joined text, or {@code null} if the result is an empty sequence
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr, String separator)
      throws ApplicationException {
    XdmValue val = (XdmValue) evaluate(srcUrl, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
    return stringJoin(val, separator);
  }

  /**
   * Evaluates the query against the given XML string.
   *
   * @param srcXmlStr the XML document text
   * @param xqueryStr the XQuery expression
   * @param from first result position (zero-based, inclusive); only used for "hits"
   * @param to last result position (zero-based, inclusive); only used for "hits"
   * @param resultType "string", "XdmValue" or "hits"
   * @return a {@code String}, {@link XdmValue} or {@code Hits} depending on
   *     {@code resultType}; for any other (or {@code null}) result type this overload
   *     returns the raw {@link XdmValue}
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public Object evaluate(String srcXmlStr, String xqueryStr, int from, int to, String resultType)
      throws ApplicationException {
    StreamSource xmlDoc = new StreamSource(new StringReader(srcXmlStr));
    XdmValue val = runQuery(xmlDoc, xqueryStr);
    // Historic behaviour of this overload: unknown result types yield the raw value.
    return convertResult(val, from, to, resultType, val);
  }

  /**
   * Evaluates the query against the document at {@code srcUrl}.
   *
   * @param srcUrl location of the XML document
   * @param xqueryStr the XQuery expression
   * @param from first result position (zero-based, inclusive); only used for "hits"
   * @param to last result position (zero-based, inclusive); only used for "hits"
   * @param resultType "string", "XdmValue" or "hits"
   * @return a {@code String}, {@link XdmValue} or {@code Hits} depending on
   *     {@code resultType}; for any other (or {@code null}) result type this overload
   *     returns {@code null}
   * @throws ApplicationException if the query cannot be compiled or evaluated
   */
  public Object evaluate(URL srcUrl, String xqueryStr, int from, int to, String resultType)
      throws ApplicationException {
    StreamSource xmlDoc = new StreamSource(srcUrl.toString());
    XdmValue val = runQuery(xmlDoc, xqueryStr);
    // Historic behaviour of this overload: unknown result types yield null.
    return convertResult(val, from, to, resultType, null);
  }

  /** Compiles the query, runs it with {@code xmlDoc} as source and returns the raw value. */
  private XdmValue runQuery(StreamSource xmlDoc, String xqueryStr) throws ApplicationException {
    try {
      XQueryExecutable executable = xQueryCompiler.compile(xqueryStr);
      net.sf.saxon.s9api.XQueryEvaluator evaluator = executable.load();
      evaluator.setSource(xmlDoc);
      return evaluator.evaluate();
    } catch (SaxonApiException e) {
      throw new ApplicationException(e);
    }
  }

  /** Maps the raw value to the requested result type, or to {@code fallback} if the type is unknown. */
  private Object convertResult(XdmValue val, int from, int to, String resultType, Object fallback)
      throws ApplicationException {
    if (RESULT_TYPE_STRING.equals(resultType)) {
      return toString(val);
    }
    if (RESULT_TYPE_XDM_VALUE.equals(resultType)) {
      return val;
    }
    if (RESULT_TYPE_HITS.equals(resultType)) {
      return toHits(val, from, to);
    }
    return fallback;
  }

  /**
   * Converts the items at positions {@code from..to} (inclusive, zero-based) into hits.
   * The reported size is always the size of the complete result, not of the page.
   *
   * @return the hits, or {@code null} if the value is an empty sequence
   */
  private Hits toHits(XdmValue xdmValue, int from, int to) throws ApplicationException {
    if (xdmValue instanceof XdmAtomicValue) {
      ArrayList<Hit> hits = new ArrayList<Hit>();
      hits.add(atomicHit((XdmAtomicValue) xdmValue));
      Hits result = new Hits(hits, from, to);
      result.setSize(1);
      return result;
    }
    int size = xdmValue.size();
    if (size <= 0) {
      return null;
    }
    ArrayList<Hit> hits = new ArrayList<Hit>();
    XdmSequenceIterator iter = xdmValue.iterator();
    int counter = 0;
    while (iter.hasNext() && counter <= to) {
      XdmItem item = iter.next();
      if (counter >= from) {
        hits.add(itemHit(item));
      }
      counter++;
    }
    Hits result = new Hits(hits, from, to);
    result.setSize(size);
    return result;
  }

  /**
   * Builds a hit for one item of a result sequence. A sequence may mix nodes and atomic
   * values, so the item kind is checked instead of blindly casting to {@link XdmNode}.
   */
  private Hit itemHit(XdmItem item) throws ApplicationException {
    if (item instanceof XdmNode) {
      return nodeHit((XdmNode) item);
    }
    if (item instanceof XdmAtomicValue) {
      return atomicHit((XdmAtomicValue) item);
    }
    // Neither node nor atomic value: keep its string form and leave the type unset.
    return new Hit(item.toString());
  }

  /** Builds a hit of type {@code Hit.TYPE_ATOMIC_VALUE} from the string value of an atomic value. */
  private Hit atomicHit(XdmAtomicValue value) throws ApplicationException {
    Hit hit = new Hit(value.getStringValue());
    hit.setType(Hit.TYPE_ATOMIC_VALUE);
    return hit;
  }

  /**
   * Builds a hit from a node: its serialized form, the number of preceding {@code pb}
   * nodes as page, its local name and its type (element or attribute; other node kinds
   * keep the default type of {@code Hit}).
   */
  private Hit nodeHit(XdmNode node) throws ApplicationException {
    // Unnamed nodes (e.g. text or comment nodes) have no QName.
    QName qName = node.getNodeName();
    String nodeName = (qName == null) ? null : qName.getLocalName();
    int page = getPreceding(node, PAGE_BREAK_NAME).size();
    // Collapse the whitespace in front of namespace declarations to a single blank.
    String itemStr = WHITESPACE_BEFORE_XMLNS.matcher(node.toString()).replaceAll(" xmlns");
    Hit hit = new Hit(itemStr);
    hit.setPage(page);
    if (nodeName != null) {
      hit.setName(nodeName);
    }
    XdmNodeKind nodeKind = node.getNodeKind();
    if (nodeKind == XdmNodeKind.ELEMENT) {
      hit.setType(Hit.TYPE_ELEMENT);
    } else if (nodeKind == XdmNodeKind.ATTRIBUTE) {
      hit.setType(Hit.TYPE_ATTRIBUTE);
    }
    if (hit.getType() == Hit.TYPE_ELEMENT) {
      // Position on the page: same-named nodes found before reaching a pb, plus this one.
      int hitPagePosition = getPrecedingUntil(node, nodeName, PAGE_BREAK_NAME).size() + 1;
      hit.setHitPagePosition(hitPagePosition);
    }
    return hit;
  }

  /**
   * Returns the string value of an atomic value, or the concatenation of the
   * {@code toString()} forms of all items of a sequence.
   *
   * @return the string, or {@code null} if the value is an empty sequence
   */
  private String toString(XdmValue xdmValue) {
    if (xdmValue instanceof XdmAtomicValue) {
      return ((XdmAtomicValue) xdmValue).getStringValue();
    }
    if (xdmValue.size() <= 0) {
      return null;
    }
    StringBuilder result = new StringBuilder();
    XdmSequenceIterator iter = xdmValue.iterator();
    while (iter.hasNext()) {
      XdmItem item = iter.next();
      result.append(item.toString());
    }
    return result.toString();
  }

  /**
   * Joins the text content of a value. For a single node its children are visited, for a
   * sequence its items: text nodes contribute their trimmed text, element nodes contribute
   * their recursively joined content followed by {@code separator}, atomic values inside a
   * sequence contribute their string value followed by {@code separator}; everything else
   * is skipped.
   *
   * @return the joined text, the plain string value for a single atomic value, or
   *     {@code null} for {@code null} / the empty sequence
   */
  private String stringJoin(XdmValue xdmValue, String separator) {
    if (xdmValue == null || xdmValue instanceof XdmEmptySequence) {
      return null;
    }
    if (xdmValue instanceof XdmAtomicValue) {
      return ((XdmAtomicValue) xdmValue).getStringValue();
    }
    XdmSequenceIterator iter;
    if (xdmValue instanceof XdmNode) {
      iter = ((XdmNode) xdmValue).axisIterator(Axis.CHILD);
    } else {
      iter = xdmValue.iterator();
    }
    StringBuilder result = new StringBuilder();
    while (iter.hasNext()) {
      XdmItem item = iter.next();
      if (item instanceof XdmAtomicValue) {
        // Atomic values in a mixed sequence are joined like elements instead of failing the cast.
        result.append(((XdmAtomicValue) item).getStringValue()).append(separator);
      } else if (item instanceof XdmNode) {
        XdmNode node = (XdmNode) item;
        XdmNodeKind nodeKind = node.getNodeKind();
        if (nodeKind == XdmNodeKind.TEXT) {
          result.append(node.getStringValue().trim());
        } else if (nodeKind == XdmNodeKind.ELEMENT) {
          // put a separator after each child element's content
          result.append(stringJoin(node, separator)).append(separator);
        }
      }
    }
    return result.toString();
  }

  /**
   * Collects all nodes on the preceding axis of {@code startNode} whose local name equals
   * {@code ofNodeName} (namespaces are not compared).
   */
  private ArrayList<XdmNode> getPreceding(XdmNode startNode, String ofNodeName) {
    // No stop name: String.equals(null) is never true, so the whole axis is scanned.
    return getPrecedingUntil(startNode, ofNodeName, null);
  }

  /**
   * Collects nodes on the preceding axis of {@code startNode} whose local name equals
   * {@code ofNodeName}, stopping at the first node whose local name equals
   * {@code untilNodeName}. Nodes are visited in the order delivered by Saxon's axis iterator.
   */
  private ArrayList<XdmNode> getPrecedingUntil(XdmNode startNode, String ofNodeName, String untilNodeName) {
    ArrayList<XdmNode> retNodes = new ArrayList<XdmNode>();
    XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING);
    while (iter.hasNext()) {
      XdmNode n = (XdmNode) iter.next();
      QName qName = n.getNodeName();
      if (qName == null) {
        continue;
      }
      String name = qName.getLocalName();
      if (name == null) {
        continue;
      }
      if (name.equals(ofNodeName)) {
        retNodes.add(n);
      }
      if (name.equals(untilNodeName)) {
        return retNodes;
      }
    }
    return retNodes;
  }
}