diff software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents dc5e9fcb3fdc
children
line wrap: on
line diff
--- a/software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java	Wed Dec 14 13:57:09 2011 +0100
+++ b/software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java	Tue Nov 27 12:35:19 2012 +0100
@@ -3,19 +3,30 @@
 import java.io.StringReader;
 import java.io.StringWriter;
 import java.net.URL;
+import java.util.ArrayList;
 
 import javax.xml.transform.stream.StreamSource;
 
+import org.xml.sax.SAXParseException;
+
 import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
 
+import net.sf.saxon.s9api.Axis;
+import net.sf.saxon.s9api.DocumentBuilder;
 import net.sf.saxon.s9api.Processor;
+import net.sf.saxon.s9api.QName;
 import net.sf.saxon.s9api.SaxonApiException;
 import net.sf.saxon.s9api.Serializer;
 import net.sf.saxon.s9api.XQueryCompiler;
 import net.sf.saxon.s9api.XQueryExecutable;
+import net.sf.saxon.s9api.XdmAtomicValue;
+import net.sf.saxon.s9api.XdmEmptySequence;
 import net.sf.saxon.s9api.XdmItem;
+import net.sf.saxon.s9api.XdmNode;
+import net.sf.saxon.s9api.XdmNodeKind;
 import net.sf.saxon.s9api.XdmSequenceIterator;
 import net.sf.saxon.s9api.XdmValue;
+import net.sf.saxon.trans.XPathException;
 
 public class XQueryEvaluator {
   private Processor processor;
@@ -26,37 +37,79 @@
     xQueryCompiler = processor.newXQueryCompiler();
   }
   
+  /**
+   * Parses the XML document at the given URL into a Saxon document node.
+   * @param srcUrl location of the XML source; its string form is handed to the StreamSource
+   * @return the document node built by a DocumentBuilder of this evaluator's processor
+   * @throws ApplicationException if the build fails; the message carries line and column
+   *         when the failure goes back to a SAXParseException
+   */
+  public XdmNode parse(URL srcUrl) throws ApplicationException {
+    try {
+      return processor.newDocumentBuilder().build(new StreamSource(srcUrl.toString()));
+    } catch (SaxonApiException e) {
+      String detail = e.getMessage();
+      Throwable cause = e.getCause();
+      Throwable nested = (cause instanceof XPathException) ? ((XPathException) cause).getException() : null;
+      if (nested instanceof SAXParseException) {
+        SAXParseException parseError = (SAXParseException) nested;
+        // prepend the parser position so the problem can be located in the source file
+        detail = "Line: " + parseError.getLineNumber() + ", Column: " + parseError.getColumnNumber() + ": " + detail;
+      }
+      throw new ApplicationException("Your source file (" + srcUrl.toString() + ") is not valid: " + detail);
+    }
+  }
+  
+  /** Runs the query on the document at srcUrl; returns the hits at positions from..to (zero-based, inclusive) or null if the result is empty. */
+  public Hits evaluate(URL srcUrl, String xqueryStr, int from, int to) throws ApplicationException {
+    return (Hits) evaluate(srcUrl, xqueryStr, from, to, "hits");
+  }
+
   public XdmValue evaluate(String srcXmlStr, String xqueryStr) throws ApplicationException {
-    XdmValue result = (XdmValue) evaluate(srcXmlStr, xqueryStr, null);
+    XdmValue result = (XdmValue) evaluate(srcXmlStr, xqueryStr, 0, 9, "XdmValue"); // "XdmValue" hands back the raw query result; the 0..9 range is only consulted for the "hits" type
     return result;
   }
 
   public XdmValue evaluate(URL srcUrl, String xqueryStr) throws ApplicationException {
-    XdmValue result = (XdmValue) evaluate(srcUrl, xqueryStr, null);
+    XdmValue result = (XdmValue) evaluate(srcUrl, xqueryStr, 0, 9, "XdmValue"); // "XdmValue" hands back the raw query result; the 0..9 range is only consulted for the "hits" type
     return result;
   }
 
   public String evaluateAsString(String srcXmlStr, String xqueryStr) throws ApplicationException {
-    Object result = evaluate(srcXmlStr, xqueryStr, "asString");
-    return (String) result;
+    // The "string" result type serializes the whole query result; the 0..9 range is not used for it.
+    // An empty result arrives as null, and casting null to String is legal in Java,
+    // so the value is passed through without a separate null branch.
+    Object serialized = evaluate(srcXmlStr, xqueryStr, 0, 9, "string");
+    return (String) serialized;
   }
   
   public String evaluateAsString(URL srcUrl, String xqueryStr) throws ApplicationException {
-    Object result = evaluate(srcUrl, xqueryStr, "asString");
-    return (String) result;
+    // The "string" result type serializes the whole query result; the 0..9 range is not used for it.
+    // An empty result arrives as null, and casting null to String is legal in Java,
+    // so the value is passed through without a separate null branch.
+    Object serialized = evaluate(srcUrl, xqueryStr, 0, 9, "string");
+    return (String) serialized;
   }
 
-  public String evaluateAsStringValue(String srcXmlStr, String xqueryStr) throws ApplicationException {
-    XdmValue val = (XdmValue) evaluate(srcXmlStr, xqueryStr, null);
-    return toStringValue(val);
+  public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr) throws ApplicationException {
+    return evaluateAsStringValueJoined(srcXmlStr, xqueryStr, " "); // delegates with a single blank as the separator
   }
   
-  public String evaluateAsStringValue(URL srcUrl, String xqueryStr) throws ApplicationException {
-    XdmValue val = (XdmValue) evaluate(srcUrl, xqueryStr, null);
-    return toStringValue(val);
+  public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr) throws ApplicationException {
+    return evaluateAsStringValueJoined(srcUrl, xqueryStr, " "); // delegates with a single blank as the separator
   }
 
-  public Object evaluate(String srcXmlStr, String xqueryStr, String optionsStr) throws ApplicationException {
+  /** Evaluates the query against the XML string and joins the text of the result via stringJoin with the given separator. */
+  public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr, String separator) throws ApplicationException {
+    return stringJoin((XdmValue) evaluate(srcXmlStr, xqueryStr, 0, 9, "XdmValue"), separator);
+  }
+  
+  /** Evaluates the query against the document at srcUrl and joins the text of the result via stringJoin with the given separator. */
+  public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr, String separator) throws ApplicationException {
+    return stringJoin((XdmValue) evaluate(srcUrl, xqueryStr, 0, 9, "XdmValue"), separator);
+  }
+
+  public Object evaluate(String srcXmlStr, String xqueryStr, int from, int to, String resultType) throws ApplicationException {
     try {
       StringReader srcXmlStrReader = new StringReader(srcXmlStr);
       StreamSource xmlDoc = new StreamSource(srcXmlStrReader);
@@ -67,12 +120,12 @@
       xQueryEvaluator.setSource(xmlDoc);
       XdmValue val = xQueryEvaluator.evaluate();
       Object result = val;
-      if (optionsStr != null && optionsStr.contains("asString")) {
-        int size = val.size();
-        if (size <= 0)
-          result = "";
-        else 
-          result = toString(val);
+      if (resultType != null && resultType.equals("string")) {
+        result = toString(val);
+      } else if (resultType != null && resultType.equals("XdmValue")) {
+        result = val;
+      } else if (resultType != null && resultType.equals("hits")) {
+        result = toHits(val, from, to);
       }
       return result;
     } catch (SaxonApiException e) {
@@ -80,7 +133,7 @@
     }
   }
 
-  public Object evaluate(URL srcUrl, String xqueryStr, String optionsStr) throws ApplicationException {
+  public Object evaluate(URL srcUrl, String xqueryStr, int from, int to, String resultType) throws ApplicationException {
     try {
       StreamSource xmlDoc = new StreamSource(srcUrl.toString());
       XQueryExecutable xQueryExecutable = xQueryCompiler.compile(xqueryStr);
@@ -89,13 +142,13 @@
       net.sf.saxon.s9api.XQueryEvaluator xQueryEvaluator = xQueryExecutable.load();
       xQueryEvaluator.setSource(xmlDoc);
       XdmValue val = xQueryEvaluator.evaluate();
-      Object result = val;
-      if (optionsStr != null && optionsStr.contains("asString")) {
-        int size = val.size();
-        if (size <= 0)
-          result = "";
-        else 
-          result = toString(val);
+      Object result = null;
+      if (resultType != null && resultType.equals("string")) {
+        result = toString(val);
+      } else if (resultType != null && resultType.equals("XdmValue")) {
+        result = val;
+      } else if (resultType != null && resultType.equals("hits")) {
+        result = toHits(val, from, to);
       }
       return result;
     } catch (SaxonApiException e) {
@@ -103,18 +156,141 @@
     }
   }
   
-  private String toString(XdmValue xdmValue) {
-    String result = "";
+  /**
+   * Builds the Hits page for the result items at positions from..to (zero-based, inclusive).
+   * A lone atomic value always yields one atomic hit; an empty result yields null. Atomic items and
+   * unnamed nodes (e.g. text nodes) in a sequence become hits too instead of raising CCE / NPE.
+   */
+  private Hits toHits(XdmValue xdmValue, int from, int to) throws ApplicationException {
+    if (xdmValue instanceof XdmAtomicValue) {
+      ArrayList<Hit> single = new ArrayList<Hit>();
+      single.add(toHit((XdmAtomicValue) xdmValue));
+      Hits atomicResult = new Hits(single, from, to);
+      atomicResult.setSize(1);
+      return atomicResult;
+    }
     XdmSequenceIterator iter = xdmValue.iterator();
-    while (iter.hasNext()) {
-      XdmItem item = iter.next();
-      String itemStr = item.toString();
-      result += itemStr;
+    int size = xdmValue.size();
+    if (size <= 0)
+      return null;
+    ArrayList<Hit> hits = new ArrayList<Hit>();
+    for (int counter = 0; iter.hasNext() && counter <= to; counter++) {
+      XdmItem item = iter.next();
+      if (counter >= from)
+        hits.add(toHit(item));
+    }
+    Hits result = new Hits(hits, from, to);
+    result.setSize(size); // total number of result items, not only those inside the from..to window
+    return result;
+  }
+
+  /** Creates the Hit for one result item; page, name and position are derived only for node items. */
+  private Hit toHit(XdmItem item) {
+    if (item.isAtomicValue()) {
+      Hit atomicHit = new Hit(item.getStringValue());
+      atomicHit.setType(Hit.TYPE_ATOMIC_VALUE);
+      return atomicHit;
+    }
+    XdmNode n = (XdmNode) item;
+    String nodeName = (n.getNodeName() != null) ? n.getNodeName().getLocalName() : null; // unnamed nodes (text, comment) have no QName
+    Hit hit = new Hit(item.toString().replaceAll("[ \n\t]+xmlns", " xmlns")); // collapse the whitespace in front of namespace declarations
+    hit.setPage(getPreceding(n, "pb").size()); // number of "pb" nodes found on the preceding axis
+    if (nodeName != null)
+      hit.setName(nodeName);
+    if (n.getNodeKind() == XdmNodeKind.ELEMENT)
+      hit.setType(Hit.TYPE_ELEMENT);
+    else if (n.getNodeKind() == XdmNodeKind.ATTRIBUTE)
+      hit.setType(Hit.TYPE_ATTRIBUTE);
+    if (hit.getType() == Hit.TYPE_ELEMENT) // 1 + same-named nodes met on the preceding axis before a "pb"
+      hit.setHitPagePosition(getPrecedingUntil(n, nodeName, "pb").size() + 1);
+    return hit;
+  }
+
+  /**
+   * Serializes a query result by concatenating item.toString() of every item, without separator.
+   * A StringBuilder collects the pieces so that long results are not re-copied on every item.
+   * @return the string value of a lone atomic value, the concatenation for a non-empty
+   *         result, or null when the result is empty
+   */
+  private String toString(XdmValue xdmValue) {
+    if (xdmValue instanceof XdmAtomicValue)
+      return ((XdmAtomicValue) xdmValue).getStringValue();
+    String result = null;
+    if (xdmValue.size() > 0) {
+      StringBuilder buf = new StringBuilder();
+      XdmSequenceIterator iter = xdmValue.iterator();
+      while (iter.hasNext())
+        buf.append(iter.next().toString());
+      result = buf.toString();
     }
     return result;
   }
   
-  private String toStringValue(XdmValue xdmValue) {
-    return xdmValue.itemAt(0).getStringValue();
+  /**
+   * Joins the text found in a query result. Text nodes contribute their trimmed string value;
+   * element nodes contribute their recursively joined content followed by the separator (so the
+   * separator also trails the last element). Atomic items inside a sequence contribute their
+   * string value instead of causing a ClassCastException. Other node kinds are ignored.
+   * @return the joined string, or null when xdmValue is null or an XdmEmptySequence
+   */
+  private String stringJoin(XdmValue xdmValue, String separator) {
+    if (xdmValue == null || xdmValue instanceof XdmEmptySequence)
+      return null;
+    if (xdmValue instanceof XdmAtomicValue)
+      return ((XdmAtomicValue) xdmValue).getStringValue();
+    // a single node is joined over its children, any other value over its items
+    XdmSequenceIterator iter = (xdmValue instanceof XdmNode) ? ((XdmNode) xdmValue).axisIterator(Axis.CHILD) : xdmValue.iterator();
+    StringBuilder joined = new StringBuilder();
+    while (iter.hasNext()) {
+      XdmItem item = iter.next();
+      if (item.isAtomicValue()) {
+        joined.append(item.getStringValue());
+        continue;
+      }
+      XdmNode node = (XdmNode) item;
+      if (node.getNodeKind() == XdmNodeKind.TEXT)
+        joined.append(node.getStringValue().trim());
+      else if (node.getNodeKind() == XdmNodeKind.ELEMENT)
+        joined.append(stringJoin(node, separator)).append(separator);
+    }
+    return joined.toString();
+  }
+
+  /**
+   * Collects the nodes on the preceding axis of startNode whose local name equals ofNodeName.
+   * Nodes without a name (getNodeName() == null) are skipped.
+   * @return the matching nodes in axis iteration order; never null, because callers use size() directly
+   */
+  private ArrayList<XdmNode> getPreceding(XdmNode startNode, String ofNodeName) {
+    ArrayList<XdmNode> retNodes = new ArrayList<XdmNode>();
+    XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING);
+    while (iter != null && iter.hasNext()) {
+      XdmNode n = (XdmNode) iter.next();
+      QName qName = n.getNodeName();
+      String name = (qName != null) ? qName.getLocalName() : null;
+      if (name != null && name.equals(ofNodeName))
+        retNodes.add(n);
+    }
+    return retNodes;
+  }
+
+  /**
+   * Collects the nodes named ofNodeName on the preceding axis of startNode, stopping at the
+   * first node named untilNodeName (that node is included only if it also matches ofNodeName).
+   * @return the nodes collected up to the stop node; never null, because callers use size() directly
+   */
+  private ArrayList<XdmNode> getPrecedingUntil(XdmNode startNode, String ofNodeName, String untilNodeName) {
+    ArrayList<XdmNode> retNodes = new ArrayList<XdmNode>();
+    XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING);
+    while (iter != null && iter.hasNext()) {
+      XdmNode n = (XdmNode) iter.next();
+      QName qName = n.getNodeName();
+      String name = (qName != null) ? qName.getLocalName() : null;
+      if (name != null && name.equals(ofNodeName))
+        retNodes.add(n);
+      if (name != null && name.equals(untilNodeName))
+        break;
+    }
+    return retNodes;
   }
 }