view software/mpdl-services/mpiwg-mpdl-xml/src/de/mpg/mpiwg/berlin/mpdl/xml/xquery/XQueryEvaluator.java @ 23:e845310098ba

diverse Korrekturen
author Josef Willenborg <jwillenborg@mpiwg-berlin.mpg.de>
date Tue, 27 Nov 2012 12:35:19 +0100
parents dc5e9fcb3fdc
children
line wrap: on
line source

package de.mpg.mpiwg.berlin.mpdl.xml.xquery;

import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;

import javax.xml.transform.stream.StreamSource;

import org.xml.sax.SAXParseException;

import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;

import net.sf.saxon.s9api.Axis;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.QName;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.Serializer;
import net.sf.saxon.s9api.XQueryCompiler;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmAtomicValue;
import net.sf.saxon.s9api.XdmEmptySequence;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmNodeKind;
import net.sf.saxon.s9api.XdmSequenceIterator;
import net.sf.saxon.s9api.XdmValue;
import net.sf.saxon.trans.XPathException;

/**
 * Thin wrapper around Saxon's s9api for parsing XML sources and evaluating
 * XQuery expressions against them.
 *
 * <p>Query results can be delivered as the raw {@link XdmValue}, as a
 * concatenated string, as a separator-joined text value, or as a page of
 * {@link Hits}.
 *
 * <p>NOTE(review): a single {@link XQueryCompiler} is shared by all calls on
 * one instance; whether that is safe under concurrent use is not established
 * by this class - confirm before sharing an instance between threads.
 */
public class XQueryEvaluator {
  /** Result type selector: concatenated string form of all result items. */
  private static final String RESULT_TYPE_STRING = "string";
  /** Result type selector: the unconverted {@link XdmValue}. */
  private static final String RESULT_TYPE_XDM_VALUE = "XdmValue";
  /** Result type selector: a {@link Hits} page. */
  private static final String RESULT_TYPE_HITS = "hits";

  /** Range handed to the generic evaluate methods by the convenience overloads. */
  private static final int DEFAULT_FROM = 0;
  private static final int DEFAULT_TO = 9;

  /** Local name of the element counted to derive a hit's page (presumably "page break"). */
  private static final String PAGE_BREAK_NAME = "pb";

  private final Processor processor;
  private final XQueryCompiler xQueryCompiler;

  /** Creates an evaluator backed by a non-schema-aware Saxon processor. */
  public XQueryEvaluator() {
    processor = new Processor(false);
    xQueryCompiler = processor.newXQueryCompiler();
  }

  /**
   * Parses the XML document at the given URL.
   *
   * @param srcUrl location of the XML document
   * @return the document node of the parsed source
   * @throws ApplicationException if the source cannot be built; when the
   *         underlying cause is a {@link SAXParseException} the message is
   *         prefixed with the line and column of the error
   */
  public XdmNode parse(URL srcUrl) throws ApplicationException {
    try {
      DocumentBuilder docBuilder = processor.newDocumentBuilder();
      StreamSource xmlDoc = new StreamSource(srcUrl.toString());
      return docBuilder.build(xmlDoc);
    } catch (SaxonApiException e) {
      String message = e.getMessage();
      Throwable cause = e.getCause();
      if (cause instanceof XPathException) {
        // Saxon wraps the parser's exception; unwrap it to get at the position.
        Throwable nested = ((XPathException) cause).getException();
        if (nested instanceof SAXParseException) {
          SAXParseException saxParseException = (SAXParseException) nested;
          int lineNumber = saxParseException.getLineNumber();
          int columnNumber = saxParseException.getColumnNumber();
          message = "Line: " + lineNumber + ", Column: " + columnNumber + ": " + e.getMessage();
        }
      }
      throw new ApplicationException("Your source file (" + srcUrl.toString() + ") is not valid: " + message);
    }
  }

  /**
   * Evaluates the query against the document at {@code srcUrl} and returns
   * the items in the inclusive index range {@code from..to} as hits.
   *
   * @return the hits, or {@code null} if the query result is empty
   */
  public Hits evaluate(URL srcUrl, String xqueryStr, int from, int to) throws ApplicationException {
    return (Hits) evaluate(srcUrl, xqueryStr, from, to, RESULT_TYPE_HITS);
  }

  /** Evaluates the query against the given XML string and returns the raw result. */
  public XdmValue evaluate(String srcXmlStr, String xqueryStr) throws ApplicationException {
    return (XdmValue) evaluate(srcXmlStr, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
  }

  /** Evaluates the query against the document at {@code srcUrl} and returns the raw result. */
  public XdmValue evaluate(URL srcUrl, String xqueryStr) throws ApplicationException {
    return (XdmValue) evaluate(srcUrl, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
  }

  /**
   * Evaluates the query against the given XML string and concatenates the
   * string form of all result items.
   *
   * @return the concatenation, or {@code null} if the result is empty
   */
  public String evaluateAsString(String srcXmlStr, String xqueryStr) throws ApplicationException {
    return (String) evaluate(srcXmlStr, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_STRING);
  }

  /**
   * Evaluates the query against the document at {@code srcUrl} and
   * concatenates the string form of all result items.
   *
   * @return the concatenation, or {@code null} if the result is empty
   */
  public String evaluateAsString(URL srcUrl, String xqueryStr) throws ApplicationException {
    return (String) evaluate(srcUrl, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_STRING);
  }

  /** Same as {@link #evaluateAsStringValueJoined(String, String, String)} with a single blank as separator. */
  public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr) throws ApplicationException {
    return evaluateAsStringValueJoined(srcXmlStr, xqueryStr, " ");
  }

  /** Same as {@link #evaluateAsStringValueJoined(URL, String, String)} with a single blank as separator. */
  public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr) throws ApplicationException {
    return evaluateAsStringValueJoined(srcUrl, xqueryStr, " ");
  }

  /**
   * Evaluates the query against the given XML string and joins the trimmed
   * text content of the result, appending {@code separator} after every
   * element (and atomic item).
   *
   * @return the joined text, or {@code null} if the result is empty
   */
  public String evaluateAsStringValueJoined(String srcXmlStr, String xqueryStr, String separator) throws ApplicationException {
    XdmValue val = (XdmValue) evaluate(srcXmlStr, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
    return stringJoin(val, separator);
  }

  /**
   * Evaluates the query against the document at {@code srcUrl} and joins the
   * trimmed text content of the result, appending {@code separator} after
   * every element (and atomic item).
   *
   * @return the joined text, or {@code null} if the result is empty
   */
  public String evaluateAsStringValueJoined(URL srcUrl, String xqueryStr, String separator) throws ApplicationException {
    XdmValue val = (XdmValue) evaluate(srcUrl, xqueryStr, DEFAULT_FROM, DEFAULT_TO, RESULT_TYPE_XDM_VALUE);
    return stringJoin(val, separator);
  }

  /**
   * Evaluates the query against the given XML string.
   *
   * @param srcXmlStr  the XML document as a string
   * @param xqueryStr  the XQuery expression
   * @param from       first result index (inclusive) used for {@code "hits"}
   * @param to         last result index (inclusive) used for {@code "hits"}
   * @param resultType {@code "string"}, {@code "XdmValue"} or {@code "hits"}
   * @return a {@code String}, {@link XdmValue} or {@link Hits} depending on
   *         {@code resultType}; for any other (or {@code null}) result type
   *         this overload returns the raw {@link XdmValue}
   * @throws ApplicationException if compiling or running the query fails
   */
  public Object evaluate(String srcXmlStr, String xqueryStr, int from, int to, String resultType) throws ApplicationException {
    StreamSource xmlDoc = new StreamSource(new StringReader(srcXmlStr));
    XdmValue val = run(xmlDoc, xqueryStr);
    // Unknown result types fall back to the raw value in this overload.
    return convert(val, from, to, resultType, val);
  }

  /**
   * Evaluates the query against the document at {@code srcUrl}.
   *
   * @param srcUrl     location of the XML document
   * @param xqueryStr  the XQuery expression
   * @param from       first result index (inclusive) used for {@code "hits"}
   * @param to         last result index (inclusive) used for {@code "hits"}
   * @param resultType {@code "string"}, {@code "XdmValue"} or {@code "hits"}
   * @return a {@code String}, {@link XdmValue} or {@link Hits} depending on
   *         {@code resultType}; for any other (or {@code null}) result type
   *         this overload returns {@code null}
   * @throws ApplicationException if compiling or running the query fails
   */
  public Object evaluate(URL srcUrl, String xqueryStr, int from, int to, String resultType) throws ApplicationException {
    StreamSource xmlDoc = new StreamSource(srcUrl.toString());
    XdmValue val = run(xmlDoc, xqueryStr);
    // NOTE(review): unlike the String overload, unknown result types yield null
    // here. Kept as-is for backward compatibility.
    return convert(val, from, to, resultType, null);
  }

  /** Compiles the query and runs it with the given source as context document. */
  private XdmValue run(StreamSource source, String xqueryStr) throws ApplicationException {
    try {
      XQueryExecutable executable = xQueryCompiler.compile(xqueryStr);
      net.sf.saxon.s9api.XQueryEvaluator evaluator = executable.load();
      evaluator.setSource(source);
      return evaluator.evaluate();
    } catch (SaxonApiException e) {
      throw new ApplicationException(e);
    }
  }

  /** Converts a query result to the requested result type, or returns {@code fallback} for an unknown type. */
  private Object convert(XdmValue val, int from, int to, String resultType, Object fallback) throws ApplicationException {
    if (RESULT_TYPE_STRING.equals(resultType)) {
      return toString(val);
    }
    if (RESULT_TYPE_XDM_VALUE.equals(resultType)) {
      return val;
    }
    if (RESULT_TYPE_HITS.equals(resultType)) {
      return toHits(val, from, to);
    }
    return fallback;
  }

  /**
   * Builds a hits page from the items at indices {@code from..to} (both
   * inclusive) of the query result.
   *
   * @return the hits with their size set to the total number of result items,
   *         or {@code null} if the result is empty
   */
  private Hits toHits(XdmValue xdmValue, int from, int to) throws ApplicationException {
    if (xdmValue instanceof XdmAtomicValue) {
      ArrayList<Hit> hits = new ArrayList<Hit>();
      hits.add(atomicHit(((XdmAtomicValue) xdmValue).getStringValue()));
      Hits result = new Hits(hits, from, to);
      result.setSize(1);
      return result;
    }
    int size = xdmValue.size();
    if (size == 0) {
      return null;
    }
    ArrayList<Hit> hits = new ArrayList<Hit>();
    XdmSequenceIterator iter = xdmValue.iterator();
    int counter = 0;
    while (iter.hasNext() && counter <= to) {
      XdmItem item = iter.next();
      if (counter >= from) {
        if (item instanceof XdmNode) {
          hits.add(nodeHit((XdmNode) item));
        } else {
          // A sequence may mix in atomic values; they cannot be cast to XdmNode.
          hits.add(atomicHit(item.getStringValue()));
        }
      }
      counter++;
    }
    Hits result = new Hits(hits, from, to);
    result.setSize(size);
    return result;
  }

  /** Creates a hit of type {@code TYPE_ATOMIC_VALUE} holding the given string value. */
  private Hit atomicHit(String value) throws ApplicationException {
    Hit hit = new Hit(value);
    hit.setType(Hit.TYPE_ATOMIC_VALUE);
    return hit;
  }

  /**
   * Creates a hit for a result node, recording its page (number of preceding
   * "pb" nodes), its local name if it has one, and - for element hits - its
   * position among equally named nodes since the last "pb".
   */
  private Hit nodeHit(XdmNode node) throws ApplicationException {
    int page = getPreceding(node, PAGE_BREAK_NAME).size();
    // Unnamed nodes (e.g. text or comment nodes) have no QName.
    QName qName = node.getNodeName();
    String nodeName = (qName == null) ? null : qName.getLocalName();
    int hitPagePosition = getPrecedingUntil(node, nodeName, PAGE_BREAK_NAME).size() + 1;
    // Only the whitespace in front of namespace declarations is collapsed;
    // the declarations themselves stay in the serialized hit.
    String itemStr = node.toString().replaceAll("[ \n\t]+xmlns", " xmlns");
    Hit hit = new Hit(itemStr);
    hit.setPage(page);
    if (nodeName != null) {
      hit.setName(nodeName);
    }
    XdmNodeKind kind = node.getNodeKind();
    if (kind == XdmNodeKind.ELEMENT) {
      hit.setType(Hit.TYPE_ELEMENT);
    } else if (kind == XdmNodeKind.ATTRIBUTE) {
      hit.setType(Hit.TYPE_ATTRIBUTE);
    }
    if (hit.getType() == Hit.TYPE_ELEMENT) {
      hit.setHitPagePosition(hitPagePosition);
    }
    return hit;
  }

  /**
   * Concatenates the string form of all items of the value without any
   * separator.
   *
   * @return the string value of a single atomic value, the concatenation of
   *         all items otherwise, or {@code null} for an empty value
   */
  private String toString(XdmValue xdmValue) {
    if (xdmValue instanceof XdmAtomicValue) {
      return ((XdmAtomicValue) xdmValue).getStringValue();
    }
    if (xdmValue.size() == 0) {
      return null;
    }
    StringBuilder buf = new StringBuilder();
    XdmSequenceIterator iter = xdmValue.iterator();
    while (iter.hasNext()) {
      XdmItem item = iter.next();
      buf.append(item.toString());
    }
    return buf.toString();
  }

  /**
   * Joins the text content of a value: text nodes contribute their trimmed
   * string value, element nodes contribute their recursively joined content
   * followed by {@code separator}, atomic items contribute their string value
   * followed by {@code separator}; other node kinds are ignored. For a node
   * the node's children are processed, for any other value its items.
   *
   * <p>Note that a separator is appended after every element, so the result
   * may end with a trailing separator.
   *
   * @return the joined text, or {@code null} if the value is {@code null} or empty
   */
  private String stringJoin(XdmValue xdmValue, String separator) {
    if (xdmValue == null || xdmValue instanceof XdmEmptySequence || xdmValue.size() == 0) {
      return null;
    }
    if (xdmValue instanceof XdmAtomicValue) {
      return ((XdmAtomicValue) xdmValue).getStringValue();
    }
    XdmSequenceIterator iter;
    if (xdmValue instanceof XdmNode) {
      iter = ((XdmNode) xdmValue).axisIterator(Axis.CHILD);
    } else {
      iter = xdmValue.iterator();
    }
    StringBuilder buf = new StringBuilder();
    while (iter.hasNext()) {
      XdmItem item = iter.next();
      if (!(item instanceof XdmNode)) {
        // Atomic items in a sequence cannot be cast to XdmNode; treat them like elements.
        buf.append(item.getStringValue()).append(separator);
        continue;
      }
      XdmNode node = (XdmNode) item;
      XdmNodeKind nodeKind = node.getNodeKind();
      if (nodeKind == XdmNodeKind.TEXT) {
        buf.append(node.getStringValue().trim());
      } else if (nodeKind == XdmNodeKind.ELEMENT) {
        buf.append(stringJoin(node, separator)).append(separator);
      }
    }
    return buf.toString();
  }

  /**
   * Collects all nodes on the preceding axis of {@code startNode} whose local
   * name equals {@code ofNodeName}.
   *
   * @return the matching nodes; never {@code null}
   */
  private ArrayList<XdmNode> getPreceding(XdmNode startNode, String ofNodeName) {
    // A null stop name never matches, so the whole axis is scanned.
    return getPrecedingUntil(startNode, ofNodeName, null);
  }

  /**
   * Collects nodes on the preceding axis of {@code startNode} whose local
   * name equals {@code ofNodeName}, stopping after the first node whose local
   * name equals {@code untilNodeName}.
   *
   * <p>NOTE(review): this relies on the axis iterator delivering the nearest
   * preceding nodes first - confirm against the Saxon version in use.
   *
   * @return the matching nodes; never {@code null}
   */
  private ArrayList<XdmNode> getPrecedingUntil(XdmNode startNode, String ofNodeName, String untilNodeName) {
    ArrayList<XdmNode> retNodes = new ArrayList<XdmNode>();
    XdmSequenceIterator iter = startNode.axisIterator(Axis.PRECEDING);
    if (iter == null) {
      return retNodes;
    }
    while (iter.hasNext()) {
      XdmNode n = (XdmNode) iter.next();
      QName qName = n.getNodeName();
      if (qName == null) {
        continue;
      }
      String name = qName.getLocalName();
      if (name == null) {
        continue;
      }
      if (name.equals(ofNodeName)) {
        retNodes.add(n);
      }
      if (name.equals(untilNodeName)) {
        return retNodes;
      }
    }
    return retNodes;
  }
}