diff Solr_Base_Query.php @ 0:a2b4f67e73dc default tip

initial
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Mon, 08 Jun 2015 10:21:54 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Solr_Base_Query.php	Mon Jun 08 10:21:54 2015 +0200
@@ -0,0 +1,657 @@
+<?php
+/**
+ * The classes in this file let you manipulate a query that will be sent to
+ * Apache Solr: adding and removing sorts, removing and replacing parameters,
+ * adding and removing filters, and getting and setting various other
+ * parameters.
+ * @file
+ *   Class that defines the base query for the Apache Solr Drupal module.
+ */
+
+class SolrFilterSubQuery {
+
+  /**
+   * Static shared by all instances, used to increment ID numbers.
+   */
+  protected static $idCount = 0;
+
+  /**
+   * Each query/subquery will have a unique ID.
+   */
+  public $id;
+
+  /**
+   * The operator placed between this subquery's clauses when a parent query
+   * renders it in rebuildFq(). Defaults to 'OR'.
+   */
+  public $operator;
+
+  /**
+   * A keyed array where the key is a position integer and the value is an
+   * array with #name, #value, #exclude and #local properties. Each value is
+   * used for filter queries, e.g. array('#name' => 'is_uid', '#value' => 0)
+   * for anonymous content.
+   */
+  protected $fields = array();
+
+  /**
+   * An array of subqueries, keyed by subquery ID.
+   */
+  protected $subqueries = array();
+
+  /**
+   * Creates a subquery and assigns it the next unique ID.
+   *
+   * @param string $operator
+   *   The operator used to join the clauses of this subquery.
+   */
+  function __construct($operator = 'OR') {
+    $this->operator = $operator;
+    $this->id = ++self::$idCount;
+  }
+
+  /**
+   * Gives a cloned query its own unique ID.
+   */
+  function __clone() {
+    $this->id = ++self::$idCount;
+  }
+
+  /**
+   * Returns the filters of this query.
+   *
+   * @param string $name
+   *   Optional field name. When empty, all filters are returned with their
+   *   position keys.
+   *
+   * @return array
+   *   All filters, or a re-indexed list of the filters whose #name loosely
+   *   equals $name.
+   */
+  public function getFilters($name = NULL) {
+    if (empty($name)) {
+      return $this->fields;
+    }
+    $matches = array();
+    foreach ($this->fields as $filter) {
+      if ($filter['#name'] == $name) {
+        $matches[] = $filter;
+      }
+    }
+    return $matches;
+  }
+
+  /**
+   * Checks whether a filter with this name, value and exclude flag exists.
+   *
+   * Comparison is loose (==) on purpose: addFilter() stores values as trimmed
+   * strings, while callers may pass integers.
+   *
+   * @return boolean
+   */
+  public function hasFilter($name, $value, $exclude = FALSE) {
+    foreach ($this->fields as $filter) {
+      if ($filter['#name'] == $name && $filter['#value'] == $value && $filter['#exclude'] == $exclude) {
+        return TRUE;
+      }
+    }
+    return FALSE;
+  }
+
+  /**
+   * Appends a filter to this query.
+   *
+   * @param string $name
+   *   The field name. May be NULL for a filter without a field name.
+   * @param string $value
+   *   The field value.
+   * @param boolean $exclude
+   *   TRUE to negate the filter.
+   * @param string $local
+   *   Local parameters, without the surrounding "{!" and "}".
+   *
+   * @return SolrFilterSubQuery
+   *   The called object.
+   */
+  public function addFilter($name, $value, $exclude = FALSE, $local = '') {
+    // @todo - escape the value if it has spaces in it and is not a range query or parenthesized.
+    $this->fields[] = array(
+      '#exclude' => (bool) $exclude,
+      // The cast keeps a NULL name (filter without field name) from being
+      // passed to trim() directly; the stored result is '' either way.
+      '#name' => trim((string) $name),
+      '#value' => trim($value),
+      '#local' => trim($local),
+    );
+    return $this;
+  }
+
+  /**
+   * Removes matching filters from this query.
+   *
+   * @param string $name
+   *   The field name.
+   * @param string $value
+   *   Optional value. When NULL every filter on $name is removed and
+   *   $exclude is ignored.
+   * @param boolean $exclude
+   *   The exclude flag the filter must have; only used when $value is set.
+   *
+   * @return SolrFilterSubQuery
+   *   The called object.
+   */
+  public function removeFilter($name, $value = NULL, $exclude = FALSE) {
+    // Remove from the public list of filters.
+    $this->unsetFilter($this->fields, $name, $value, $exclude);
+    return $this;
+  }
+
+  /**
+   * Unsets matching entries from a list of filters, passed by reference.
+   *
+   * With a NULL $value only the name has to match; otherwise name, value and
+   * exclude flag all have to match (loose comparison).
+   */
+  protected function unsetFilter(&$fields, $name, $value, $exclude) {
+    $name_only = !isset($value);
+    foreach ($fields as $pos => $filter) {
+      if ($filter['#name'] != $name) {
+        continue;
+      }
+      if ($name_only || ($filter['#value'] == $value && $filter['#exclude'] == $exclude)) {
+        unset($fields[$pos]);
+      }
+    }
+  }
+
+  /**
+   * Returns the subqueries of this query, keyed by subquery ID.
+   */
+  public function getFilterSubQueries() {
+    return $this->subqueries;
+  }
+
+  /**
+   * Adds a subquery, replacing any subquery stored under the same ID.
+   *
+   * @return SolrFilterSubQuery
+   *   The called object.
+   */
+  public function addFilterSubQuery(SolrFilterSubQuery $query) {
+    $this->subqueries[$query->id] = $query;
+    return $this;
+  }
+
+  /**
+   * Removes the subquery stored under the ID of the given query.
+   *
+   * @return SolrFilterSubQuery
+   *   The called object.
+   */
+  public function removeFilterSubQuery(SolrFilterSubQuery $query) {
+    unset($this->subqueries[$query->id]);
+    return $this;
+  }
+
+  /**
+   * Removes all subqueries.
+   *
+   * @return SolrFilterSubQuery
+   *   The called object.
+   */
+  public function removeFilterSubQueries() {
+    $this->subqueries = array();
+    return $this;
+  }
+
+  /**
+   * Renders one filter array as a filter query string.
+   *
+   * @param array $filter
+   *   An array with #name and #value, and optionally #exclude and #local.
+   *
+   * @return string
+   *   The string "{!local}-name:value", where the local parameters and the
+   *   "-" are only present when set.
+   */
+  public function makeFilterQuery(array $filter) {
+    $prefix = empty($filter['#exclude']) ? '' : '-';
+    // This method is public, so do not assume #local is present.
+    if (!empty($filter['#local'])) {
+      $prefix = '{!' . $filter['#local'] . '}' . $prefix;
+    }
+    $value = $filter['#value'];
+    // If the field value contains a colon or a space, wrap it in double quotes,
+    // unless it is a range query or is already wrapped in double quotes or
+    // parentheses.
+    $needs_quotes = preg_match('/[ :]/', $value)
+      && !preg_match('/^[\[\{]\S+ TO \S+[\]\}]$/', $value)
+      && !preg_match('/^["\(].*["\)]$/', $value);
+    if ($needs_quotes) {
+      $value = '"' . $value . '"';
+    }
+    return $prefix . $filter['#name'] . ':' . $value;
+  }
+
+  /**
+   * Make sure our query matches the pattern name:value or name:"value"
+   * Make sure that if we are ranges we use name:[x TO y]
+   * allowed inputs :
+   * a. bundle:article
+   * b. date:[1970-12-31T23:59:59Z TO NOW]
+   * The text is split in a name (eg.: bundle or date) and a value (eg.:
+   * article or [1970-12-31T23:59:59Z TO NOW]), which are checked separately.
+   *
+   * A string that contains no name:value structure at all is not checked any
+   * further and is reported as valid.
+   *
+   * @param string $filter
+   *   The filter to validate
+   * @return boolean
+   */
+  public static function validFilterValue($filter) {
+    if (!preg_match('/(?P<name>[^:]+):(?P<value>.+)?$/', $filter, $matches)) {
+      return TRUE;
+    }
+    $name = $matches['name'];
+    // The value group is optional, so it is absent for input such as "name:".
+    $value = isset($matches['value']) ? $matches['value'] : '';
+
+    // For the name we allow any character that fits between the A-Z0-9 range and
+    // any alternative for this in other languages. No special characters allowed
+    if (!preg_match('/^[a-zA-Z0-9_\x7f-\xff]+$/', $name)) {
+      return FALSE;
+    }
+
+    // For the value we allow anything that is UTF8
+    if (!drupal_validate_utf8($value)) {
+      return FALSE;
+    }
+
+    // Check our bracket count. Every kind of bracket has to be balanced; a
+    // mismatch in any single kind makes the filter invalid.
+    $bracket_pairs = array('{' => '}', '[' => ']', '(' => ')');
+    foreach ($bracket_pairs as $opening => $closing) {
+      if (substr_count($value, $opening) != substr_count($value, $closing)) {
+        return FALSE;
+      }
+    }
+
+    // Check the date field inputs
+    if (preg_match('/\[(.+) TO (.+)\]$/', $value, $datefields)) {
+      // Only Allow a value in the form of
+      // http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
+      // http://lucene.apache.org/solr/api/org/apache/solr/util/DateMathParser.html
+      // http://wiki.apache.org/solr/SolrQuerySyntax
+      // 1976-03-06T23:59:59.999Z (valid)
+      // * (valid)
+      // 1995-12-31T23:59:59.999Z (valid)
+      // 2007-03-06T00:00:00Z (valid)
+      // NOW-1YEAR/DAY (valid)
+      // NOW/DAY+1DAY (valid)
+      // 1976-03-06T23:59:59.999Z+1YEAR (valid)
+      // 1976-03-06T23:59:59.999Z/YEAR (valid)
+      // 1976-03-06T23::59::59.999Z (invalid)
+      if (!empty($datefields[1]) && !empty($datefields[2])) {
+        // Do not check the full value, only the two split range ends.
+        unset($datefields[0]);
+        // Check if both matches are valid datefields
+        foreach ($datefields as $datefield) {
+          if (!preg_match('/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:[\d\.]{2,6}Z(\S)*)|(^([A-Z\*]+)[A-Z0-9\+\-\/]*)/', $datefield)) {
+            return FALSE;
+          }
+        }
+      }
+    }
+    return TRUE;
+  }
+
+  /**
+   * Builds a set of filter queries from $this->fields and all subqueries.
+   *
+   * Each subquery that yields at least one clause contributes a single
+   * parenthesized string, its clauses joined by that subquery's operator.
+   *
+   * Returns an array of strings that can be combined into
+   * a URL query parameter or passed to Solr as fq parameters.
+   */
+  protected function rebuildFq() {
+    $fq = array();
+    foreach ($this->fields as $field) {
+      $fq[] = $this->makeFilterQuery($field);
+    }
+    foreach ($this->subqueries as $subquery) {
+      $subfq = $subquery->rebuildFq();
+      if ($subfq) {
+        $fq[] = '(' . implode(' ' . $subquery->operator . ' ', $subfq) . ')';
+      }
+    }
+    return $fq;
+  }
+
+}
+
+class SolrBaseQuery extends SolrFilterSubQuery implements DrupalSolrQueryInterface {
+
+  /**
+   * The parameters that get sent to Solr.
+   *
+   * 'fq' is never populated here; filters are managed through the filter
+   * methods and rendered on demand by rebuildFq().
+   */
+  protected $params = array('start' => 0, 'rows' => 10, 'fq' => array());
+
+  /**
+   * The search base path.
+   */
+  protected $base_path;
+
+  /**
+   * Field aliases, keyed with real Solr index field names with the alias as
+   * value.
+   */
+  protected $field_map = array();
+
+  /**
+   * DrupalApacheSolrService object
+   */
+  protected $solr;
+
+  /**
+   * The sorts that may be used. The array keys must always be real Solr index
+   * fields.
+   */
+  protected $available_sorts;
+
+  /**
+   * The query name is used to construct a searcher string. Mostly the
+   * environment id
+   */
+  protected $name;
+
+  /**
+   * Context values; 'env_id' is always kept equal to the ID of $solr.
+   */
+  protected $context = array();
+
+  /**
+   * The raw sort string, as passed to the constructor or built by
+   * setSolrsort(). Declared explicitly so it is not a dynamic property.
+   */
+  protected $sortstring = '';
+
+  /**
+   * The parsed sort. Makes sure we always have a valid sort.
+   */
+  protected $solrsort = array('#name' => 'score', '#direction' => 'desc');
+
+  /**
+   * A flag to allow the search to be aborted.
+   */
+  public $abort_search = FALSE;
+
+  /**
+   * A flag to check if need to retrieve another page of the result set
+   */
+  public $page = 0;
+
+  /**
+   * Parameters that hold one value. addParam() stores them as a scalar
+   * instead of appending to an array, and getParam() returns NULL instead of
+   * an empty array when they are unset.
+   */
+  protected $single_value_params = array(
+    'q' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
+    'q.op' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q.op
+    'q.alt' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
+    'df' => TRUE,
+    'qt' => TRUE,
+    'defType' => TRUE,
+    'timeAllowed' => TRUE,
+    'omitHeader' => TRUE,
+    'debugQuery' => TRUE,
+    'start' => TRUE,
+    'rows' => TRUE,
+    'stats' => TRUE,
+    'facet' => TRUE,
+    'facet.prefix' => TRUE,
+    'facet.limit' => TRUE,
+    'facet.offset' => TRUE,
+    'facet.mincount' => TRUE,
+    'facet.missing' => TRUE,
+    'facet.method' => TRUE,
+    'facet.enum.cache.minDf' => TRUE,
+    'facet.date.start' => TRUE,
+    'facet.date.end' => TRUE,
+    'facet.date.gap' => TRUE,
+    'facet.date.hardend' => TRUE,
+    'facet.date.other' => TRUE,
+    'facet.date.include' => TRUE,
+    'hl' => TRUE,
+    'hl.snippets' => TRUE,
+    'hl.fragsize' => TRUE,
+    'hl.mergeContiguous' => TRUE,
+    'hl.requireFieldMatch' => TRUE,
+    'hl.maxAnalyzedChars' => TRUE,
+    'hl.alternateField' => TRUE,
+    'hl.maxAlternateFieldLength' => TRUE,
+    'hl.formatter' => TRUE,
+    // hl.simple.pre and hl.simple.post are two separate Solr parameters. The
+    // combined key below never matches a real parameter name and is only
+    // kept so that the existing list is not reduced.
+    'hl.simple.pre' => TRUE,
+    'hl.simple.post' => TRUE,
+    'hl.simple.pre/hl.simple.post' => TRUE,
+    'hl.fragmenter' => TRUE,
+    'hl.fragListBuilder' => TRUE,
+    'hl.fragmentsBuilder' => TRUE,
+    'hl.useFastVectorHighlighter' => TRUE,
+    'hl.usePhraseHighlighter' => TRUE,
+    'hl.highlightMultiTerm' => TRUE,
+    'hl.regex.slop' => TRUE,
+    'hl.regex.pattern' => TRUE,
+    'hl.regex.maxAnalyzedChars' => TRUE,
+    'spellcheck' => TRUE,
+  );
+
+  /**
+   * @param $name
+   *   The search name, used for finding the correct blocks and other config.
+   *   Typically "apachesolr".
+   *
+   * @param $solr
+   *   An instantiated DrupalApacheSolrService Object.
+   *   Can be instantiated from apachesolr_get_solr().
+   *
+   * @param $params
+   *   Array of params to initialize the object (typically 'q' and 'fq').
+   *
+   * @param $sortstring
+   *   Visible string telling solr how to sort - added to GET query params.
+   *
+   * @param $base_path
+   *   The search base path (without the keywords) for this query, without trailing slash.
+   *
+   * @param $context
+   *   Array of context values to initialize the object with.
+   */
+  function __construct($name, $solr, array $params = array(), $sortstring = '', $base_path = '', $context = array()) {
+    parent::__construct();
+    $this->name = $name;
+    // addContext() reads the ID of $this->solr, so it has to be set first.
+    $this->solr = $solr;
+    $this->addContext((array) $context);
+    $this->addParams((array) $params);
+    $this->available_sorts = $this->defaultSorts();
+    $this->sortstring = trim($sortstring);
+    $this->parseSortString();
+    $this->base_path = $base_path;
+  }
+
+  /**
+   * Returns the sorts that are available on a new query.
+   *
+   * @return array
+   *   An array keyed by field name; each value has a translated 'title' and a
+   *   'default' sort direction.
+   */
+  protected function defaultSorts() {
+    return array(
+      'score' => array('title' => t('Relevancy'), 'default' => 'desc'),
+      'title' => array('title' => t('Title'), 'default' => 'asc'),
+      'author_s' => array('title' => t('Author'), 'default' => 'asc'),
+      'date' => array('title' => t('Date'), 'default' => 'desc'),
+    );
+  }
+
+  /**
+   * Get query name.
+   */
+  public function getName() {
+    return $this->name;
+  }
+
+  /**
+   * Get query searcher name (for facetapi, views, pages, etc).
+   *
+   * @return string
+   *   The query name and the ID of the Solr object, joined by "@".
+   */
+  public function getSearcher() {
+    return $this->name . '@' . $this->solr->getId();
+  }
+
+  /**
+   * Get context values.
+   */
+  public function getContext() {
+    return $this->context;
+  }
+
+  /**
+   * Set context value.
+   *
+   * @param array $context
+   *   Values to merge into the context; existing keys are overwritten.
+   *
+   * @return array
+   *   The complete context after merging.
+   */
+  public function addContext(array $context) {
+    foreach ($context as $key => $item) {
+      $this->context[$key] = $item;
+    }
+    // The env_id must match that of the actual $solr object
+    $this->context['env_id'] = $this->solr->getId();
+    return $this->context;
+  }
+
+  /**
+   * Returns the value of one parameter.
+   *
+   * @param string $name
+   *   The parameter name. For 'fq' the filter strings are rebuilt from the
+   *   current filters and subqueries.
+   *
+   * @return mixed
+   *   The stored value. When unset: NULL for single value parameters, an
+   *   empty array for all others.
+   */
+  public function getParam($name) {
+    if ($name == 'fq') {
+      return $this->rebuildFq();
+    }
+    if (isset($this->params[$name])) {
+      return $this->params[$name];
+    }
+    return isset($this->single_value_params[$name]) ? NULL : array();
+  }
+
+  /**
+   * Returns all parameters, with 'fq' rebuilt from the current filters.
+   */
+  public function getParams() {
+    $params = $this->params;
+    $params['fq'] = $this->rebuildFq();
+    return $params;
+  }
+
+  /**
+   * Returns all parameters in the form they are passed to the Solr service.
+   */
+  public function getSolrParams() {
+    $params = $this->getParams();
+    // For certain fields Solr prefers a comma separated list.
+    foreach (array('fl', 'hl.fl', 'sort', 'mlt.fl') as $name) {
+      if (isset($params[$name])) {
+        $params[$name] = implode(',', $params[$name]);
+      }
+    }
+    return $params;
+  }
+
+  /**
+   * Parses one fq string and adds it as a filter.
+   *
+   * @param string $string
+   *   A filter query such as "name:value", "-name:value" or
+   *   "{!local}name:value".
+   * @param mixed $index
+   *   Unused. Present because array_walk_recursive() passes the array key as
+   *   second argument.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  protected function addFq($string, $index = NULL) {
+    $string = trim($string);
+    $local = '';
+    $exclude = FALSE;
+    $name = NULL;
+    $value = NULL;
+
+    // Check if we are dealing with an exclude
+    if (preg_match('/^-(.*)/', $string, $matches)) {
+      $exclude = TRUE;
+      $string = $matches[1];
+    }
+
+    // If {!something} is found this is a local value. Only the text after the
+    // closing brace is kept as the filter.
+    if (preg_match('/\{!([^}]+)\}(.*)/', $string, $matches)) {
+      $local = $matches[1];
+      $string = $matches[2];
+      // makeFilterQuery() renders an excluded filter as "{!local}-name:value",
+      // so the "-" can also directly follow the local parameters. Without
+      // this check it would end up as part of the field name.
+      if (!$exclude && preg_match('/^-(.*)/', $string, $matches)) {
+        $exclude = TRUE;
+        $string = $matches[1];
+      }
+    }
+
+    // Anything that has a name and value
+    // check if we have a : in the string
+    if (strpos($string, ':') !== FALSE) {
+      list($name, $value) = explode(':', $string, 2);
+    }
+    else {
+      $value = $string;
+    }
+    $this->addFilter($name, $value, $exclude, $local);
+    return $this;
+  }
+
+  /**
+   * Adds a value for a parameter.
+   *
+   * @param string $name
+   *   The parameter name.
+   * @param mixed $value
+   *   A scalar or an array of values. Single value parameters are replaced
+   *   (for an array its last element is used), 'fq' values are parsed into
+   *   filters, and all other values are appended to the stored list.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function addParam($name, $value) {
+    if (isset($this->single_value_params[$name])) {
+      if (is_array($value)) {
+        $value = end($value);
+      }
+      $this->params[$name] = $this->normalizeParamValue($value);
+      return $this;
+    }
+    // We never actually populate $this->params['fq'].  Instead
+    // we manage everything via the filter methods.
+    if ($name == 'fq') {
+      if (is_array($value)) {
+        array_walk_recursive($value, array($this, 'addFq'));
+        return $this;
+      }
+      return $this->addFq($value);
+    }
+
+    if (!isset($this->params[$name])) {
+      $this->params[$name] = array();
+    }
+
+    // Work on a numerically keyed list, also for a single value.
+    $param_values = is_array($value) ? array_values($value) : array($value);
+    $normalized = array_map(array($this, 'normalizeParamValue'), $param_values);
+    $this->params[$name] = array_merge($this->params[$name], $normalized);
+
+    return $this;
+  }
+
+  /**
+   * Converts a parameter value to the string that is stored.
+   *
+   * @return string
+   *   'true' or 'false' for booleans, the trimmed value otherwise.
+   */
+  protected function normalizeParamValue($value) {
+    // Convert boolean to string.
+    if (is_bool($value)) {
+      return $value ? 'true' : 'false';
+    }
+    // Convert to trimmed string.
+    return trim($value);
+  }
+
+  /**
+   * Adds multiple parameters, see addParam().
+   *
+   * @param array $params
+   *   An array of values keyed by parameter name.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function addParams(array $params) {
+    foreach ($params as $name => $value) {
+      $this->addParam($name, $value);
+    }
+    return $this;
+  }
+
+  /**
+   * Removes a parameter. For 'fq' all filters and subqueries are removed.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function removeParam($name) {
+    unset($this->params[$name]);
+    if ($name == 'fq') {
+      $this->fields = array();
+      $this->subqueries = array();
+    }
+    return $this;
+  }
+
+  /**
+   * Removes a parameter and then adds the given value for it.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function replaceParam($name, $value) {
+    $this->removeParam($name);
+    return $this->addParam($name, $value);
+  }
+
+  /**
+   * Handles aliases for field to make nicer URLs.
+   *
+   * @param $field_map
+   *   An array keyed with real Solr index field names with the alias as value.
+   *
+   * @return DrupalSolrQueryInterface
+   *   The called object.
+   */
+  public function addFieldAliases($field_map) {
+    $this->field_map = array_merge($this->field_map, $field_map);
+    // The sort string is parsed using the field map, so re-parse it.
+    $this->parseSortString();
+    return $this;
+  }
+
+  /**
+   * Returns the field aliases, keyed by real Solr index field name.
+   */
+  public function getFieldAliases() {
+    return $this->field_map;
+  }
+
+  /**
+   * Removes all field aliases.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function clearFieldAliases() {
+    $this->field_map = array();
+    // The sort string is parsed using the field map, so re-parse it.
+    $this->parseSortString();
+    return $this;
+  }
+
+  /**
+   * Parses $this->sortstring into $this->solrsort and the 'sort' parameter.
+   *
+   * An empty string or 'score desc' resets the sort to the default and
+   * removes the 'sort' parameter. A string that does not validate against
+   * the available sorts leaves the current sort untouched.
+   */
+  protected function parseSortString() {
+    // Substitute any field aliases with real field names.
+    // NOTE(review): $field_map is documented as real name => alias, so this
+    // strtr() replaces keys (real names) with aliases - confirm which
+    // direction is intended.
+    $sortstring = strtr($this->sortstring, $this->field_map);
+    // Score is a special case - it's the default sort for Solr.
+    if ('' == $sortstring || 'score desc' == $sortstring) {
+      $this->solrsort['#name'] = 'score';
+      $this->solrsort['#direction'] = 'desc';
+      unset($this->params['sort']);
+      return;
+    }
+    // Validate and set sort parameter. The field names are quoted so that
+    // characters with a meaning in a regular expression match literally.
+    $quoted = array();
+    foreach (array_keys($this->available_sorts) as $field) {
+      $quoted[] = preg_quote($field, '/');
+    }
+    $fields = implode('|', $quoted);
+    if (preg_match('/^(?:(' . $fields . ') (asc|desc),?)+$/', $sortstring, $matches)) {
+      // We only use the last match.
+      $this->solrsort['#name'] = $matches[1];
+      $this->solrsort['#direction'] = $matches[2];
+      $this->params['sort'] = array($sortstring);
+    }
+  }
+
+  /**
+   * Returns the available sorts, keyed by Solr index field name.
+   */
+  public function getAvailableSorts() {
+    return $this->available_sorts;
+  }
+
+  /**
+   * Adds or replaces one available sort.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function setAvailableSort($name, $sort) {
+    // We expect non-aliased sorts to be added.
+    $this->available_sorts[$name] = $sort;
+    // Re-parse the sortstring.
+    $this->parseSortString();
+    return $this;
+  }
+
+  /**
+   * Replaces the complete set of available sorts.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function setAvailableSorts($sorts) {
+    // We expect a complete array of valid sorts.
+    $this->available_sorts = $sorts;
+    // Re-parse the sortstring.
+    $this->parseSortString();
+    return $this;
+  }
+
+  /**
+   * Removes one available sort.
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function removeAvailableSort($name) {
+    unset($this->available_sorts[$name]);
+    // Re-parse the sortstring.
+    $this->parseSortString();
+    return $this;
+  }
+
+  /**
+   * Returns the current sort as an array with #name and #direction.
+   */
+  public function getSolrsort() {
+    return $this->solrsort;
+  }
+
+  /**
+   * Sets the sort from a field name and a direction.
+   *
+   * The sort only takes effect when the resulting string validates in
+   * parseSortString().
+   *
+   * @return SolrBaseQuery
+   *   The called object.
+   */
+  public function setSolrsort($name, $direction) {
+    $this->sortstring = trim($name) . ' ' . trim($direction);
+    $this->parseSortString();
+    return $this;
+  }
+
+  /**
+   * Returns the search path for the given keywords or the current 'q'.
+   *
+   * @param string $new_keywords
+   *   Optional keywords to use instead of the 'q' parameter.
+   *
+   * @return string
+   *   The base path, a slash and the keywords.
+   */
+  public function getPath($new_keywords = NULL) {
+    if (isset($new_keywords)) {
+      return $this->base_path . '/' . $new_keywords;
+    }
+    $keywords = $this->getParam('q');
+    if ($keywords) {
+      return $this->base_path . '/' . $keywords;
+    }
+    // Return with empty query (the slash). The path for a facet
+    // becomes $this->base_path . '//facetinfo';
+    // We do this so we can have a consistent way of retrieving the query +
+    // additional parameters
+    return $this->base_path . '/';
+  }
+
+  /**
+   * Returns the URL query values that represent the current sort.
+   *
+   * @return array
+   *   An array with a 'solrsort' key, or an empty array for the default
+   *   relevancy sort.
+   */
+  public function getSolrsortUrlQuery() {
+    $queryvalues = array();
+    $solrsort = $this->solrsort;
+    // The default relevancy sort needs no query value.
+    if ($solrsort && ($solrsort['#name'] != 'score')) {
+      if (isset($this->field_map[$solrsort['#name']])) {
+        $solrsort['#name'] = $this->field_map[$solrsort['#name']];
+      }
+      $queryvalues['solrsort'] = $solrsort['#name'] . ' ' . $solrsort['#direction'];
+    }
+    return $queryvalues;
+  }
+
+  /**
+   * Runs the search on the Solr object, unless the search was aborted.
+   *
+   * @param $keys
+   *   The keywords, passed on unchanged to the search() method of the Solr
+   *   object.
+   *
+   * @return
+   *   NULL when $abort_search is set, otherwise whatever the search() method
+   *   of the Solr object returns.
+   */
+  public function search($keys = NULL) {
+    if ($this->abort_search) {
+      return NULL;
+    }
+
+    return $this->solr->search($keys, $this->getSolrParams());
+  }
+
+  /**
+   * Calls the named method, without arguments, on the Solr object.
+   *
+   * @param string $method
+   *   The name of the method to call.
+   *
+   * @return
+   *   The return value of that method.
+   */
+  public function solr($method) {
+    return $this->solr->$method();
+  }
+
+}