view Solr_Base_Query.php @ 0:a2b4f67e73dc default tip

initial
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Mon, 08 Jun 2015 10:21:54 +0200
parents
children
line wrap: on
line source

<?php
/**
 * This class allows you to perform operations on a query that will be sent to
 * Apache Solr: adding and removing sorts, removing and replacing parameters,
 * adding and removing filters, getting and setting various parameters,
 * and more.
 * @file
 *   Class that defines the base query for the Apache Solr Drupal module.
 */

/**
 * A group of Solr filter conditions plus nested groups of the same kind.
 *
 * Each instance holds a flat list of filters and a set of child subqueries.
 * When the filter strings are rebuilt, the filters of every child are joined
 * with that child's own $operator and wrapped in parentheses.
 */
class SolrFilterSubQuery {

  /**
   * Static shared by all instances, used to increment ID numbers.
   */
  protected static $idCount = 0;

  /**
   * Each query/subquery will have a unique ID.
   */
  public $id;

  /**
   * The boolean operator used to join this subquery's filters when a parent
   * query rebuilds its filter strings (defaults to 'OR').
   */
  public $operator;

  /**
   * A keyed array where the key is a position integer and the value
   * is an array with #exclude, #name, #value and #local properties. Each
   * value is used for filter queries, e.g.
   * array('#name' => 'is_uid', '#value' => 0) for anonymous content.
   */
  protected $fields = array();

  /**
   * An array of subqueries, keyed by subquery ID.
   */
  protected $subqueries = array();

  /**
   * @param $operator
   *   The operator used to join the filters of this subquery.
   */
  public function __construct($operator = 'OR') {
    $this->operator = $operator;
    $this->id = ++self::$idCount;
  }

  /**
   * Gives a cloned query its own unique ID.
   *
   * Only the ID is changed here; the subquery objects held in
   * $this->subqueries are not cloned, so a clone shares them with the
   * original.
   */
  public function __clone() {
    $this->id = ++self::$idCount;
  }

  /**
   * Returns the filters of this query (subqueries are not included).
   *
   * @param $name
   *   Optional field name. When empty, every filter is returned.
   *
   * @return array
   *   All filters keyed by position when $name is empty, otherwise a
   *   re-indexed list of the filters whose #name equals $name.
   */
  public function getFilters($name = NULL) {
    if (empty($name)) {
      return $this->fields;
    }
    $matches = array();
    foreach ($this->fields as $filter) {
      if ($filter['#name'] == $name) {
        $matches[] = $filter;
      }
    }
    return $matches;
  }

  /**
   * Tells whether a filter with this name, value and exclude flag exists.
   *
   * @return boolean
   */
  public function hasFilter($name, $value, $exclude = FALSE) {
    foreach ($this->fields as $values) {
      if ($values['#name'] == $name && $values['#value'] == $value && $values['#exclude'] == $exclude) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * Appends a filter to this query.
   *
   * @param $name
   *   The field name. May be empty/NULL for a filter without a field name.
   * @param $value
   *   The field value.
   * @param $exclude
   *   TRUE to negate the filter.
   * @param $local
   *   Local parameters, without the surrounding "{!" and "}".
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function addFilter($name, $value, $exclude = FALSE, $local = '') {
    // @todo - escape the value if it has spaces in it and is not a range query or parenthesized.
    // The casts keep trim() away from NULL, which SolrBaseQuery::addFq() passes
    // as the name of a filter string that has no "name:" part.
    $this->fields[] = array(
      '#exclude' => (bool) $exclude,
      '#name' => trim((string) $name),
      '#value' => trim((string) $value),
      '#local' => trim((string) $local),
    );
    return $this;
  }

  /**
   * Removes filters from this query.
   *
   * @param $name
   *   The field name of the filters to remove.
   * @param $value
   *   When NULL, every filter on $name is removed. Otherwise only filters
   *   that also match $value and $exclude are removed.
   * @param $exclude
   *   The exclude flag to match; only used when $value is given.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function removeFilter($name, $value = NULL, $exclude = FALSE) {
    // Remove from the public list of filters.
    $this->unsetFilter($this->fields, $name, $value, $exclude);
    return $this;
  }

  /**
   * Removes matching entries from a list of filters, in place.
   *
   * The remaining entries keep their original position keys.
   */
  protected function unsetFilter(&$fields, $name, $value, $exclude) {
    $any_value = !isset($value);
    foreach ($fields as $pos => $values) {
      if ($values['#name'] != $name) {
        continue;
      }
      if ($any_value || ($values['#value'] == $value && $values['#exclude'] == $exclude)) {
        unset($fields[$pos]);
      }
    }
  }

  /**
   * Returns the subqueries of this query, keyed by subquery ID.
   */
  public function getFilterSubQueries() {
    return $this->subqueries;
  }

  /**
   * Adds a subquery; a subquery with the same ID is replaced.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function addFilterSubQuery(SolrFilterSubQuery $query) {
    $this->subqueries[$query->id] = $query;
    return $this;
  }

  /**
   * Removes the subquery that has the same ID as $query.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function removeFilterSubQuery(SolrFilterSubQuery $query) {
    unset($this->subqueries[$query->id]);
    return $this;
  }

  /**
   * Removes all subqueries.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function removeFilterSubQueries() {
    $this->subqueries = array();
    return $this;
  }

  /**
   * Turns one filter array into a filter query string.
   *
   * @param array $filter
   *   An array with #name and #value, and optionally #exclude and #local.
   *
   * @return string
   *   A string of the form [{!local}][-]name:value.
   */
  public function makeFilterQuery(array $filter) {
    $prefix = empty($filter['#exclude']) ? '' : '-';
    // #local is optional for filters that were not built by addFilter().
    if (!empty($filter['#local'])) {
      $prefix = '{!' . $filter['#local'] . '}' . $prefix;
    }
    // If the field value contains a colon or a space, wrap it in double quotes,
    // unless it is a range query or is already wrapped in double quotes or
    // parentheses.
    $value = $filter['#value'];
    $needs_quotes = preg_match('/[ :]/', $value)
      && !preg_match('/^[\[\{]\S+ TO \S+[\]\}]$/', $value)
      && !preg_match('/^["\(].*["\)]$/', $value);
    if ($needs_quotes) {
      $value = '"' . $value . '"';
    }
    return $prefix . $filter['#name'] . ':' . $value;
  }

  /**
   * Validates a filter string of the form name:value or name:[A TO B].
   *
   * Allowed inputs are for example:
   * a. bundle:article
   * b. date:[1970-12-31T23:59:59Z TO NOW]
   *
   * The following checks are made:
   * 1. The name only contains a-z, A-Z, 0-9, underscore or bytes 0x7f-0xff.
   * 2. The value is valid UTF-8.
   * 3. Every kind of bracket ({}, [] and ()) is opened as often as it is
   *    closed.
   * 4. If the value ends in a "[A TO B]" range, A and B each look like a Solr
   *    date or start with an upper case keyword or "*".
   *
   * A string that does not have a name:value shape at all is not inspected
   * and is reported as valid.
   *
   * @param string $filter
   *   The filter to validate
   * @return boolean
   */
  public static function validFilterValue($filter) {
    if (!preg_match('/(?P<name>[^:]+):(?P<value>.+)?$/', $filter, $matches)) {
      return TRUE;
    }
    $name = $matches['name'];
    // The value group is optional; treat "name:" as an empty value.
    $value = isset($matches['value']) ? $matches['value'] : '';

    // For the name we allow any character that fits between the A-Z0-9 range and
    // any alternative for this in other languages. No special characters allowed
    if (!preg_match('/^[a-zA-Z0-9_\x7f-\xff]+$/', $name)) {
      return FALSE;
    }

    // For the value we allow anything that is UTF8
    if (!drupal_validate_utf8($value)) {
      return FALSE;
    }

    // Check our bracket count. Every bracket type has to be balanced on its
    // own; a mismatch in any of them makes the filter invalid.
    $bracket_pairs = array('{' => '}', '[' => ']', '(' => ')');
    foreach ($bracket_pairs as $opening => $closing) {
      if (substr_count($value, $opening) != substr_count($value, $closing)) {
        return FALSE;
      }
    }

    // Check the date field inputs
    if (preg_match('/\[(.+) TO (.+)\]$/', $value, $datefields)) {
      // Only Allow a value in the form of
      // http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
      // http://lucene.apache.org/solr/api/org/apache/solr/util/DateMathParser.html
      // http://wiki.apache.org/solr/SolrQuerySyntax
      // 1976-03-06T23:59:59.999Z (valid)
      // * (valid)
      // 1995-12-31T23:59:59.999Z (valid)
      // 2007-03-06T00:00:00Z (valid)
      // NOW-1YEAR/DAY (valid)
      // NOW/DAY+1DAY (valid)
      // 1976-03-06T23:59:59.999Z+1YEAR (valid)
      // 1976-03-06T23:59:59.999Z/YEAR (valid)
      // 1976-03-06T23::59::59.999Z (invalid)
      //
      // NOTE(review): "(\A-Z0-9\+\-\/)*" in the pattern below looks like it
      // was meant to be the character class "[A-Z0-9\+\-\/]*". As the pattern
      // has no end anchor this does not change what is accepted, so it is
      // left untouched.
      // NOTE(review): this check also applies to non-date ranges, so a
      // numeric range such as [1 TO 10] is reported as invalid - confirm
      // that this is intended.
      if (!empty($datefields[1]) && !empty($datefields[2])) {
        // Do not check the full match, only the two range boundaries.
        foreach (array($datefields[1], $datefields[2]) as $datefield) {
          if (!preg_match('/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:[\d\.]{2,6}Z(\S)*)|(^([A-Z\*]+)(\A-Z0-9\+\-\/)*)/', $datefield)) {
            return FALSE;
          }
        }
      }
    }
    return TRUE;
  }

  /**
   * Builds a set of filter queries from $this->fields and all subqueries.
   *
   * Returns an array of strings that can be combined into
   * a URL query parameter or passed to Solr as fq parameters.
   */
  protected function rebuildFq() {
    $fq = array();
    foreach ($this->fields as $field) {
      $fq[] = $this->makeFilterQuery($field);
    }
    foreach ($this->subqueries as $subquery) {
      $subfq = $subquery->rebuildFq();
      // Subqueries without any filter do not contribute an empty "()".
      if ($subfq) {
        $fq[] = '(' . implode(' ' . $subquery->operator . ' ', $subfq) . ')';
      }
    }
    return $fq;
  }

}

/**
 * The base Solr query: request parameters, filters, sorting and search path.
 *
 * Filters ("fq") are never stored in $this->params. They are kept as
 * filters/subqueries by the parent class and turned back into strings
 * whenever the parameters are read.
 */
class SolrBaseQuery extends SolrFilterSubQuery implements DrupalSolrQueryInterface {

  /**
   * The parameters that get sent to Solr.
   */
  protected $params = array('start' => 0, 'rows' => 10, 'fq' => array());

  /**
   * The search base path.
   */
  protected $base_path;

  /**
   * Field aliases as passed to addFieldAliases().
   */
  protected $field_map = array();

  /**
   * DrupalApacheSolrService object
   */
  protected $solr;

  // The array keys must always be real Solr index fields.
  protected $available_sorts;

  /**
   * The query name is used to construct a searcher string. Mostly the
   * environment id
   */
  protected $name;
  protected $context = array();

  // Makes sure we always have a valid sort.
  protected $solrsort = array('#name' => 'score', '#direction' => 'desc');

  // The raw sort string, e.g. "title asc", as given to the constructor or
  // built by setSolrsort(). Declared public because it used to be created as
  // an undeclared (and therefore public) property.
  public $sortstring = '';

  // A flag to allow the search to be aborted.
  public $abort_search = FALSE;

  // A flag to check if need to retrieve another page of the result set
  public $page = 0;

  /**
   * Parameters that hold one value instead of a list of values, as keys.
   *
   * NOTE(review): 'hl.simple.pre/hl.simple.post' is a single key here, so
   * neither 'hl.simple.pre' nor 'hl.simple.post' is treated as single valued.
   * It looks like two separate keys were intended; left as is because
   * splitting it changes what getParam() returns for those parameters.
   */
  protected $single_value_params = array(
    'q' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
    'q.op' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q.op
    'q.alt' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
    'df' => TRUE,
    'qt' => TRUE,
    'defType' => TRUE,
    'timeAllowed' => TRUE,
    'omitHeader' => TRUE,
    'debugQuery' => TRUE,
    'start' => TRUE,
    'rows' => TRUE,
    'stats' => TRUE,
    'facet' => TRUE,
    'facet.prefix' => TRUE,
    'facet.limit' => TRUE,
    'facet.offset' => TRUE,
    'facet.mincount' => TRUE,
    'facet.missing' => TRUE,
    'facet.method' => TRUE,
    'facet.enum.cache.minDf' => TRUE,
    'facet.date.start' => TRUE,
    'facet.date.end' => TRUE,
    'facet.date.gap' => TRUE,
    'facet.date.hardend' => TRUE,
    'facet.date.other' => TRUE,
    'facet.date.include' => TRUE,
    'hl' => TRUE,
    'hl.snippets' => TRUE,
    'hl.fragsize' => TRUE,
    'hl.mergeContiguous' => TRUE,
    'hl.requireFieldMatch' => TRUE,
    'hl.maxAnalyzedChars' => TRUE,
    'hl.alternateField' => TRUE,
    'hl.maxAlternateFieldLength' => TRUE,
    'hl.formatter' => TRUE,
    'hl.simple.pre/hl.simple.post' => TRUE,
    'hl.fragmenter' => TRUE,
    'hl.fragListBuilder' => TRUE,
    'hl.fragmentsBuilder' => TRUE,
    'hl.useFastVectorHighlighter' => TRUE,
    'hl.usePhraseHighlighter' => TRUE,
    'hl.highlightMultiTerm' => TRUE,
    'hl.regex.slop' => TRUE,
    'hl.regex.pattern' => TRUE,
    'hl.regex.maxAnalyzedChars' => TRUE,
    'spellcheck' => TRUE,
  );

  /**
   * @param $name
   *   The search name, used for finding the correct blocks and other config.
   *   Typically "apachesolr".
   *
   * @param $solr
   *   An instantiated DrupalApacheSolrService Object.
   *   Can be instantiated from apachesolr_get_solr().
   *
   * @param $params
   *   Array of params to initialize the object (typically 'q' and 'fq').
   *
   * @param $sortstring
   *   Visible string telling solr how to sort - added to GET query params.
   *
   * @param $base_path
   *   The search base path (without the keywords) for this query, without trailing slash.
   *
   * @param $context
   *   Array of context values, see addContext().
   */
  public function __construct($name, $solr, array $params = array(), $sortstring = '', $base_path = '', $context = array()) {
    parent::__construct();
    $this->name = $name;
    $this->solr = $solr;
    $this->addContext((array) $context);
    $this->addParams((array) $params);
    // The available sorts have to be known before the sort string is parsed.
    $this->available_sorts = $this->defaultSorts();
    $this->sortstring = trim((string) $sortstring);
    $this->parseSortString();
    $this->base_path = $base_path;
  }

  /**
   * Returns the sorts that are available on a new query.
   *
   * @return array
   *   An array keyed by Solr field name; each value has a translated 'title'
   *   and a 'default' sort direction.
   */
  protected function defaultSorts() {
    return array(
      'score' => array('title' => t('Relevancy'), 'default' => 'desc'),
      'title' => array('title' => t('Title'), 'default' => 'asc'),
      'author_s' => array('title' => t('Author'), 'default' => 'asc'),
      'date' => array('title' => t('Date'), 'default' => 'desc'),
    );
  }

  /**
   * Get query name.
   */
  public function getName() {
    return $this->name;
  }

  /**
   * Get query searcher name (for facetapi, views, pages, etc).
   */
  public function getSearcher() {
    return $this->name . '@' . $this->solr->getId();
  }

  /**
   * Get context values.
   */
  public function getContext() {
    return $this->context;
  }

  /**
   * Set context value.
   *
   * @param array $context
   *   Values to merge into the existing context. An 'env_id' entry is always
   *   overwritten with the ID of the Solr object.
   *
   * @return array
   *   The complete context after merging.
   */
  public function addContext(array $context) {
    foreach ($context as $key => $context_value) {
      $this->context[$key] = $context_value;
    }
    // The env_id must match that of the actual $solr object
    $this->context['env_id'] = $this->solr->getId();
    return $this->context;
  }

  /**
   * Returns one parameter.
   *
   * @return
   *   For 'fq' the rebuilt list of filter strings. For other parameters the
   *   stored value, or, when nothing is stored, NULL for a single-value
   *   parameter and an empty array for any other parameter.
   */
  public function getParam($name) {
    if ($name == 'fq') {
      return $this->rebuildFq();
    }
    if (isset($this->params[$name])) {
      return $this->params[$name];
    }
    return isset($this->single_value_params[$name]) ? NULL : array();
  }

  /**
   * Returns all parameters, with 'fq' rebuilt from the current filters.
   */
  public function getParams() {
    $params = $this->params;
    $params['fq'] = $this->rebuildFq();
    return $params;
  }

  /**
   * Returns all parameters in the form they are handed to the Solr service.
   */
  public function getSolrParams() {
    $params = $this->getParams();
    // For certain fields Solr prefers a comma separated list.
    foreach (array('fl', 'hl.fl', 'sort', 'mlt.fl') as $name) {
      // Only lists are joined; a value that is already a string is kept.
      if (isset($params[$name]) && is_array($params[$name])) {
        $params[$name] = implode(',', $params[$name]);
      }
    }
    return $params;
  }

  /**
   * Parses one filter string and adds it as a filter.
   *
   * Understands [-][{!local}][-]name:value, which includes the form produced
   * by makeFilterQuery() ({!local}-name:value). A string without a colon is
   * added as a filter with an empty name.
   *
   * @param $string
   *   The filter string.
   * @param $index
   *   Unused; present so that the method can be used as a callback for
   *   array_walk_recursive().
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  protected function addFq($string, $index = NULL) {
    $string = trim((string) $string);
    $local = '';
    $exclude = FALSE;
    $name = NULL;
    $value = NULL;

    // Check if we are dealing with an exclude
    if (preg_match('/^-(.*)/', $string, $matches)) {
      $exclude = TRUE;
      $string = $matches[1];
    }

    // If {!something} is found at the start then this is a local value. The
    // pattern is anchored so that "{!" inside a field value is left alone.
    if (preg_match('/^\{!([^}]+)\}(.*)/', $string, $matches)) {
      $local = $matches[1];
      $string = $matches[2];
      // makeFilterQuery() puts the exclude marker behind the local
      // parameters, so look for it here as well.
      if (!$exclude && preg_match('/^-(.*)/', $string, $matches)) {
        $exclude = TRUE;
        $string = $matches[1];
      }
    }

    // Anything that has a name and value
    // check if we have a : in the string
    if (strpos($string, ':') !== FALSE) {
      list($name, $value) = explode(':', $string, 2);
    }
    else {
      $value = $string;
    }
    $this->addFilter($name, $value, $exclude, $local);
    return $this;
  }

  /**
   * Adds a value to a parameter.
   *
   * - Single-value parameters are overwritten; of an array only the last
   *   element is used.
   * - 'fq' values are parsed and stored as filters.
   * - For every other parameter the value(s) are appended to the list of
   *   existing values.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function addParam($name, $value) {
    if (isset($this->single_value_params[$name])) {
      if (is_array($value)) {
        $value = end($value);
      }
      $this->params[$name] = $this->normalizeParamValue($value);
      return $this;
    }
    // We never actually populate $this->params['fq'].  Instead
    // we manage everything via the filter methods.
    if ($name == 'fq') {
      if (!is_array($value)) {
        return $this->addFq($value);
      }
      array_walk_recursive($value, array($this, 'addFq'));
      return $this;
    }

    if (!isset($this->params[$name])) {
      $this->params[$name] = array();
    }

    // Work on a numerically keyed list, also for a single value.
    $param_values = is_array($value) ? array_values($value) : array($value);
    $normalized = array_map(array($this, 'normalizeParamValue'), $param_values);
    $this->params[$name] = array_merge($this->params[$name], $normalized);

    return $this;
  }

  /**
   * Converts a parameter value to the string that is stored for it.
   */
  protected function normalizeParamValue($value) {
    // Convert boolean to string.
    if (is_bool($value)) {
      return $value ? 'true' : 'false';
    }
    // Convert to trimmed string. The cast keeps NULL away from trim().
    return trim((string) $value);
  }

  /**
   * Adds several parameters, see addParam().
   *
   * @param array $params
   *   Parameter values keyed by parameter name.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function addParams(array $params) {
    foreach ($params as $name => $value) {
      $this->addParam($name, $value);
    }
    return $this;
  }

  /**
   * Removes a parameter; removing 'fq' drops all filters and subqueries.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function removeParam($name) {
    unset($this->params[$name]);
    if ($name == 'fq') {
      $this->fields = array();
      $this->subqueries = array();
    }
    return $this;
  }

  /**
   * Removes a parameter and adds it again with the given value.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function replaceParam($name, $value) {
    $this->removeParam($name);
    return $this->addParam($name, $value);
  }

  /**
   * Handles aliases for field to make nicer URLs.
   *
   * @param $field_map
   *   An array keyed with real Solr index field names with the alias as value.
   *
   * @return DrupalSolrQueryInterface
   *   The called object.
   */
  public function addFieldAliases($field_map) {
    $this->field_map = array_merge($this->field_map, $field_map);
    // The sort string may use the changed aliases, so parse it again.
    $this->parseSortString();
    return $this;
  }

  /**
   * Returns the field aliases.
   */
  public function getFieldAliases() {
    return $this->field_map;
  }

  /**
   * Removes all field aliases.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function clearFieldAliases() {
    $this->field_map = array();
    // The sort string may have used the aliases, so parse it again.
    $this->parseSortString();
    return $this;
  }

  /**
   * Derives $this->solrsort and the 'sort' parameter from $this->sortstring.
   *
   * An empty sort string or "score desc" selects the default sort and removes
   * the 'sort' parameter. Any other sort string is only applied when it
   * consists of "<available sort> <asc|desc>" parts; otherwise the previous
   * sort is kept.
   */
  protected function parseSortString() {
    // Substitute any field aliases with real field names.
    // NOTE(review): strtr() replaces the keys of $field_map with their
    // values, while addFieldAliases() documents the keys as the real field
    // names - confirm the intended direction of this substitution.
    $sortstring = strtr($this->sortstring, $this->field_map);
    // Score is a special case - it's the default sort for Solr.
    if ('' == $sortstring || 'score desc' == $sortstring) {
      $this->solrsort['#name'] = 'score';
      $this->solrsort['#direction'] = 'desc';
      unset($this->params['sort']);
      return;
    }

    // Validate and set sort parameter. The field names are quoted so that
    // characters with a meaning in regular expressions match literally.
    $quoted_fields = array();
    foreach (array_keys($this->available_sorts) as $field) {
      $quoted_fields[] = preg_quote((string) $field, '/');
    }
    $fields = implode('|', $quoted_fields);
    if (preg_match('/^(?:(' . $fields . ') (asc|desc),?)+$/', $sortstring, $matches)) {
      // We only use the last match.
      $this->solrsort['#name'] = $matches[1];
      $this->solrsort['#direction'] = $matches[2];
      $this->params['sort'] = array($sortstring);
    }
  }

  /**
   * Returns the available sorts, keyed by Solr field name.
   */
  public function getAvailableSorts() {
    return $this->available_sorts;
  }

  /**
   * Adds or replaces one available sort.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function setAvailableSort($name, $sort) {
    // We expect non-aliased sorts to be added.
    $this->available_sorts[$name] = $sort;
    // Re-parse the sortstring.
    $this->parseSortString();
    return $this;
  }

  /**
   * Replaces all available sorts.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function setAvailableSorts($sorts) {
    // We expect a complete array of valid sorts.
    $this->available_sorts = $sorts;
    $this->parseSortString();
    return $this;
  }

  /**
   * Removes one available sort.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function removeAvailableSort($name) {
    unset($this->available_sorts[$name]);
    // Re-parse the sortstring.
    $this->parseSortString();
    return $this;
  }

  /**
   * Returns the current sort as an array with #name and #direction.
   */
  public function getSolrsort() {
    return $this->solrsort;
  }

  /**
   * Sets the sort string from a name and a direction and parses it.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function setSolrsort($name, $direction) {
    $this->sortstring = trim($name) . ' ' . trim($direction);
    $this->parseSortString();
    return $this;
  }

  /**
   * Returns the search path: the base path, a slash and the keywords.
   *
   * @param $new_keywords
   *   Keywords to use instead of the current 'q' parameter.
   *
   * @return string
   */
  public function getPath($new_keywords = NULL) {
    if (isset($new_keywords)) {
      return $this->base_path . '/' . $new_keywords;
    }
    $keywords = $this->getParam('q');
    // Compare against NULL and '' explicitly so that the keyword "0" is not
    // mistaken for "no keywords".
    if (isset($keywords) && $keywords !== '') {
      return $this->base_path . '/' . $keywords;
    }
    // Return with empty query (the slash). The path for a facet
    // becomes $this->base_path . '//facetinfo';
    // We do this so we can have a consistent way of retrieving the query +
    // additional parameters
    return $this->base_path . '/';
  }

  /**
   * Returns the URL query values that describe the current sort.
   *
   * @return array
   *   Empty for the default relevancy sort, otherwise an array with a
   *   'solrsort' entry of the form "<name> <direction>", where the name is
   *   looked up in the field map first.
   */
  public function getSolrsortUrlQuery() {
    $solrsort = $this->solrsort;
    if (!$solrsort || $solrsort['#name'] == 'score') {
      // Default relevancy sort: nothing to add to the URL.
      return array();
    }
    $sort_name = $solrsort['#name'];
    if (isset($this->field_map[$sort_name])) {
      $sort_name = $this->field_map[$sort_name];
    }
    return array('solrsort' => $sort_name . ' ' . $solrsort['#direction']);
  }

  /**
   * Runs the search on the Solr service.
   *
   * @param $keys
   *   The search keys, passed on to the service as is.
   *
   * @return
   *   NULL when the search was aborted through $this->abort_search, otherwise
   *   whatever the service's search() method returns.
   */
  public function search($keys = NULL) {
    if ($this->abort_search) {
      return NULL;
    }

    return $this->solr->search($keys, $this->getSolrParams());
  }

  /**
   * Calls a method without arguments on the Solr service object.
   *
   * @param $method
   *   The name of the method to call.
   */
  public function solr($method) {
    return $this->solr->$method();
  }

}