Mercurial > hg > solrsearch
view Solr_Base_Query.php @ 0:a2b4f67e73dc default tip
initial
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 08 Jun 2015 10:21:54 +0200 |
parents | |
children |
line wrap: on
line source
<?php /** * This class allows you to make operations on a query that will be sent to * Apache Solr. methods such as adding and removing sorts, remove and replace * parameters, adding and removing filters, getters and setters for various * parameters and more * @file * Class that defines the base query for the Apache Solr Drupal module. */ class SolrFilterSubQuery { /** * Static shared by all instances, used to increment ID numbers. */ protected static $idCount = 0; /** * Each query/subquery will have a unique ID. */ public $id; public $operator; /** * A keyed array where the key is a position integer and the value * is an array with #name and #value properties. Each value is a * used for filter queries, e.g. array('#name' => 'is_uid', '#value' => 0) * for anonymous content. */ protected $fields = array(); /** * An array of subqueries. */ protected $subqueries = array(); function __construct($operator = 'OR') { $this->operator = $operator; $this->id = ++SolrFilterSubQuery::$idCount; } function __clone() { $this->id = ++SolrFilterSubQuery::$idCount; } public function getFilters($name = NULL) { if (empty($name)) { return $this->fields; } reset($this->fields); $matches = array(); foreach ($this->fields as $filter) { if ($filter['#name'] == $name) { $matches[] = $filter; } } return $matches; } public function hasFilter($name, $value, $exclude = FALSE) { foreach ($this->fields as $pos => $values) { if ($values['#name'] == $name && $values['#value'] == $value && $values['#exclude'] == $exclude) { return TRUE; } } return FALSE; } public function addFilter($name, $value, $exclude = FALSE, $local = '') { // @todo - escape the value if it has spaces in it and is not a range query or parenthesized. $filter = array( '#exclude' => (bool) $exclude, '#name' => trim($name), '#value' => trim($value), '#local' => trim($local), ); $this->fields[] = $filter; return $this; } public function removeFilter($name, $value = NULL, $exclude = FALSE) { // Remove from the public list of filters. $this->unsetFilter($this->fields, $name, $value, $exclude); return $this; } protected function unsetFilter(&$fields, $name, $value, $exclude) { if (!isset($value)) { foreach ($fields as $pos => $values) { if ($values['#name'] == $name) { unset($fields[$pos]); } } } else { foreach ($fields as $pos => $values) { if ($values['#name'] == $name && $values['#value'] == $value && $values['#exclude'] == $exclude) { unset($fields[$pos]); } } } } public function getFilterSubQueries() { return $this->subqueries; } public function addFilterSubQuery(SolrFilterSubQuery $query) { $this->subqueries[$query->id] = $query; return $this; } public function removeFilterSubQuery(SolrFilterSubQuery $query) { unset($this->subqueries[$query->id]); return $this; } public function removeFilterSubQueries() { $this->subqueries = array(); return $this; } public function makeFilterQuery(array $filter) { $prefix = empty($filter['#exclude']) ? '' : '-'; if ($filter['#local']) { $prefix = '{!' . $filter['#local'] . '}' . $prefix; } // If the field value contains a colon or a space, wrap it in double quotes, // unless it is a range query or is already wrapped in double quotes or // parentheses. if (preg_match('/[ :]/', $filter['#value']) && !preg_match('/^[\[\{]\S+ TO \S+[\]\}]$/', $filter['#value']) && !preg_match('/^["\(].*["\)]$/', $filter['#value'])) { $filter['#value'] = '"' . $filter['#value'] . '"'; } return $prefix . $filter['#name'] . ':' . $filter['#value']; } /** * Make sure our query matches the pattern name:value or name:"value" * Make sure that if we are ranges we use name:[ AND ] * allowed inputs : * a. bundle:article * b. date:[1970-12-31T23:59:59Z TO NOW] * Split the text in 4 different parts * 1. name, eg.: bundle or date * 2. The first opening bracket (or nothing), eg.: [ * 3. The value of the field, eg. article or 1970-12-31T23:59:59Z TO NOW * 4. The last closing bracket, eg.: ] * @param string $filter * The filter to validate * @return boolean */ public static function validFilterValue($filter) { $opening = 0; $closing = 0; $name = NULL; $value = NULL; if (preg_match('/(?P<name>[^:]+):(?P<value>.+)?$/', $filter, $matches)) { foreach ($matches as $match_id => $match) { switch($match_id) { case 'name' : $name = $match; break; case 'value' : $value = $match; break; } } // For the name we allow any character that fits between the A-Z0-9 range and // any alternative for this in other languages. No special characters allowed if (!preg_match('/^[a-zA-Z0-9_\x7f-\xff]+$/', $name)) { return FALSE; } // For the value we allow anything that is UTF8 if (!drupal_validate_utf8($value)) { return FALSE; } // Check our bracket count. If it does not match it is also not valid $valid_brackets = TRUE; $brackets['opening']['{'] = substr_count($value, '{'); $brackets['closing']['}'] = substr_count($value, '}'); $valid_brackets = ($brackets['opening']['{'] != $brackets['closing']['}']) ? FALSE : TRUE; $brackets['opening']['['] = substr_count($value, '['); $brackets['closing'][']'] = substr_count($value, ']'); $valid_brackets = ($brackets['opening']['['] != $brackets['closing'][']']) ? FALSE : TRUE; $brackets['opening']['('] = substr_count($value, '('); $brackets['closing'][')'] = substr_count($value, ')'); $valid_brackets = ($brackets['opening']['('] != $brackets['closing'][')']) ? FALSE : TRUE; if (!$valid_brackets) { return FALSE; } // Check the date field inputs if (preg_match('/\[(.+) TO (.+)\]$/', $value, $datefields)) { // Only Allow a value in the form of // http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html // http://lucene.apache.org/solr/api/org/apache/solr/util/DateMathParser.html // http://wiki.apache.org/solr/SolrQuerySyntax // 1976-03-06T23:59:59.999Z (valid) // * (valid) // 1995-12-31T23:59:59.999Z (valid) // 2007-03-06T00:00:00Z (valid) // NOW-1YEAR/DAY (valid) // NOW/DAY+1DAY (valid) // 1976-03-06T23:59:59.999Z (valid) // 1976-03-06T23:59:59.999Z+1YEAR (valid) // 1976-03-06T23:59:59.999Z/YEAR (valid) // 1976-03-06T23:59:59.999Z (valid) // 1976-03-06T23::59::59.999Z (invalid) if (!empty($datefields[1]) && !empty($datefields[2])) { // Do not check to full value, only the splitted ones unset($datefields[0]); // Check if both matches are valid datefields foreach ($datefields as $datefield) { if (!preg_match('/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:[\d\.]{2,6}Z(\S)*)|(^([A-Z\*]+)(\A-Z0-9\+\-\/)*)/', $datefield, $datefield_match)) { return FALSE; } } } } } return TRUE; } /** * Builds a set of filter queries from $this->fields and all subqueries. * * Returns an array of strings that can be combined into * a URL query parameter or passed to Solr as fq paramters. */ protected function rebuildFq() { $fq = array(); foreach ($this->fields as $pos => $field) { $fq[] = $this->makeFilterQuery($field); } foreach ($this->subqueries as $subquery) { $subfq = $subquery->rebuildFq(); if ($subfq) { $operator = $subquery->operator; $fq[] = "(" . implode(" $operator ", $subfq) . ")"; } } return $fq; } } class SolrBaseQuery extends SolrFilterSubQuery implements DrupalSolrQueryInterface { /** * The parameters that get sent to Solr. */ protected $params = array('start' => 0, 'rows' => 10, 'fq' => array()); /** * The search base path. */ protected $base_path; protected $field_map = array(); /** * DrupalApacheSolrService object */ protected $solr; // The array keys must always be real Solr index fields. protected $available_sorts; /** * The query name is used to construct a searcher string. Mostly the * environment id */ protected $name; protected $context = array(); // Makes sure we always have a valid sort. protected $solrsort = array('#name' => 'score', '#direction' => 'desc'); // A flag to allow the search to be aborted. public $abort_search = FALSE; // A flag to check if need to retrieve another page of the result set public $page = 0; /** * @param $name * The search name, used for finding the correct blocks and other config. * Typically "apachesolr". * * @param $solr * An instantiated DrupalApacheSolrService Object. * Can be instantiated from apachesolr_get_solr(). * * @param $params * Array of params to initialize the object (typically 'q' and 'fq'). * * @param $sortstring * Visible string telling solr how to sort - added to GET query params. * * @param $base_path * The search base path (without the keywords) for this query, without trailing slash. */ function __construct($name, $solr, array $params = array(), $sortstring = '', $base_path = '', $context = array()) { parent::__construct(); $this->name = $name; $this->solr = $solr; $this->addContext((array) $context); $this->addParams((array) $params); $this->available_sorts = $this->defaultSorts(); $this->sortstring = trim($sortstring); $this->parseSortString(); $this->base_path = $base_path; } protected function defaultSorts() { return array( 'score' => array('title' => t('Relevancy'), 'default' => 'desc'), 'title' => array('title' => t('Title'), 'default' => 'asc'), 'author_s' => array('title' => t('Author'), 'default' => 'asc'), 'date' => array('title' => t('Date'), 'default' => 'desc'), ); } /** * Get query name. */ public function getName() { return $this->name; } /** * Get query searcher name (for facetapi, views, pages, etc). */ public function getSearcher() { return $this->name . '@' . $this->solr->getId(); } /** * Get context values. */ public function getContext() { return $this->context; } /** * Set context value. */ public function addContext(array $context) { foreach ($context as $k => $v) { $this->context[$k] = $v; } // The env_id must match that of the actual $solr object $this->context['env_id'] = $this->solr->getId(); return $this->context; } protected $single_value_params = array( 'q' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q 'q.op' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q.op 'q.alt' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q 'df' => TRUE, 'qt' => TRUE, 'defType' => TRUE, 'timeAllowed' => TRUE, 'omitHeader' => TRUE, 'debugQuery' => TRUE, 'start' => TRUE, 'rows' => TRUE, 'stats' => TRUE, 'facet' => TRUE, 'facet.prefix' => TRUE, 'facet.limit' => TRUE, 'facet.offset' => TRUE, 'facet.mincount' => TRUE, 'facet.missing' => TRUE, 'facet.method' => TRUE, 'facet.enum.cache.minDf' => TRUE, 'facet.date.start' => TRUE, 'facet.date.end' => TRUE, 'facet.date.gap' => TRUE, 'facet.date.hardend' => TRUE, 'facet.date.other' => TRUE, 'facet.date.include' => TRUE, 'hl' => TRUE, 'hl.snippets' => TRUE, 'hl.fragsize' => TRUE, 'hl.mergeContiguous' => TRUE, 'hl.requireFieldMatch' => TRUE, 'hl.maxAnalyzedChars' => TRUE, 'hl.alternateField' => TRUE, 'hl.maxAlternateFieldLength' => TRUE, 'hl.formatter' => TRUE, 'hl.simple.pre/hl.simple.post' => TRUE, 'hl.fragmenter' => TRUE, 'hl.fragListBuilder' => TRUE, 'hl.fragmentsBuilder' => TRUE, 'hl.useFastVectorHighlighter' => TRUE, 'hl.usePhraseHighlighter' => TRUE, 'hl.highlightMultiTerm' => TRUE, 'hl.regex.slop' => TRUE, 'hl.regex.pattern' => TRUE, 'hl.regex.maxAnalyzedChars' => TRUE, 'spellcheck' => TRUE, ); public function getParam($name) { if ($name == 'fq') { return $this->rebuildFq(); } $empty = isset($this->single_value_params[$name]) ? NULL : array(); return isset($this->params[$name]) ? $this->params[$name] : $empty; } public function getParams() { $params = $this->params; $params['fq'] = $this->rebuildFq(); return $params; } public function getSolrParams() { $params = $this->getParams(); // For certain fields Solr prefers a comma separated list. foreach (array('fl', 'hl.fl', 'sort', 'mlt.fl') as $name) { if (isset($params[$name])) { $params[$name] = implode(',', $params[$name]); } } return $params; } protected function addFq($string, $index = NULL) { $string = trim($string); $local = ''; $exclude = FALSE; $name = NULL; $value = NULL; // Check if we are dealing with an exclude if (preg_match('/^-(.*)/', $string, $matches)) { $exclude = TRUE; $string = $matches[1]; } // If {!something} is found as first character then this is a local value if (preg_match('/\{!([^}]+)\}(.*)/', $string, $matches)) { $local = $matches[1]; $string = $matches[2]; } // Anything that has a name and value // check if we have a : in the string if (strstr($string, ':')) { list($name, $value) = explode(":", $string, 2); } else { $value = $string; } $this->addFilter($name, $value, $exclude, $local); return $this; } public function addParam($name, $value) { if (isset($this->single_value_params[$name])) { if (is_array($value)) { $value = end($value); } $this->params[$name] = $this->normalizeParamValue($value); return $this; } // We never actually populate $this->params['fq']. Instead // we manage everything via the filter methods. if ($name == 'fq') { if (is_array($value)) { array_walk_recursive($value, array($this, 'addFq')); return $this; } else { return $this->addFq($value); } } if (!isset($this->params[$name])) { $this->params[$name] = array(); } if (!is_array($value)) { // Convert to array for array_map. $param_values = array($value); } else { // Convert to a numerically keyed array. $param_values = array_values($value); } $this->params[$name] = array_merge($this->params[$name], array_map(array($this, 'normalizeParamValue'), $param_values)); return $this; } protected function normalizeParamValue($value) { // Convert boolean to string. if (is_bool($value)) { return $value ? 'true' : 'false'; } // Convert to trimmed string. return trim($value); } public function addParams(Array $params) { foreach ($params as $name => $value) { $this->addParam($name, $value); } return $this; } public function removeParam($name) { unset($this->params[$name]); if ($name == 'fq') { $this->fields = array(); $this->subqueries = array(); } return $this; } public function replaceParam($name, $value) { $this->removeParam($name); return $this->addParam($name, $value); } /** * Handles aliases for field to make nicer URLs. * * @param $field_map * An array keyed with real Solr index field names with the alias as value. * * @return DrupalSolrQueryInterface * The called object. */ public function addFieldAliases($field_map) { $this->field_map = array_merge($this->field_map, $field_map); // We have to re-parse the filters. $this->parseSortString(); return $this; } public function getFieldAliases() { return $this->field_map; } public function clearFieldAliases() { $this->field_map = array(); // We have to re-parse the filters. $this->parseSortString(); return $this; } protected function parseSortString() { // Substitute any field aliases with real field names. $sortstring = strtr($this->sortstring, $this->field_map); // Score is a special case - it's the default sort for Solr. if ('' == $sortstring || 'score desc' == $sortstring) { $this->solrsort['#name'] = 'score'; $this->solrsort['#direction'] = 'desc'; unset($this->params['sort']); } else { // Validate and set sort parameter $fields = implode('|', array_keys($this->available_sorts)); if (preg_match('/^(?:(' . $fields . ') (asc|desc),?)+$/', $sortstring, $matches)) { // We only use the last match. $this->solrsort['#name'] = $matches[1]; $this->solrsort['#direction'] = $matches[2]; $this->params['sort'] = array($sortstring); } } } public function getAvailableSorts() { return $this->available_sorts; } public function setAvailableSort($name, $sort) { // We expect non-aliased sorts to be added. $this->available_sorts[$name] = $sort; // Re-parse the sortstring. $this->parseSortString(); return $this; } public function setAvailableSorts($sorts) { // We expect a complete array of valid sorts. $this->available_sorts = $sorts; $this->parseSortString(); return $this; } public function removeAvailableSort($name) { unset($this->available_sorts[$name]); // Re-parse the sortstring. $this->parseSortString(); return $this; } public function getSolrsort() { return $this->solrsort; } public function setSolrsort($name, $direction) { $this->sortstring = trim($name) . ' ' . trim($direction); $this->parseSortString(); return $this; } public function getPath($new_keywords = NULL) { if (isset($new_keywords)) { return $this->base_path . '/' . $new_keywords; } elseif ($this->getParam('q')) { return $this->base_path . '/' . $this->getParam('q'); } else { // Return with empty query (the slash). The path for a facet // becomes $this->base_path . '//facetinfo'; // We do this so we can have a consistent way of retrieving the query + // additional parameters return $this->base_path . '/'; } } public function getSolrsortUrlQuery() { $queryvalues = array(); $solrsort = $this->solrsort; if ($solrsort && ($solrsort['#name'] != 'score')) { if (isset($this->field_map[$solrsort['#name']])) { $solrsort['#name'] = $this->field_map[$solrsort['#name']]; } $queryvalues['solrsort'] = $solrsort['#name'] . ' ' . $solrsort['#direction']; } else { // Return to default relevancy sort. unset($queryvalues['solrsort']); } return $queryvalues; } public function search($keys = NULL) { if ($this->abort_search) { return NULL; } return $this->solr->search($keys, $this->getSolrParams()); } public function solr($method) { return $this->solr->$method(); } }