Mercurial > hg > MPIWG-drupal-modules
diff sites/all/modules/custom/solrsearch/Solr_Base_Query.php @ 0:015d06b10d37 default tip
initial
author | dwinter |
---|---|
date | Wed, 31 Jul 2013 13:49:13 +0200 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sites/all/modules/custom/solrsearch/Solr_Base_Query.php Wed Jul 31 13:49:13 2013 +0200 @@ -0,0 +1,657 @@ +<?php +/** + * This class allows you to make operations on a query that will be sent to + * Apache Solr. methods such as adding and removing sorts, remove and replace + * parameters, adding and removing filters, getters and setters for various + * parameters and more + * @file + * Class that defines the base query for the Apache Solr Drupal module. + */ + +class SolrFilterSubQuery { + + /** + * Static shared by all instances, used to increment ID numbers. + */ + protected static $idCount = 0; + + /** + * Each query/subquery will have a unique ID. + */ + public $id; + public $operator; + + /** + * A keyed array where the key is a position integer and the value + * is an array with #name and #value properties. Each value is a + * used for filter queries, e.g. array('#name' => 'is_uid', '#value' => 0) + * for anonymous content. + */ + protected $fields = array(); + + /** + * An array of subqueries. + */ + protected $subqueries = array(); + + function __construct($operator = 'OR') { + $this->operator = $operator; + $this->id = ++SolrFilterSubQuery::$idCount; + } + + function __clone() { + $this->id = ++SolrFilterSubQuery::$idCount; + } + + public function getFilters($name = NULL) { + if (empty($name)) { + return $this->fields; + } + reset($this->fields); + $matches = array(); + foreach ($this->fields as $filter) { + if ($filter['#name'] == $name) { + $matches[] = $filter; + } + } + return $matches; + } + + public function hasFilter($name, $value, $exclude = FALSE) { + foreach ($this->fields as $pos => $values) { + if ($values['#name'] == $name && $values['#value'] == $value && $values['#exclude'] == $exclude) { + return TRUE; + } + } + return FALSE; + } + + public function addFilter($name, $value, $exclude = FALSE, $local = '') { + // @todo - escape the value if it has spaces in it and is not a range query or parenthesized. + $filter = array( + '#exclude' => (bool) $exclude, + '#name' => trim($name), + '#value' => trim($value), + '#local' => trim($local), + ); + $this->fields[] = $filter; + return $this; + } + + public function removeFilter($name, $value = NULL, $exclude = FALSE) { + // Remove from the public list of filters. + $this->unsetFilter($this->fields, $name, $value, $exclude); + return $this; + } + + protected function unsetFilter(&$fields, $name, $value, $exclude) { + if (!isset($value)) { + foreach ($fields as $pos => $values) { + if ($values['#name'] == $name) { + unset($fields[$pos]); + } + } + } + else { + foreach ($fields as $pos => $values) { + if ($values['#name'] == $name && $values['#value'] == $value && $values['#exclude'] == $exclude) { + unset($fields[$pos]); + } + } + } + } + + public function getFilterSubQueries() { + return $this->subqueries; + } + + public function addFilterSubQuery(SolrFilterSubQuery $query) { + $this->subqueries[$query->id] = $query; + return $this; + } + + public function removeFilterSubQuery(SolrFilterSubQuery $query) { + unset($this->subqueries[$query->id]); + return $this; + } + + public function removeFilterSubQueries() { + $this->subqueries = array(); + return $this; + } + + public function makeFilterQuery(array $filter) { + $prefix = empty($filter['#exclude']) ? '' : '-'; + if ($filter['#local']) { + $prefix = '{!' . $filter['#local'] . '}' . $prefix; + } + // If the field value contains a colon or a space, wrap it in double quotes, + // unless it is a range query or is already wrapped in double quotes or + // parentheses. + if (preg_match('/[ :]/', $filter['#value']) && !preg_match('/^[\[\{]\S+ TO \S+[\]\}]$/', $filter['#value']) && !preg_match('/^["\(].*["\)]$/', $filter['#value'])) { + $filter['#value'] = '"' . $filter['#value'] . '"'; + } + return $prefix . $filter['#name'] . ':' . $filter['#value']; + } + + /** + * Make sure our query matches the pattern name:value or name:"value" + * Make sure that if we are ranges we use name:[ AND ] + * allowed inputs : + * a. bundle:article + * b. date:[1970-12-31T23:59:59Z TO NOW] + * Split the text in 4 different parts + * 1. name, eg.: bundle or date + * 2. The first opening bracket (or nothing), eg.: [ + * 3. The value of the field, eg. article or 1970-12-31T23:59:59Z TO NOW + * 4. The last closing bracket, eg.: ] + * @param string $filter + * The filter to validate + * @return boolean + */ + public static function validFilterValue($filter) { + $opening = 0; + $closing = 0; + $name = NULL; + $value = NULL; + + if (preg_match('/(?P<name>[^:]+):(?P<value>.+)?$/', $filter, $matches)) { + foreach ($matches as $match_id => $match) { + switch($match_id) { + case 'name' : + $name = $match; + break; + case 'value' : + $value = $match; + break; + } + } + + // For the name we allow any character that fits between the A-Z0-9 range and + // any alternative for this in other languages. No special characters allowed + if (!preg_match('/^[a-zA-Z0-9_\x7f-\xff]+$/', $name)) { + return FALSE; + } + + // For the value we allow anything that is UTF8 + if (!drupal_validate_utf8($value)) { + return FALSE; + } + + // Check our bracket count. If it does not match it is also not valid + $valid_brackets = TRUE; + $brackets['opening']['{'] = substr_count($value, '{'); + $brackets['closing']['}'] = substr_count($value, '}'); + $valid_brackets = ($brackets['opening']['{'] != $brackets['closing']['}']) ? FALSE : TRUE; + $brackets['opening']['['] = substr_count($value, '['); + $brackets['closing'][']'] = substr_count($value, ']'); + $valid_brackets = ($brackets['opening']['['] != $brackets['closing'][']']) ? FALSE : TRUE; + $brackets['opening']['('] = substr_count($value, '('); + $brackets['closing'][')'] = substr_count($value, ')'); + $valid_brackets = ($brackets['opening']['('] != $brackets['closing'][')']) ? FALSE : TRUE; + if (!$valid_brackets) { + return FALSE; + } + + // Check the date field inputs + if (preg_match('/\[(.+) TO (.+)\]$/', $value, $datefields)) { + // Only Allow a value in the form of + // http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html + // http://lucene.apache.org/solr/api/org/apache/solr/util/DateMathParser.html + // http://wiki.apache.org/solr/SolrQuerySyntax + // 1976-03-06T23:59:59.999Z (valid) + // * (valid) + // 1995-12-31T23:59:59.999Z (valid) + // 2007-03-06T00:00:00Z (valid) + // NOW-1YEAR/DAY (valid) + // NOW/DAY+1DAY (valid) + // 1976-03-06T23:59:59.999Z (valid) + // 1976-03-06T23:59:59.999Z+1YEAR (valid) + // 1976-03-06T23:59:59.999Z/YEAR (valid) + // 1976-03-06T23:59:59.999Z (valid) + // 1976-03-06T23::59::59.999Z (invalid) + if (!empty($datefields[1]) && !empty($datefields[2])) { + // Do not check to full value, only the splitted ones + unset($datefields[0]); + // Check if both matches are valid datefields + foreach ($datefields as $datefield) { + if (!preg_match('/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:[\d\.]{2,6}Z(\S)*)|(^([A-Z\*]+)(\A-Z0-9\+\-\/)*)/', $datefield, $datefield_match)) { + return FALSE; + } + } + } + } + } + return TRUE; + } + + /** + * Builds a set of filter queries from $this->fields and all subqueries. + * + * Returns an array of strings that can be combined into + * a URL query parameter or passed to Solr as fq paramters. + */ + protected function rebuildFq() { + $fq = array(); + foreach ($this->fields as $pos => $field) { + $fq[] = $this->makeFilterQuery($field); + } + foreach ($this->subqueries as $subquery) { + $subfq = $subquery->rebuildFq(); + if ($subfq) { + $operator = $subquery->operator; + $fq[] = "(" . implode(" $operator ", $subfq) . ")"; + } + } + return $fq; + } + +} + +class SolrBaseQuery extends SolrFilterSubQuery implements DrupalSolrQueryInterface { + + /** + * The parameters that get sent to Solr. + */ + protected $params = array('start' => 0, 'rows' => 10, 'fq' => array()); + + /** + * The search base path. + */ + protected $base_path; + protected $field_map = array(); + + /** + * DrupalApacheSolrService object + */ + protected $solr; + // The array keys must always be real Solr index fields. + protected $available_sorts; + + /** + * The query name is used to construct a searcher string. Mostly the + * environment id + */ + protected $name; + protected $context = array(); + // Makes sure we always have a valid sort. + protected $solrsort = array('#name' => 'score', '#direction' => 'desc'); + // A flag to allow the search to be aborted. + public $abort_search = FALSE; + + // A flag to check if need to retrieve another page of the result set + public $page = 0; + + /** + * @param $name + * The search name, used for finding the correct blocks and other config. + * Typically "apachesolr". + * + * @param $solr + * An instantiated DrupalApacheSolrService Object. + * Can be instantiated from apachesolr_get_solr(). + * + * @param $params + * Array of params to initialize the object (typically 'q' and 'fq'). + * + * @param $sortstring + * Visible string telling solr how to sort - added to GET query params. + * + * @param $base_path + * The search base path (without the keywords) for this query, without trailing slash. + */ + function __construct($name, $solr, array $params = array(), $sortstring = '', $base_path = '', $context = array()) { + parent::__construct(); + $this->name = $name; + $this->solr = $solr; + $this->addContext((array) $context); + $this->addParams((array) $params); + $this->available_sorts = $this->defaultSorts(); + $this->sortstring = trim($sortstring); + $this->parseSortString(); + $this->base_path = $base_path; + } + + protected function defaultSorts() { + return array( + 'score' => array('title' => t('Relevancy'), 'default' => 'desc'), + 'title' => array('title' => t('Title'), 'default' => 'asc'), + 'author_s' => array('title' => t('Author'), 'default' => 'asc'), + 'date' => array('title' => t('Date'), 'default' => 'desc'), + ); + } + + /** + * Get query name. + */ + public function getName() { + return $this->name; + } + + /** + * Get query searcher name (for facetapi, views, pages, etc). + */ + public function getSearcher() { + return $this->name . '@' . $this->solr->getId(); + } + + /** + * Get context values. + */ + public function getContext() { + return $this->context; + } + + /** + * Set context value. + */ + public function addContext(array $context) { + foreach ($context as $k => $v) { + $this->context[$k] = $v; + } + // The env_id must match that of the actual $solr object + $this->context['env_id'] = $this->solr->getId(); + return $this->context; + } + + protected $single_value_params = array( + 'q' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q + 'q.op' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q.op + 'q.alt' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q + 'df' => TRUE, + 'qt' => TRUE, + 'defType' => TRUE, + 'timeAllowed' => TRUE, + 'omitHeader' => TRUE, + 'debugQuery' => TRUE, + 'start' => TRUE, + 'rows' => TRUE, + 'stats' => TRUE, + 'facet' => TRUE, + 'facet.prefix' => TRUE, + 'facet.limit' => TRUE, + 'facet.offset' => TRUE, + 'facet.mincount' => TRUE, + 'facet.missing' => TRUE, + 'facet.method' => TRUE, + 'facet.enum.cache.minDf' => TRUE, + 'facet.date.start' => TRUE, + 'facet.date.end' => TRUE, + 'facet.date.gap' => TRUE, + 'facet.date.hardend' => TRUE, + 'facet.date.other' => TRUE, + 'facet.date.include' => TRUE, + 'hl' => TRUE, + 'hl.snippets' => TRUE, + 'hl.fragsize' => TRUE, + 'hl.mergeContiguous' => TRUE, + 'hl.requireFieldMatch' => TRUE, + 'hl.maxAnalyzedChars' => TRUE, + 'hl.alternateField' => TRUE, + 'hl.maxAlternateFieldLength' => TRUE, + 'hl.formatter' => TRUE, + 'hl.simple.pre/hl.simple.post' => TRUE, + 'hl.fragmenter' => TRUE, + 'hl.fragListBuilder' => TRUE, + 'hl.fragmentsBuilder' => TRUE, + 'hl.useFastVectorHighlighter' => TRUE, + 'hl.usePhraseHighlighter' => TRUE, + 'hl.highlightMultiTerm' => TRUE, + 'hl.regex.slop' => TRUE, + 'hl.regex.pattern' => TRUE, + 'hl.regex.maxAnalyzedChars' => TRUE, + 'spellcheck' => TRUE, + ); + + public function getParam($name) { + if ($name == 'fq') { + return $this->rebuildFq(); + } + $empty = isset($this->single_value_params[$name]) ? NULL : array(); + return isset($this->params[$name]) ? $this->params[$name] : $empty; + } + + public function getParams() { + $params = $this->params; + $params['fq'] = $this->rebuildFq(); + return $params; + } + + public function getSolrParams() { + $params = $this->getParams(); + // For certain fields Solr prefers a comma separated list. + foreach (array('fl', 'hl.fl', 'sort', 'mlt.fl') as $name) { + if (isset($params[$name])) { + $params[$name] = implode(',', $params[$name]); + } + } + return $params; + } + + protected function addFq($string, $index = NULL) { + $string = trim($string); + $local = ''; + $exclude = FALSE; + $name = NULL; + $value = NULL; + + // Check if we are dealing with an exclude + if (preg_match('/^-(.*)/', $string, $matches)) { + $exclude = TRUE; + $string = $matches[1]; + } + + // If {!something} is found as first character then this is a local value + if (preg_match('/\{!([^}]+)\}(.*)/', $string, $matches)) { + $local = $matches[1]; + $string = $matches[2]; + } + + // Anything that has a name and value + // check if we have a : in the string + if (strstr($string, ':')) { + list($name, $value) = explode(":", $string, 2); + } + else { + $value = $string; + } + $this->addFilter($name, $value, $exclude, $local); + return $this; + } + + public function addParam($name, $value) { + if (isset($this->single_value_params[$name])) { + if (is_array($value)) { + $value = end($value); + } + $this->params[$name] = $this->normalizeParamValue($value); + return $this; + } + // We never actually populate $this->params['fq']. Instead + // we manage everything via the filter methods. + if ($name == 'fq') { + if (is_array($value)) { + array_walk_recursive($value, array($this, 'addFq')); + return $this; + } + else { + return $this->addFq($value); + } + } + + if (!isset($this->params[$name])) { + $this->params[$name] = array(); + } + + if (!is_array($value)) { + // Convert to array for array_map. + $param_values = array($value); + } + else { + // Convert to a numerically keyed array. + $param_values = array_values($value); + } + $this->params[$name] = array_merge($this->params[$name], array_map(array($this, 'normalizeParamValue'), $param_values)); + + return $this; + } + + protected function normalizeParamValue($value) { + // Convert boolean to string. + if (is_bool($value)) { + return $value ? 'true' : 'false'; + } + // Convert to trimmed string. + return trim($value); + } + + public function addParams(Array $params) { + foreach ($params as $name => $value) { + $this->addParam($name, $value); + } + return $this; + } + + public function removeParam($name) { + unset($this->params[$name]); + if ($name == 'fq') { + $this->fields = array(); + $this->subqueries = array(); + } + return $this; + } + + public function replaceParam($name, $value) { + $this->removeParam($name); + return $this->addParam($name, $value); + } + + /** + * Handles aliases for field to make nicer URLs. + * + * @param $field_map + * An array keyed with real Solr index field names with the alias as value. + * + * @return DrupalSolrQueryInterface + * The called object. + */ + public function addFieldAliases($field_map) { + $this->field_map = array_merge($this->field_map, $field_map); + // We have to re-parse the filters. + $this->parseSortString(); + return $this; + } + + public function getFieldAliases() { + return $this->field_map; + } + + public function clearFieldAliases() { + $this->field_map = array(); + // We have to re-parse the filters. + $this->parseSortString(); + return $this; + } + + protected function parseSortString() { + // Substitute any field aliases with real field names. + $sortstring = strtr($this->sortstring, $this->field_map); + // Score is a special case - it's the default sort for Solr. + if ('' == $sortstring || 'score desc' == $sortstring) { + $this->solrsort['#name'] = 'score'; + $this->solrsort['#direction'] = 'desc'; + unset($this->params['sort']); + } + else { + // Validate and set sort parameter + $fields = implode('|', array_keys($this->available_sorts)); + if (preg_match('/^(?:(' . $fields . ') (asc|desc),?)+$/', $sortstring, $matches)) { + // We only use the last match. + $this->solrsort['#name'] = $matches[1]; + $this->solrsort['#direction'] = $matches[2]; + $this->params['sort'] = array($sortstring); + } + } + } + + public function getAvailableSorts() { + return $this->available_sorts; + } + + public function setAvailableSort($name, $sort) { + // We expect non-aliased sorts to be added. + $this->available_sorts[$name] = $sort; + // Re-parse the sortstring. + $this->parseSortString(); + return $this; + } + + public function setAvailableSorts($sorts) { + // We expect a complete array of valid sorts. + $this->available_sorts = $sorts; + $this->parseSortString(); + return $this; + } + + public function removeAvailableSort($name) { + unset($this->available_sorts[$name]); + // Re-parse the sortstring. + $this->parseSortString(); + return $this; + } + + public function getSolrsort() { + return $this->solrsort; + } + + public function setSolrsort($name, $direction) { + $this->sortstring = trim($name) . ' ' . trim($direction); + $this->parseSortString(); + return $this; + } + + public function getPath($new_keywords = NULL) { + if (isset($new_keywords)) { + return $this->base_path . '/' . $new_keywords; + } + elseif ($this->getParam('q')) { + return $this->base_path . '/' . $this->getParam('q'); + } + else { + // Return with empty query (the slash). The path for a facet + // becomes $this->base_path . '//facetinfo'; + // We do this so we can have a consistent way of retrieving the query + + // additional parameters + return $this->base_path . '/'; + } + } + + public function getSolrsortUrlQuery() { + $queryvalues = array(); + $solrsort = $this->solrsort; + if ($solrsort && ($solrsort['#name'] != 'score')) { + if (isset($this->field_map[$solrsort['#name']])) { + $solrsort['#name'] = $this->field_map[$solrsort['#name']]; + } + $queryvalues['solrsort'] = $solrsort['#name'] . ' ' . $solrsort['#direction']; + } + else { + // Return to default relevancy sort. + unset($queryvalues['solrsort']); + } + return $queryvalues; + } + + public function search($keys = NULL) { + if ($this->abort_search) { + return NULL; + } + + return $this->solr->search($keys, $this->getSolrParams()); + } + + public function solr($method) { + return $this->solr->$method(); + } + +}