comparison Solr_Base_Query.php @ 0:a2b4f67e73dc default tip

initial
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Mon, 08 Jun 2015 10:21:54 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a2b4f67e73dc
1 <?php
2 /**
3 * This class allows you to perform operations on a query that will be sent to
4 * Apache Solr: adding and removing sorts, removing and replacing
5 * parameters, adding and removing filters, getters and setters for various
6 * parameters, and more.
7 * @file
8 * Class that defines the base query for the Apache Solr Drupal module.
9 */
10
/**
 * A group of Solr filters together with nested groups of filters.
 *
 * Each instance stores plain field filters (name/value pairs) and a list of
 * subqueries. rebuildFq() renders both into filter query strings; the filters
 * of a subquery are joined with that subquery's operator.
 */
class SolrFilterSubQuery {

  /**
   * Static shared by all instances, used to increment ID numbers.
   */
  protected static $idCount = 0;

  /**
   * Each query/subquery will have a unique ID.
   */
  public $id;

  /**
   * Boolean operator placed between this subquery's filters when a parent
   * query renders it (see rebuildFq()).
   */
  public $operator;

  /**
   * A keyed array where the key is a position integer and the value
   * is an array with #name, #value, #exclude and #local properties, e.g.
   * array('#name' => 'is_uid', '#value' => '0', ...) for anonymous content.
   * Entries are only ever added through addFilter().
   */
  protected $fields = array();

  /**
   * An array of subqueries, keyed by subquery ID.
   */
  protected $subqueries = array();

  /**
   * Creates an empty subquery and assigns it the next unique ID.
   *
   * @param string $operator
   *   Operator used to join this subquery's filters, 'OR' by default.
   */
  public function __construct($operator = 'OR') {
    $this->operator = $operator;
    $this->id = ++self::$idCount;
  }

  /**
   * Gives a cloned subquery its own unique ID.
   */
  public function __clone() {
    $this->id = ++self::$idCount;
  }

  /**
   * Returns the stored filters, optionally restricted to one field name.
   *
   * @param string|null $name
   *   Field name to look for. When empty, all filters are returned with their
   *   position keys preserved.
   *
   * @return array
   *   The matching filter arrays. When $name is given the result is
   *   re-indexed from 0.
   */
  public function getFilters($name = NULL) {
    if (empty($name)) {
      return $this->fields;
    }
    $wanted = (string) $name;
    $matches = array();
    foreach ($this->fields as $filter) {
      if ($filter['#name'] === $wanted) {
        $matches[] = $filter;
      }
    }
    return $matches;
  }

  /**
   * Tells whether a filter with this name, value and exclude flag exists.
   *
   * @param string $name
   *   Field name.
   * @param mixed $value
   *   Field value; compared in the string form addFilter() would store.
   * @param bool $exclude
   *   Whether to look for an excluding (negated) filter.
   *
   * @return bool
   */
  public function hasFilter($name, $value, $exclude = FALSE) {
    foreach ($this->fields as $filter) {
      if ($this->filterMatches($filter, $name, $value, $exclude)) {
        return TRUE;
      }
    }
    return FALSE;
  }

  /**
   * Appends a filter to this subquery.
   *
   * @param string $name
   *   Field name.
   * @param mixed $value
   *   Field value; stored as a trimmed string.
   * @param bool $exclude
   *   TRUE to negate the filter.
   * @param string $local
   *   Local parameters, without the surrounding "{!" and "}".
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function addFilter($name, $value, $exclude = FALSE, $local = '') {
    // @todo - escape the value if it has spaces in it and is not a range query or parenthesized.
    // The string casts keep trim() from receiving NULL, which is deprecated
    // as of PHP 8.1; the stored result is the same as before.
    $this->fields[] = array(
      '#exclude' => (bool) $exclude,
      '#name' => trim((string) $name),
      '#value' => trim((string) $value),
      '#local' => trim((string) $local),
    );
    return $this;
  }

  /**
   * Removes filters by name, or by name, value and exclude flag.
   *
   * @param string $name
   *   Field name.
   * @param mixed $value
   *   When NULL every filter on $name is removed; otherwise only filters that
   *   also match $value and $exclude.
   * @param bool $exclude
   *   Exclude flag to match; ignored when $value is NULL.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function removeFilter($name, $value = NULL, $exclude = FALSE) {
    // Remove from the public list of filters.
    $this->unsetFilter($this->fields, $name, $value, $exclude);
    return $this;
  }

  /**
   * Unsets matching entries from a list of filters, in place.
   *
   * Remaining entries keep their position keys.
   *
   * @param array $fields
   *   The filter list to modify.
   * @param string $name
   *   Field name.
   * @param mixed $value
   *   NULL to match on name only.
   * @param bool $exclude
   *   Exclude flag to match when $value is given.
   */
  protected function unsetFilter(&$fields, $name, $value, $exclude) {
    $name_only = !isset($value);
    $wanted = (string) $name;
    foreach ($fields as $pos => $filter) {
      $hit = $name_only
        ? ($filter['#name'] === $wanted)
        : $this->filterMatches($filter, $name, $value, $exclude);
      if ($hit) {
        unset($fields[$pos]);
      }
    }
  }

  /**
   * Compares a stored filter with a name/value/exclude triple.
   *
   * Stored names and values are always strings, so the arguments are cast to
   * strings and compared strictly. A loose comparison would treat e.g. the
   * value 0 as equal to any non-numeric string on PHP versions before 8, and
   * "1e3" as equal to "1000" on all versions.
   */
  protected function filterMatches(array $filter, $name, $value, $exclude) {
    return $filter['#name'] === (string) $name
      && $filter['#value'] === (string) $value
      && (bool) $filter['#exclude'] === (bool) $exclude;
  }

  /**
   * Returns the nested subqueries, keyed by subquery ID.
   */
  public function getFilterSubQueries() {
    return $this->subqueries;
  }

  /**
   * Adds a nested subquery; one with the same ID is replaced.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function addFilterSubQuery(SolrFilterSubQuery $query) {
    $this->subqueries[$query->id] = $query;
    return $this;
  }

  /**
   * Removes the nested subquery that has the same ID as $query.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function removeFilterSubQuery(SolrFilterSubQuery $query) {
    unset($this->subqueries[$query->id]);
    return $this;
  }

  /**
   * Removes all nested subqueries.
   *
   * @return SolrFilterSubQuery
   *   The called object.
   */
  public function removeFilterSubQueries() {
    $this->subqueries = array();
    return $this;
  }

  /**
   * Renders one filter array as a filter query string.
   *
   * @param array $filter
   *   An array with #name and #value, and optionally #exclude and #local.
   *
   * @return string
   *   "{!local}" (if any), "-" (if excluding), then "name:value".
   */
  public function makeFilterQuery(array $filter) {
    $prefix = empty($filter['#exclude']) ? '' : '-';
    // empty() instead of a bare read so a filter without #local does not
    // raise an undefined index notice.
    if (!empty($filter['#local'])) {
      $prefix = '{!' . $filter['#local'] . '}' . $prefix;
    }
    $value = $filter['#value'];
    // If the field value contains a colon or a space, wrap it in double quotes,
    // unless it is a range query or is already wrapped in double quotes or
    // parentheses.
    $needs_quotes = preg_match('/[ :]/', $value)
      && !preg_match('/^[\[\{]\S+ TO \S+[\]\}]$/', $value)
      && !preg_match('/^["\(].*["\)]$/', $value);
    if ($needs_quotes) {
      $value = '"' . $value . '"';
    }
    return $prefix . $filter['#name'] . ':' . $value;
  }

  /**
   * Make sure our query matches the pattern name:value or name:"value"
   * Make sure that if we are ranges we use name:[x TO y]
   * allowed inputs :
   * a. bundle:article
   * b. date:[1970-12-31T23:59:59Z TO NOW]
   *
   * Checks performed, in order:
   * 1. the name may only contain A-Z, a-z, 0-9, underscore and bytes
   *    0x7f-0xff;
   * 2. the value must be valid UTF-8;
   * 3. every bracket type ({}, [], ()) must have as many opening as closing
   *    characters;
   * 4. for a trailing [x TO y] range, both bounds must look like a Solr
   *    date, date math expression or "*".
   *
   * A string that has no "name:" part at all is not checked and is reported
   * as valid.
   *
   * @param string $filter
   *   The filter to validate
   * @return boolean
   */
  public static function validFilterValue($filter) {
    if (!preg_match('/(?P<name>[^:]+):(?P<value>.+)?$/', $filter, $matches)) {
      return TRUE;
    }
    $name = $matches['name'];
    // The value group is optional; it is absent for input such as "name:".
    $value = isset($matches['value']) ? $matches['value'] : '';

    // For the name we allow any character that fits between the A-Z0-9 range and
    // any alternative for this in other languages. No special characters allowed
    if (!preg_match('/^[a-zA-Z0-9_\x7f-\xff]+$/', $name)) {
      return FALSE;
    }

    // For the value we allow anything that is UTF8
    if (!drupal_validate_utf8($value)) {
      return FALSE;
    }

    // Check our bracket count. If it does not match it is also not valid.
    // Every pair is checked on its own; previously the result for {} and []
    // was overwritten by the one for (), so only parentheses were enforced.
    $pairs = array('{' => '}', '[' => ']', '(' => ')');
    foreach ($pairs as $opening => $closing) {
      if (substr_count($value, $opening) !== substr_count($value, $closing)) {
        return FALSE;
      }
    }

    // Check the date field inputs
    if (preg_match('/\[(.+) TO (.+)\]$/', $value, $datefields)) {
      // Only Allow a value in the form of
      // http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
      // http://lucene.apache.org/solr/api/org/apache/solr/util/DateMathParser.html
      // http://wiki.apache.org/solr/SolrQuerySyntax
      // 1976-03-06T23:59:59.999Z (valid)
      // * (valid)
      // 1995-12-31T23:59:59.999Z (valid)
      // 2007-03-06T00:00:00Z (valid)
      // NOW-1YEAR/DAY (valid)
      // NOW/DAY+1DAY (valid)
      // 1976-03-06T23:59:59.999Z+1YEAR (valid)
      // 1976-03-06T23:59:59.999Z/YEAR (valid)
      // 1976-03-06T23::59::59.999Z (invalid)
      // NOTE(review): a bound of "0" counts as empty here and skips the
      // check below, as in the original code - confirm that is intended.
      if (!empty($datefields[1]) && !empty($datefields[2])) {
        // Do not check the full match, only the two bounds. The character
        // class in the second alternative used to be written as the group
        // (\A-Z0-9\+\-\/), which can never match; the set of accepted inputs
        // is unchanged because the pattern is not anchored at the end.
        $pattern = '/(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:[\d\.]{2,6}Z(\S)*)|(^([A-Z\*]+)([A-Z0-9\+\-\/])*)/';
        foreach (array($datefields[1], $datefields[2]) as $datefield) {
          if (!preg_match($pattern, $datefield)) {
            return FALSE;
          }
        }
      }
    }
    return TRUE;
  }

  /**
   * Builds a set of filter queries from $this->fields and all subqueries.
   *
   * Returns an array of strings that can be combined into
   * a URL query parameter or passed to Solr as fq parameters. Each subquery
   * that yields at least one string contributes a single parenthesized entry
   * whose parts are joined with that subquery's operator.
   */
  protected function rebuildFq() {
    $fq = array();
    foreach ($this->fields as $field) {
      $fq[] = $this->makeFilterQuery($field);
    }
    foreach ($this->subqueries as $subquery) {
      $subfq = $subquery->rebuildFq();
      if ($subfq) {
        $fq[] = '(' . implode(' ' . $subquery->operator . ' ', $subfq) . ')';
      }
    }
    return $fq;
  }

}
250
/**
 * The base query object that collects parameters, filters and a sort for one
 * search and passes them to the Solr service object.
 *
 * Filters ("fq") are never kept in $params; they are managed through the
 * filter methods inherited from SolrFilterSubQuery and rendered on demand.
 */
class SolrBaseQuery extends SolrFilterSubQuery implements DrupalSolrQueryInterface {

  /**
   * The parameters that get sent to Solr.
   */
  protected $params = array('start' => 0, 'rows' => 10, 'fq' => array());

  /**
   * The search base path.
   */
  protected $base_path;

  /**
   * Field aliases, see addFieldAliases().
   */
  protected $field_map = array();

  /**
   * DrupalApacheSolrService object
   */
  protected $solr;

  /**
   * The sorts that may be requested.
   * The array keys must always be real Solr index fields.
   */
  protected $available_sorts;

  /**
   * The query name is used to construct a searcher string. Mostly the
   * environment id
   */
  protected $name;

  /**
   * Context values, see addContext().
   */
  protected $context = array();

  /**
   * The parsed sort. Makes sure we always have a valid sort.
   */
  protected $solrsort = array('#name' => 'score', '#direction' => 'desc');

  /**
   * The raw sort string that parseSortString() validates.
   *
   * Declared explicitly because creating it dynamically is deprecated as of
   * PHP 8.2; kept public because an undeclared property was public too.
   */
  public $sortstring = '';

  /**
   * A flag to allow the search to be aborted.
   */
  public $abort_search = FALSE;

  /**
   * A flag to check if need to retrieve another page of the result set
   */
  public $page = 0;

  /**
   * Names of parameters that hold a single value rather than a list.
   *
   * NOTE(review): 'hl.simple.pre/hl.simple.post' is one combined key, so
   * neither 'hl.simple.pre' nor 'hl.simple.post' is treated as single-valued.
   * Left unchanged because splitting it would change what getParam() returns
   * for those names - confirm the intent.
   */
  protected $single_value_params = array(
    'q' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
    'q.op' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q.op
    'q.alt' => TRUE, // http://wiki.apache.org/solr/SearchHandler#q
    'df' => TRUE,
    'qt' => TRUE,
    'defType' => TRUE,
    'timeAllowed' => TRUE,
    'omitHeader' => TRUE,
    'debugQuery' => TRUE,
    'start' => TRUE,
    'rows' => TRUE,
    'stats' => TRUE,
    'facet' => TRUE,
    'facet.prefix' => TRUE,
    'facet.limit' => TRUE,
    'facet.offset' => TRUE,
    'facet.mincount' => TRUE,
    'facet.missing' => TRUE,
    'facet.method' => TRUE,
    'facet.enum.cache.minDf' => TRUE,
    'facet.date.start' => TRUE,
    'facet.date.end' => TRUE,
    'facet.date.gap' => TRUE,
    'facet.date.hardend' => TRUE,
    'facet.date.other' => TRUE,
    'facet.date.include' => TRUE,
    'hl' => TRUE,
    'hl.snippets' => TRUE,
    'hl.fragsize' => TRUE,
    'hl.mergeContiguous' => TRUE,
    'hl.requireFieldMatch' => TRUE,
    'hl.maxAnalyzedChars' => TRUE,
    'hl.alternateField' => TRUE,
    'hl.maxAlternateFieldLength' => TRUE,
    'hl.formatter' => TRUE,
    'hl.simple.pre/hl.simple.post' => TRUE,
    'hl.fragmenter' => TRUE,
    'hl.fragListBuilder' => TRUE,
    'hl.fragmentsBuilder' => TRUE,
    'hl.useFastVectorHighlighter' => TRUE,
    'hl.usePhraseHighlighter' => TRUE,
    'hl.highlightMultiTerm' => TRUE,
    'hl.regex.slop' => TRUE,
    'hl.regex.pattern' => TRUE,
    'hl.regex.maxAnalyzedChars' => TRUE,
    'spellcheck' => TRUE,
  );

  /**
   * @param $name
   *   The search name, used for finding the correct blocks and other config.
   *   Typically "apachesolr".
   *
   * @param $solr
   *   An instantiated DrupalApacheSolrService Object.
   *   Can be instantiated from apachesolr_get_solr().
   *
   * @param $params
   *   Array of params to initialize the object (typically 'q' and 'fq').
   *
   * @param $sortstring
   *   Visible string telling solr how to sort - added to GET query params.
   *
   * @param $base_path
   *   The search base path (without the keywords) for this query, without trailing slash.
   *
   * @param $context
   *   Context values to store on the query, see addContext().
   */
  public function __construct($name, $solr, array $params = array(), $sortstring = '', $base_path = '', $context = array()) {
    parent::__construct();
    $this->name = $name;
    // The Solr object has to be set before addContext(), which reads its ID.
    $this->solr = $solr;
    $this->addContext((array) $context);
    $this->addParams((array) $params);
    // The available sorts have to be set before the sort string is parsed.
    $this->available_sorts = $this->defaultSorts();
    $this->sortstring = trim((string) $sortstring);
    $this->parseSortString();
    $this->base_path = $base_path;
  }

  /**
   * Returns the sorts that are available on a new query.
   *
   * @return array
   *   Keyed by field name; each value has a translated 'title' and a
   *   'default' direction.
   */
  protected function defaultSorts() {
    return array(
      'score' => array('title' => t('Relevancy'), 'default' => 'desc'),
      'title' => array('title' => t('Title'), 'default' => 'asc'),
      'author_s' => array('title' => t('Author'), 'default' => 'asc'),
      'date' => array('title' => t('Date'), 'default' => 'desc'),
    );
  }

  /**
   * Get query name.
   */
  public function getName() {
    return $this->name;
  }

  /**
   * Get query searcher name (for facetapi, views, pages, etc).
   */
  public function getSearcher() {
    return $this->name . '@' . $this->solr->getId();
  }

  /**
   * Get context values.
   */
  public function getContext() {
    return $this->context;
  }

  /**
   * Set context value.
   *
   * @param array $context
   *   Values to merge into the stored context; existing keys are overwritten.
   *
   * @return array
   *   The complete context after merging.
   */
  public function addContext(array $context) {
    foreach ($context as $key => $value) {
      $this->context[$key] = $value;
    }
    // The env_id must match that of the actual $solr object
    $this->context['env_id'] = $this->solr->getId();
    return $this->context;
  }

  /**
   * Returns the value of one parameter.
   *
   * @param string $name
   *   Parameter name. 'fq' is rebuilt from the stored filters.
   *
   * @return mixed
   *   The stored value. For an unset parameter: NULL when the name is
   *   single-valued, an empty array otherwise.
   */
  public function getParam($name) {
    if ($name == 'fq') {
      return $this->rebuildFq();
    }
    if (isset($this->params[$name])) {
      return $this->params[$name];
    }
    return isset($this->single_value_params[$name]) ? NULL : array();
  }

  /**
   * Returns all parameters, with 'fq' rebuilt from the stored filters.
   */
  public function getParams() {
    $params = $this->params;
    $params['fq'] = $this->rebuildFq();
    return $params;
  }

  /**
   * Returns the parameters in the form that is passed to the Solr object.
   */
  public function getSolrParams() {
    $params = $this->getParams();
    // For certain fields Solr prefers a comma separated list.
    foreach (array('fl', 'hl.fl', 'sort', 'mlt.fl') as $name) {
      // Only lists are joined; implode() fails on a plain string.
      if (isset($params[$name]) && is_array($params[$name])) {
        $params[$name] = implode(',', $params[$name]);
      }
    }
    return $params;
  }

  /**
   * Parses one filter query string and stores it as a filter.
   *
   * Understands an optional leading "-" (exclude) and an optional leading
   * "{!...}" local parameter block, in either order, followed by either
   * "name:value" or a bare value.
   *
   * @param string $string
   *   The filter query string.
   * @param mixed $index
   *   Unused; present so the method can serve as an array_walk_recursive()
   *   callback.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  protected function addFq($string, $index = NULL) {
    $string = trim((string) $string);
    $local = '';
    $exclude = FALSE;
    // Empty string rather than NULL, so addFilter() never has to trim NULL.
    $name = '';

    // Check if we are dealing with an exclude
    if (preg_match('/^-(.*)/', $string, $matches)) {
      $exclude = TRUE;
      $string = $matches[1];
    }

    // If {!something} is found as first character then this is a local value.
    // The pattern is anchored so that a "{!...}" inside a value, e.g.
    // _query_:"{!dismax}foo", is left alone instead of discarding everything
    // in front of it.
    if (preg_match('/^\{!([^}]+)\}(.*)/', $string, $matches)) {
      $local = $matches[1];
      $string = $matches[2];
      // makeFilterQuery() writes the exclude marker after the local
      // parameters, so accept it in that position as well.
      if (!$exclude && preg_match('/^-(.*)/', $string, $matches)) {
        $exclude = TRUE;
        $string = $matches[1];
      }
    }

    // Anything that has a name and value
    // check if we have a : in the string
    if (strpos($string, ':') !== FALSE) {
      list($name, $value) = explode(':', $string, 2);
    }
    else {
      $value = $string;
    }
    $this->addFilter($name, $value, $exclude, $local);
    return $this;
  }

  /**
   * Adds a value to a parameter.
   *
   * Single-valued parameters are overwritten (the last element is used when
   * an array is given), 'fq' values are turned into filters, and every other
   * parameter accumulates values in a list.
   *
   * @param string $name
   *   Parameter name.
   * @param mixed $value
   *   A value or an array of values.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function addParam($name, $value) {
    if (isset($this->single_value_params[$name])) {
      if (is_array($value)) {
        $value = end($value);
      }
      $this->params[$name] = $this->normalizeParamValue($value);
      return $this;
    }
    // We never actually populate $this->params['fq']. Instead
    // we manage everything via the filter methods.
    if ($name == 'fq') {
      if (is_array($value)) {
        array_walk_recursive($value, array($this, 'addFq'));
        return $this;
      }
      return $this->addFq($value);
    }

    if (!isset($this->params[$name])) {
      $this->params[$name] = array();
    }
    // A single value becomes a one-element list; an array is re-indexed so
    // that merging appends instead of overwriting keys.
    $param_values = is_array($value) ? array_values($value) : array($value);
    $this->params[$name] = array_merge($this->params[$name], array_map(array($this, 'normalizeParamValue'), $param_values));

    return $this;
  }

  /**
   * Converts a parameter value into the string form that is stored.
   *
   * @return string
   *   'true' or 'false' for booleans, otherwise the trimmed value.
   */
  protected function normalizeParamValue($value) {
    // Convert boolean to string.
    if (is_bool($value)) {
      return $value ? 'true' : 'false';
    }
    // trim(NULL) gives '' as well but is deprecated as of PHP 8.1.
    if ($value === NULL) {
      return '';
    }
    // Convert to trimmed string.
    return trim($value);
  }

  /**
   * Adds several parameters at once, see addParam().
   *
   * @param array $params
   *   Values keyed by parameter name.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function addParams(array $params) {
    foreach ($params as $name => $value) {
      $this->addParam($name, $value);
    }
    return $this;
  }

  /**
   * Removes a parameter; removing 'fq' drops all filters and subqueries.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function removeParam($name) {
    unset($this->params[$name]);
    if ($name == 'fq') {
      $this->fields = array();
      $this->subqueries = array();
    }
    return $this;
  }

  /**
   * Replaces whatever is stored for a parameter with a new value.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function replaceParam($name, $value) {
    $this->removeParam($name);
    return $this->addParam($name, $value);
  }

  /**
   * Handles aliases for field to make nicer URLs.
   *
   * @param $field_map
   *   An array keyed with real Solr index field names with the alias as value.
   *
   * @return DrupalSolrQueryInterface
   *   The called object.
   */
  public function addFieldAliases($field_map) {
    $this->field_map = array_merge($this->field_map, $field_map);
    // The sort string depends on the aliases, so it has to be parsed again.
    $this->parseSortString();
    return $this;
  }

  /**
   * Returns the field aliases.
   */
  public function getFieldAliases() {
    return $this->field_map;
  }

  /**
   * Removes all field aliases.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function clearFieldAliases() {
    $this->field_map = array();
    // The sort string depends on the aliases, so it has to be parsed again.
    $this->parseSortString();
    return $this;
  }

  /**
   * Validates $this->sortstring and updates the sort state from it.
   *
   * An empty string or "score desc" selects the default sort and removes the
   * 'sort' parameter. A string made of "<available field> <asc|desc>" parts
   * is stored as the 'sort' parameter. Any other string leaves the current
   * sort state untouched.
   */
  protected function parseSortString() {
    // Substitute any field aliases with real field names.
    // NOTE(review): strtr() replaces array keys with values, while
    // addFieldAliases() documents the map as real name => alias - confirm
    // which direction is intended.
    $sortstring = strtr($this->sortstring, $this->field_map);
    // Score is a special case - it's the default sort for Solr.
    if ('' == $sortstring || 'score desc' == $sortstring) {
      $this->solrsort['#name'] = 'score';
      $this->solrsort['#direction'] = 'desc';
      unset($this->params['sort']);
      return;
    }
    // Validate and set sort parameter. Field names are quoted so that one
    // containing regular expression syntax cannot alter the pattern.
    $quoted = array();
    foreach (array_keys($this->available_sorts) as $field) {
      $quoted[] = preg_quote($field, '/');
    }
    $fields = implode('|', $quoted);
    if (preg_match('/^(?:(' . $fields . ') (asc|desc),?)+$/', $sortstring, $matches)) {
      // We only use the last match.
      $this->solrsort['#name'] = $matches[1];
      $this->solrsort['#direction'] = $matches[2];
      $this->params['sort'] = array($sortstring);
    }
  }

  /**
   * Returns the sorts that may be requested.
   */
  public function getAvailableSorts() {
    return $this->available_sorts;
  }

  /**
   * Adds or replaces one available sort.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function setAvailableSort($name, $sort) {
    // We expect non-aliased sorts to be added.
    $this->available_sorts[$name] = $sort;
    // Re-parse the sortstring.
    $this->parseSortString();
    return $this;
  }

  /**
   * Replaces the complete list of available sorts.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function setAvailableSorts($sorts) {
    // We expect a complete array of valid sorts.
    $this->available_sorts = $sorts;
    $this->parseSortString();
    return $this;
  }

  /**
   * Removes one available sort.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function removeAvailableSort($name) {
    unset($this->available_sorts[$name]);
    // Re-parse the sortstring.
    $this->parseSortString();
    return $this;
  }

  /**
   * Returns the parsed sort as an array with #name and #direction.
   */
  public function getSolrsort() {
    return $this->solrsort;
  }

  /**
   * Sets the sort from a field name and a direction.
   *
   * @return SolrBaseQuery
   *   The called object.
   */
  public function setSolrsort($name, $direction) {
    $this->sortstring = trim((string) $name) . ' ' . trim((string) $direction);
    $this->parseSortString();
    return $this;
  }

  /**
   * Returns the search path for the given or the current keywords.
   *
   * @param string|null $new_keywords
   *   Keywords to use instead of the current 'q' parameter.
   *
   * @return string
   *   The base path, a slash and the keywords (possibly none).
   */
  public function getPath($new_keywords = NULL) {
    if (isset($new_keywords)) {
      return $this->base_path . '/' . $new_keywords;
    }
    $keys = $this->getParam('q');
    // Compared with NULL and '' explicitly so that the keyword "0" is kept.
    if ($keys !== NULL && $keys !== '') {
      return $this->base_path . '/' . $keys;
    }
    // Return with empty query (the slash). The path for a facet
    // becomes $this->base_path . '//facetinfo';
    // We do this so we can have a consistent way of retrieving the query +
    // additional parameters
    return $this->base_path . '/';
  }

  /**
   * Returns the URL query values that describe the current sort.
   *
   * @return array
   *   array('solrsort' => '<name> <direction>'), with the name replaced by
   *   its entry in the field map when there is one; an empty array for the
   *   default relevancy sort.
   */
  public function getSolrsortUrlQuery() {
    $queryvalues = array();
    $solrsort = $this->solrsort;
    if ($solrsort && ($solrsort['#name'] != 'score')) {
      if (isset($this->field_map[$solrsort['#name']])) {
        $solrsort['#name'] = $this->field_map[$solrsort['#name']];
      }
      $queryvalues['solrsort'] = $solrsort['#name'] . ' ' . $solrsort['#direction'];
    }
    return $queryvalues;
  }

  /**
   * Runs the search through the Solr object.
   *
   * @param string|null $keys
   *   Passed through to the Solr object's search() method.
   *
   * @return mixed
   *   NULL when the search was aborted, otherwise whatever the Solr object's
   *   search() method returns.
   */
  public function search($keys = NULL) {
    if ($this->abort_search) {
      return NULL;
    }

    return $this->solr->search($keys, $this->getSolrParams());
  }

  /**
   * Calls a method without arguments on the Solr object.
   *
   * @param string $method
   *   Name of the method to call.
   *
   * @return mixed
   *   The return value of that method.
   */
  public function solr($method) {
    return $this->solr->$method();
  }

}