Mercurial > hg > solrsearch
comparison Drupal_Apache_Solr_Service.php @ 0:a2b4f67e73dc default tip
initial
| author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
|---|---|
| date | Mon, 08 Jun 2015 10:21:54 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a2b4f67e73dc |
|---|---|
| 1 <?php | |
| 2 | |
| 3 /** | |
| 4 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc. | |
| 5 * All rights reserved. | |
| 6 * | |
| 7 * Redistribution and use in source and binary forms, with or without | |
| 8 * modification, are permitted provided that the following conditions are met: | |
| 9 * | |
| 10 * - Redistributions of source code must retain the above copyright notice, | |
| 11 * this list of conditions and the following disclaimer. | |
| 12 * - Redistributions in binary form must reproduce the above copyright | |
| 13 * notice, this list of conditions and the following disclaimer in the | |
| 14 * documentation and/or other materials provided with the distribution. | |
| 15 * - Neither the name of Conduit Internet Technologies, Inc. nor the names of | |
| 16 * its contributors may be used to endorse or promote products derived from | |
| 17 * this software without specific prior written permission. | |
| 18 * | |
| 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| 20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
| 23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 29 * POSSIBILITY OF SUCH DAMAGE. | |
| 30 * | |
| 31 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com) | |
| 32 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING) | |
| 33 * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $ | |
| 34 * | |
| 35 * @package Apache | |
| 36 * @subpackage Solr | |
| 37 * @author Donovan Jimenez <djimenez@conduit-it.com> | |
| 38 */ | |
| 39 | |
| 40 /** | |
| 41 * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney, | |
| 42 * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors. | |
| 43 * | |
| 44 * This program is free software; you can redistribute it and/or modify | |
| 45 * it under the terms of the GNU General Public License as published by | |
| 46 * the Free Software Foundation; either version 2 of the License, or (at | |
| 47 * your option) any later version. | |
| 48 | |
| 49 * | |
| 50 * This program is distributed in the hope that it will be useful, but | |
| 51 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
| 52 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
| 53 * for more details. | |
| 54 * | |
| 55 * You should have received a copy of the GNU General Public License | |
| 56 * along with this program as the file LICENSE.txt; if not, please see | |
| 57 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. | |
| 58 */ | |
| 59 | |
| 60 /** | |
| 61 * Starting point for the Solr API. Represents a Solr server resource and has | |
| 62 * methods for pinging, adding, deleting, committing, optimizing and searching. | |
| 63 */ | |
| 64 | |
| 65 class DrupalsolrsearchService implements DrupalApacheSolrServiceInterface { | |
/**
 * How NamedLists should be formatted in the output. This specifically
 * affects facet counts. Valid values are 'map' (default) or 'flat'.
 */
const NAMED_LIST_FORMAT = 'map';

/**
 * Servlet mappings: request handler paths, appended to the server/core URL.
 */
const PING_SERVLET = 'admin/ping';
const UPDATE_SERVLET = 'update';
const SEARCH_SERVLET = 'select';
const LUKE_SERVLET = 'admin/luke';
const SYSTEM_SERVLET = 'admin/system';
const STATS_SERVLET = 'admin/stats.jsp';
const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';

/**
 * Server url, split into the components produced by setUrl().
 *
 * @var array
 */
protected $parsed_url;

/**
 * Constructed servlet full path URL of the update handler.
 *
 * @var string
 */
protected $update_url;

/**
 * Default HTTP timeout when one is not specified (initialized to the
 * default_socket_timeout ini setting).
 *
 * @var float
 */
protected $_defaultTimeout;

/**
 * Machine name of the environment this service belongs to; also used as the
 * prefix of all cache IDs.
 */
protected $env_id;

/**
 * Luke (index meta-data) responses, keyed by the number of terms requested.
 */
protected $luke;

/**
 * Parsed statistics document of the Solr core.
 */
protected $stats;

/**
 * Decoded response of the admin/system servlet.
 */
protected $system_info;

/**
 * Cache ID of the system info entry, assigned by setSystemInfo().
 *
 * Declared explicitly because creating properties dynamically is deprecated
 * as of PHP 8.2. Kept public: as a dynamic property it used to be publicly
 * accessible.
 *
 * @var string
 */
public $system_info_cid;

/**
 * Cache ID of the stats entry, assigned by setStats().
 *
 * Declared explicitly for the same reason as $system_info_cid.
 *
 * @var string
 */
public $stats_cid;

/**
 * Flag that denotes whether to use soft commits for Solr 4.x, defaults to FALSE.
 *
 * @var bool
 */
protected $soft_commit = FALSE;
| 115 | |
/**
 * Pings the Solr server through the admin/ping servlet.
 *
 * @param $timeout
 *   Maximum number of seconds to wait for the ping. Any value <= 0 is sent
 *   on as -1 (default 2).
 *
 * @return
 *   (float) Seconds the ping took if the server answered with HTTP 200,
 *   FALSE otherwise.
 */
public function ping($timeout = 2) {
  $started = microtime(TRUE);

  // A HEAD request is sufficient: only the status code is inspected.
  $request_options = array(
    'method' => 'HEAD',
    'timeout' => ($timeout <= 0.0) ? -1 : $timeout,
  );
  $reply = $this->_makeHttpRequest($this->_constructUrl(self::PING_SERVLET), $request_options);

  if ($reply->code != 200) {
    return FALSE;
  }
  // Pad with 0.1 ms so a successful ping never yields 0.0.
  return microtime(TRUE) - $started + 0.0001;
}
| 146 | |
/**
 * Switches soft commits (Solr 4.x) on or off for this service.
 *
 * @param bool $soft_commit
 *   Any value; it is stored as a strict boolean.
 */
public function setSoftCommit($soft_commit) {
  $this->soft_commit = $soft_commit ? TRUE : FALSE;
}
| 156 | |
/**
 * Tells whether soft commits (Solr 4.x) are enabled for this service.
 *
 * @return bool
 *   The current value of the soft commit flag.
 */
public function getSoftCommit() {
  $flag = $this->soft_commit;
  return $flag;
}
| 166 | |
/**
 * Populates $this->system_info from the admin/system servlet.
 *
 * When an environment id is set, the raw JSON response is looked up in (and
 * written to) the 'cache_solrsearch' bin. Nothing is returned; the decoded
 * result is stored on the object.
 */
protected function setSystemInfo() {
  $url = $this->_constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));

  // First pass: try the cache.
  if ($this->env_id) {
    $this->system_info_cid = $this->env_id . ":system:" . drupal_hash_base64($url);
    $cached = cache_get($this->system_info_cid, 'cache_solrsearch');
    if (isset($cached->data)) {
      $this->system_info = json_decode($cached->data);
    }
  }
  if (!empty($this->system_info)) {
    return;
  }

  // Second pass: ask the server and remember the raw answer.
  $response = $this->_sendRawGet($url);
  $this->system_info = json_decode($response->data);
  if ($this->env_id) {
    cache_set($this->system_info_cid, $response->data, 'cache_solrsearch');
  }
}
| 191 | |
/**
 * Get information about the Solr Core.
 *
 * The information is loaded lazily on the first call.
 *
 * @return
 *   The JSON-decoded response of the admin/system servlet.
 */
public function getSystemInfo() {
  if (isset($this->system_info)) {
    return $this->system_info;
  }
  $this->setSystemInfo();
  return $this->system_info;
}
| 204 | |
/**
 * Sets $this->luke with the meta-data about the index from admin/luke.
 *
 * @param $num_terms
 *   Value of the 'numTerms' request parameter; also the key under which the
 *   response is stored in $this->luke.
 */
protected function setLuke($num_terms = 0) {
  // Nothing to do when this variant has been loaded already.
  if (!empty($this->luke[$num_terms])) {
    return;
  }

  $url = $this->_constructUrl(self::LUKE_SERVLET, array(
    'numTerms' => "$num_terms",
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  ));

  // First pass: the cache entry holds the complete luke array.
  if ($this->env_id) {
    $cid = $this->env_id . ":luke:" . drupal_hash_base64($url);
    $cached = cache_get($cid, 'cache_solrsearch');
    if (isset($cached->data)) {
      $this->luke = $cached->data;
    }
  }
  if (!empty($this->luke[$num_terms])) {
    return;
  }

  // Second pass: query the server and refresh the cache entry.
  $this->luke[$num_terms] = $this->_sendRawGet($url);
  if ($this->env_id) {
    cache_set($cid, $this->luke, 'cache_solrsearch');
  }
}
| 232 | |
/**
 * Get just the field meta-data about the index.
 *
 * @param $num_terms
 *   Passed through to getLuke().
 *
 * @return
 *   The 'fields' property of the luke response.
 */
public function getFields($num_terms = 0) {
  $luke = $this->getLuke($num_terms);
  return $luke->fields;
}
| 239 | |
/**
 * Get meta-data about the index.
 *
 * The luke response for the given number of terms is loaded on first use.
 *
 * @param $num_terms
 *   Passed through to setLuke(); also the key of the stored response.
 *
 * @return
 *   The luke response stored for $num_terms.
 */
public function getLuke($num_terms = 0) {
  $already_loaded = isset($this->luke[$num_terms]);
  if (!$already_loaded) {
    $this->setLuke($num_terms);
  }
  return $this->luke[$num_terms];
}
| 249 | |
/**
 * Get the major version of the Solr server.
 *
 * @return int
 *   The major version number (e.g. 1, 3 or 4), or 0 if the system info does
 *   not contain a 'solr-spec-version'. For a more detailed version use the
 *   system info directly.
 */
public function getSolrVersion() {
  $system_info = $this->getSystemInfo();
  if (isset($system_info->lucene->{'solr-spec-version'})) {
    // The integer cast stops at the first '.', so "4.10.3" yields 4 and
    // "10.0.0" yields 10. Taking only the first character would report
    // two-digit major versions as 1.
    return (int) $system_info->lucene->{'solr-spec-version'};
  }
  return 0;
}
| 265 | |
/**
 * Sets $this->stats with the statistics document of the Solr core.
 *
 * The stats servlet depends on the Solr version. The raw XML is cached in
 * 'cache_solrsearch' when an environment id is set.
 */
protected function setStats() {
  $luke = $this->getLuke();
  $version = $this->getSolrVersion();

  // Only try to get stats if none are loaded yet and the index answered.
  if (!empty($this->stats) || !isset($luke->index->numDocs)) {
    return;
  }

  $servlet = ($version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
  $url = $this->_constructUrl($servlet);

  // First pass: try the cache.
  if ($this->env_id) {
    $this->stats_cid = $this->env_id . ":stats:" . drupal_hash_base64($url);
    $cached = cache_get($this->stats_cid, 'cache_solrsearch');
    if (isset($cached->data)) {
      $this->stats = simplexml_load_string($cached->data);
    }
  }
  if (!empty($this->stats)) {
    return;
  }

  // Second pass: ask the server and remember the raw XML.
  $response = $this->_sendRawGet($url);
  $this->stats = simplexml_load_string($response->data);
  if ($this->env_id) {
    cache_set($this->stats_cid, $response->data, 'cache_solrsearch');
  }
}
| 297 | |
/**
 * Get information about the Solr Core.
 *
 * @return
 *   The value of $this->stats after setStats() had a chance to fill it;
 *   when loaded, a SimpleXML document.
 */
public function getStats() {
  if (isset($this->stats)) {
    return $this->stats;
  }
  $this->setStats();
  return $this->stats;
}
| 309 | |
/**
 * Get summary information about the Solr Core.
 *
 * @return array
 *   Placeholder => value pairs ('@pending_docs', '@autocommit_time_seconds',
 *   '@autocommit_time', '@deletes_by_id', '@deletes_by_query',
 *   '@deletes_total', '@schema_version', '@core_name', '@index_size').
 *   All values are empty strings when no stats are available.
 */
public function getStatsSummary() {
  $stats = $this->getStats();
  $solr_version = $this->getSolrVersion();

  $summary = array(
    '@pending_docs' => '',
    '@autocommit_time_seconds' => '',
    '@autocommit_time' => '',
    '@deletes_by_id' => '',
    '@deletes_by_query' => '',
    '@deletes_total' => '',
    '@schema_version' => '',
    '@core_name' => '',
    '@index_size' => '',
  );

  if (empty($stats)) {
    return $summary;
  }

  if ($solr_version <= 3) {
    // Layout of admin/stats.jsp.
    $paths = array(
      'pending' => '//stat[@name="docsPending"]',
      'max_time' => '//stat[@name="autocommit maxTime"]',
      'deletes_id' => '//stat[@name="deletesById"]',
      'deletes_query' => '//stat[@name="deletesByQuery"]',
      'core' => '/solr/core[1]',
      'size' => '//stat[@name="indexSize"]',
    );
    $schema_version = $this->_firstXpathValue($stats, '/solr/schema[1]');
  }
  else {
    // Layout of admin/mbeans.
    // NOTE(review): the predicate ["stats"] / ["core"] is a string literal,
    // which XPath treats as always true, so these match any <lst>. Probably
    // [@name="..."] was intended - left unchanged, confirm before tightening.
    $paths = array(
      'pending' => '//lst["stats"]/long[@name="docsPending"]',
      'max_time' => '//lst["stats"]/str[@name="autocommit maxTime"]',
      'deletes_id' => '//lst["stats"]/long[@name="deletesById"]',
      'deletes_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
      'core' => '//lst["core"]/str[@name="coreName"]',
      'size' => '//lst["core"]/str[@name="indexSize"]',
    );
    // The schema name is taken from the system info, not the stats document.
    $system_info = $this->getSystemInfo();
    $schema_version = isset($system_info->core->schema) ? $system_info->core->schema : '';
  }

  $max_time = (int) $this->_firstXpathValue($stats, $paths['max_time']);

  $summary['@pending_docs'] = (int) $this->_firstXpathValue($stats, $paths['pending']);
  // Convert to seconds.
  $summary['@autocommit_time_seconds'] = $max_time / 1000;
  $summary['@autocommit_time'] = format_interval($max_time / 1000);
  $summary['@deletes_by_id'] = (int) $this->_firstXpathValue($stats, $paths['deletes_id']);
  $summary['@deletes_by_query'] = (int) $this->_firstXpathValue($stats, $paths['deletes_query']);
  $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];
  $summary['@schema_version'] = $schema_version;
  $summary['@core_name'] = $this->_firstXpathValue($stats, $paths['core']);
  $summary['@index_size'] = $this->_firstXpathValue($stats, $paths['size']);

  return $summary;
}

/**
 * Returns the trimmed text of the first node matching an XPath expression.
 *
 * @param SimpleXMLElement $stats
 *   The document to search.
 * @param string $path
 *   The XPath expression.
 *
 * @return string
 *   The trimmed text of the first match, or '' when the expression matches
 *   nothing or cannot be evaluated.
 */
protected function _firstXpathValue($stats, $path) {
  $nodes = $stats->xpath($path);
  // xpath() yields an empty array for "no match" and may yield a non-array
  // on error; neither has a first element to read.
  if (empty($nodes) || !is_array($nodes)) {
    return '';
  }
  return trim((string) reset($nodes));
}
| 375 | |
/**
 * Clear cached Solr data, but only if the server can be reached.
 *
 * @throws Exception
 *   When the ping to the Solr server does not succeed.
 */
public function clearCache() {
  // Don't clear cached data if the server is unavailable.
  if (!@$this->ping()) {
    throw new Exception('No Solr instance available when trying to clear the cache.');
  }
  $this->_clearCache();
}
| 388 | |
/**
 * Drops the cached stats and luke data without checking the server first.
 *
 * Clears the matching 'cache_solrsearch' entries of this environment (if an
 * environment id is set) and resets the in-memory copies.
 */
protected function _clearCache() {
  if ($this->env_id) {
    foreach (array(":stats:", ":luke:") as $segment) {
      // TRUE = wildcard: every cache ID starting with this prefix.
      cache_clear_all($this->env_id . $segment, 'cache_solrsearch', TRUE);
    }
  }
  $this->luke = array();
  $this->stats = NULL;
}
| 397 | |
/**
 * Constructor
 *
 * @param $url
 *   The URL to the Solr server, possibly including a core name. E.g.
 *   http://localhost:8983/solr/ or https://search.example.com/solr/core99/
 * @param $env_id
 *   The machine name of a corresponding saved configuration used for loading
 *   data like which facets are enabled.
 */
public function __construct($url, $env_id = NULL) {
  $this->env_id = $env_id;
  $this->setUrl($url);

  // Take the default HTTP timeout from the ini settings, falling back to
  // 60 seconds when the setting is missing, zero or negative.
  $ini_timeout = (int) ini_get('default_socket_timeout');
  $this->_defaultTimeout = ($ini_timeout > 0) ? $ini_timeout : 60;
}
| 420 | |
/**
 * Get the environment id this service was constructed with.
 *
 * @return
 *   The environment machine name, or NULL if none was given.
 */
public function getId() {
  $id = $this->env_id;
  return $id;
}
| 424 | |
/**
 * Check the response code and throw an exception if it's not 200.
 *
 * @param stdClass $response
 *   Response object with at least 'code' and, for failures,
 *   'status_message' and 'data'.
 *
 * @return
 *   The unchanged response object when the code is 200.
 *
 * @throws Exception
 *   For every other code. The message carries the code and status message.
 */
protected function checkResponse($response) {
  $status = (int) $response->code;
  if ($status == 200) {
    return $response;
  }

  // Add details, like Solr's exception message, for errors other than
  // 403 and 404.
  $is_detailed_error = ($status >= 400) && !($status == 403 || $status == 404);
  if ($is_detailed_error) {
    $response->status_message .= $response->data;
  }
  throw new Exception('"' . $status . '" Status: ' . $response->status_message);
}
| 446 | |
/**
 * Make a request to a servlet (a path) that's not a standard path.
 *
 * @param string $servlet
 *   A path to be added to the base Solr path. e.g. 'extract/tika'
 * @param array $params
 *   Any request parameters when constructing the URL. 'wt' and 'json.nl'
 *   are filled in with defaults when absent.
 * @param array $options
 *   @see drupal_http_request() $options.
 *
 * @return
 *   response object
 *
 * @throws Exception
 *   When the response code is not 200.
 */
public function makeServletRequest($servlet, $params = array(), $options = array()) {
  // Array union: values supplied by the caller win over the defaults.
  $defaults = array(
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  $params = $params + $defaults;

  $target = $this->_constructUrl($servlet, $params);
  return $this->checkResponse($this->_makeHttpRequest($target, $options));
}
| 475 | |
/**
 * Central method for making a GET operation against this Solr Server
 *
 * @param $url
 *   Full URL to request.
 * @param $options
 *   Options handed on to _makeHttpRequest().
 *
 * @return
 *   The response object, if its code is 200.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
protected function _sendRawGet($url, $options = array()) {
  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
| 483 | |
/**
 * Central method for making a POST operation against this Solr Server
 *
 * @param $url
 *   Full URL to post to.
 * @param $options
 *   Options handed on to _makeHttpRequest(); 'method' is forced to POST.
 *
 * @return
 *   The response object, if its code is 200.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
protected function _sendRawPost($url, $options = array()) {
  // Normally we use POST to send XML documents, so that is the content
  // type unless the caller chose one.
  $has_content_type = isset($options['headers']['Content-Type']);
  if (!$has_content_type) {
    $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
  }
  $options['method'] = 'POST';

  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
| 496 | |
/**
 * Central method for making the actual http request to the Solr Server
 *
 * This is just a wrapper around drupal_http_request() that normalizes the
 * result: failed requests get code 0, 'data' is always set, and the
 * top-level members of a JSON object body are copied onto the result.
 *
 * @param $url
 *   Full URL to request.
 * @param array $options
 *   @see drupal_http_request() $options.
 *
 * @return
 *   The normalized result object.
 */
protected function _makeHttpRequest($url, array $options = array()) {
  $method = isset($options['method']) ? $options['method'] : 'GET';
  if ($method == 'GET' || $method == 'HEAD') {
    // Make sure we are not sending a request body.
    $options['data'] = NULL;
  }

  $result = drupal_http_request($url, $options);

  $request_failed = !isset($result->code) || $result->code < 0;
  if ($request_failed) {
    $result->code = 0;
    $result->status_message = 'Request failed';
    $result->protocol = 'HTTP/1.0';
  }
  // Additional information may be in the error property.
  if (isset($result->error)) {
    $result->status_message .= ': ' . check_plain($result->error);
  }

  if (isset($result->data)) {
    $decoded = json_decode($result->data);
    if (is_object($decoded)) {
      // Expose the members of the JSON document directly on the result.
      foreach ($decoded as $member => $member_value) {
        $result->$member = $member_value;
      }
    }
  }
  else {
    $result->data = '';
    $result->response = NULL;
  }
  return $result;
}
| 534 | |
| 535 | |
/**
 * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
 *
 * NOTE: inside a phrase fewer characters need escaped, use {@link DrupalsolrsearchService::escapePhrase()} instead
 *
 * @param string $value
 *   The raw value.
 *
 * @return string
 *   The value with every special character (or '&&' / '||' pair) prefixed
 *   by a backslash.
 */
static public function escape($value)
{
  // List taken from the Lucene query parser syntax documentation, "Escaping
  // Special Characters". '/' is included because the Lucene 4 query parser
  // treats it as a special character (regular expression delimiter).
  $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\/|\\\)/';
  $replace = '\\\$1';

  return preg_replace($pattern, $replace, $value);
}
| 552 | |
/**
 * Escape a value meant to be contained in a phrase for special query characters
 *
 * Only the double quote and the backslash are escaped.
 *
 * @param string $value
 *   The raw value.
 *
 * @return string
 *   The value with '"' and '\' prefixed by a backslash.
 */
static public function escapePhrase($value)
{
  return preg_replace('/("|\\\)/', '\\\$1', $value);
}
| 566 | |
/**
 * Convenience function for creating phrase syntax from a value
 *
 * @param string $value
 *   The raw value.
 *
 * @return string
 *   The phrase-escaped value wrapped in double quotes.
 */
static public function phrase($value)
{
  $escaped = self::escapePhrase($value);
  return '"' . $escaped . '"';
}
| 577 | |
/**
 * Return a valid http URL given this server's host, port and path and a provided servlet name
 *
 * @param $servlet
 *   A string path to a Solr request handler.
 * @param $params
 *   Request parameters, encoded with httpBuildQuery().
 * @param $added_query_string
 *   An already encoded query string to append after the encoded $params.
 *
 * @return string
 */
protected function _constructUrl($servlet, $params = array(), $added_query_string = NULL) {
  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $built = $this->httpBuildQuery($params);

  $segments = array();
  if ($built) {
    $segments[] = $built;
  }
  if ($added_query_string) {
    $segments[] = $added_query_string;
  }
  // Without any segment the (empty) built string is appended unchanged.
  $query_string = $segments ? '?' . implode('&', $segments) : $built;

  $url = $this->parsed_url;
  $pieces = array(
    $url['scheme'],
    $url['user'],
    $url['pass'],
    $url['host'],
    $url['port'],
    $url['path'],
    $servlet,
    $query_string,
  );
  return implode('', $pieces);
}
| 606 | |
/**
 * Get the Solr url
 *
 * @return string
 *   The base URL of the server/core, i.e. the URL built for an empty
 *   servlet name.
 */
public function getUrl() {
  $base_url = $this->_constructUrl('');
  return $base_url;
}
| 615 | |
/**
 * Set the Solr url.
 *
 * The URL is split with parse_url() and every component is stored in the
 * form in which _constructUrl() concatenates it (scheme with '://', password
 * and port with a leading ':', host with a leading '@' when a user is given,
 * path with exactly one leading and one trailing slash).
 *
 * @param $url
 *   The URL of the Solr server. The scheme defaults to 'http'.
 *
 * @return $this
 */
public function setUrl($url) {
  $parts = parse_url($url);

  $scheme = isset($parts['scheme']) ? $parts['scheme'] : 'http';
  $parts['scheme'] = $scheme . '://';

  if (isset($parts['user'])) {
    // Credentials precede the host, separated by '@'.
    $parts['host'] = '@' . $parts['host'];
  }
  else {
    $parts['user'] = '';
  }
  foreach (array('pass', 'port') as $component) {
    $parts[$component] = isset($parts[$component]) ? ':' . $parts[$component] : '';
  }

  // Make sure the path has a single leading/trailing slash; a missing path
  // becomes '/'.
  $path = isset($parts['path']) ? $parts['path'] : '';
  $parts['path'] = rtrim('/' . ltrim($path, '/'), '/') . '/';

  // For now we ignore query and fragment.
  $this->parsed_url = $parts;
  // Force the update url to be rebuilt.
  unset($this->update_url);
  return $this;
}
| 654 | |
/**
 * Raw update Method. Takes a raw post body and sends it to the update service. Post body
 * should be a complete and well formed xml document.
 *
 * @param string $rawPost
 *   The XML document to post.
 * @param float $timeout
 *   Maximum expected duration (in seconds); when empty no timeout option
 *   is passed on.
 *
 * @return response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function update($rawPost, $timeout = FALSE) {
  // @todo: throw exception if updates are disabled.
  if (empty($this->update_url)) {
    // Build the URL once per instance: many updates may be sent through a
    // single service object.
    $this->update_url = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
  }

  $options = array('data' => $rawPost);
  if ($timeout) {
    $options['timeout'] = $timeout;
  }
  return $this->_sendRawPost($this->update_url, $options);
}
| 679 | |
/**
 * Add an array of Solr Documents to the index all at once
 *
 * @param array $documents
 *   Should be an array of solrsearchDocument instances; anything else in the
 *   array is skipped.
 * @param boolean $overwrite
 *   When not NULL, sent as the 'overwrite' attribute of the <add> element
 *   ("true" or "false").
 * @param int $commitWithin
 *   When not NULL, sent as the 'commitWithin' attribute of the <add>
 *   element (converted to an integer).
 *
 * @return response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function addDocuments($documents, $overwrite = NULL, $commitWithin = NULL) {
  $attr = '';

  if (isset($overwrite)) {
    // The parentheses are required: '.' binds tighter than '?:', so without
    // them the whole concatenation becomes the condition and the attribute
    // collapses to a bare 'false"'.
    $attr .= ' overwrite="' . (empty($overwrite) ? 'false' : 'true') . '"';
  }
  if (isset($commitWithin)) {
    $attr .= ' commitWithin="' . intval($commitWithin) . '"';
  }

  $rawPost = "<add{$attr}>";
  foreach ($documents as $document) {
    if (is_object($document) && ($document instanceof solrsearchDocument)) {
      $rawPost .= solrsearchDocument::documentToXml($document);
    }
  }
  $rawPost .= '</add>';

  return $this->update($rawPost);
}
| 712 | |
/**
 * Send a commit command. Will be synchronous unless both wait parameters are set to false.
 *
 * @param boolean $optimize Defaults to true
 *   optimizes the index files. Only sent for solr versions <= 3
 * @param boolean $waitFlush
 *   block until index changes are flushed to disk. Only sent for solr versions <= 3
 * @param boolean $waitSearcher
 *   block until a new searcher is opened and registered as the main query searcher, making the changes visible.
 * @param float $timeout
 *   Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception)
 *
 * @return response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  $wait_searcher = $waitSearcher ? 'true' : 'false';

  if ($this->getSolrVersion() <= 3) {
    $wait_flush = $waitFlush ? 'true' : 'false';
    $do_optimize = $optimize ? 'true' : 'false';
    $rawPost = sprintf('<commit waitSearcher="%s" waitFlush="%s" optimize="%s" />', $wait_searcher, $wait_flush, $do_optimize);
  }
  else {
    // Newer servers get the soft commit flag instead.
    $soft = $this->soft_commit ? 'true' : 'false';
    $rawPost = sprintf('<commit waitSearcher="%s" softCommit="%s" />', $wait_searcher, $soft);
  }

  $response = $this->update($rawPost, $timeout);
  // The index changed, so cached luke/stats data is stale.
  $this->_clearCache();
  return $response;
}
| 747 | |
/**
 * Create a delete document based on document ID
 *
 * Shortcut for deleteByMultipleIds() with a single id.
 *
 * @param string $id Expected to be utf-8 encoded
 * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
 *
 * @return response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function deleteById($id, $timeout = 3600) {
  $ids = array($id);
  return $this->deleteByMultipleIds($ids, $timeout);
}
| 761 | |
/**
 * Create and post a delete document based on multiple document IDs.
 *
 * @param array $ids Expected to be utf-8 encoded strings
 * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
 *
 * @return response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function deleteByMultipleIds($ids, $timeout = 3600) {
  $id_elements = array();
  foreach ($ids as $id) {
    // Element content only: quotes need no escaping.
    $id_elements[] = '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
  }
  $rawPost = '<delete>' . implode('', $id_elements) . '</delete>';

  return $this->update($rawPost, $timeout);
}
| 782 | |
/**
 * Create a delete document based on a query and submit it
 *
 * @param string $rawQuery Expected to be utf-8 encoded
 * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
 *
 * @return stdClass response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function deleteByQuery($rawQuery, $timeout = 3600) {
  // Element content only: quotes need no escaping.
  $xml_query = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
  $rawPost = sprintf('<delete><query>%s</query></delete>', $xml_query);

  return $this->update($rawPost, $timeout);
}
| 797 | |
/**
 * Send an optimize command. Will be synchronous unless both wait parameters are set
 * to false.
 *
 * @param boolean $waitFlush
 *   block until index changes are flushed to disk. Only sent for solr versions <= 3
 * @param boolean $waitSearcher
 *   block until a new searcher is opened and registered as the main query searcher, making the changes visible.
 * @param float $timeout
 *   Maximum expected duration of the optimize operation on the server (otherwise, will throw a communication exception)
 *
 * @return response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function optimize($waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  $wait_searcher = $waitSearcher ? 'true' : 'false';

  if ($this->getSolrVersion() <= 3) {
    $wait_flush = $waitFlush ? 'true' : 'false';
    $rawPost = sprintf('<optimize waitSearcher="%s" waitFlush="%s" />', $wait_searcher, $wait_flush);
  }
  else {
    // Newer servers get the soft commit flag instead.
    $soft = $this->soft_commit ? 'true' : 'false';
    $rawPost = sprintf('<optimize waitSearcher="%s" softCommit="%s" />', $wait_searcher, $soft);
  }

  return $this->update($rawPost, $timeout);
}
| 828 | |
/**
 * Like PHP's built in http_build_query(), but uses rawurlencode() and no [] for repeated params.
 *
 * @param array $query
 *   Parameter name => value. An array value repeats the parameter once per
 *   element; a NULL value emits the bare key; an empty array emits nothing.
 * @param string $parent
 *   Internal: the already encoded key to use for the elements of a nested
 *   array.
 *
 * @return string
 *   The encoded query string, without a leading '?'.
 */
protected function httpBuildQuery(array $query, $parent = '') {
  $params = array();

  foreach ($query as $key => $value) {
    $key = ($parent ? $parent : rawurlencode($key));

    // Recurse into children.
    if (is_array($value)) {
      $nested = $this->httpBuildQuery($value, $key);
      // An empty child array produces no pairs; skip it so the result does
      // not contain empty segments ('&&', leading or trailing '&').
      if ($nested !== '') {
        $params[] = $nested;
      }
    }
    // If a query parameter value is NULL, only append its key.
    elseif (!isset($value)) {
      $params[] = $key;
    }
    else {
      $params[] = $key . '=' . rawurlencode($value);
    }
  }

  return implode('&', $params);
}
| 853 | |
/**
 * Simple Search interface
 *
 * Users without the "view restricted content" permission always get the
 * additional filter query 'access-type:free'.
 *
 * @param string $query
 *   The raw query string.
 * @param array $params
 *   key / value pairs for other query parameters (see Solr documentation),
 *   use arrays for parameter keys used more than once (e.g. facet.field).
 *   'fq' may be a single string or an array of strings.
 * @param string $method
 *   'GET' or 'POST'. Long query strings are switched to POST automatically.
 *
 * @return response object
 *
 * @throws Exception If an error occurs during the service call
 */
public function search($query = '', array $params = array(), $method = 'GET') {
  if (!user_access("view restricted content")) {
    // Normalize 'fq' first: appending with [] to a string value is a fatal
    // error, which would break the search for exactly the users that must
    // be restricted.
    $filters = isset($params['fq']) ? (array) $params['fq'] : array();
    $filters[] = 'access-type:free';
    $params['fq'] = $filters;
  }

  // Always use JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
  $params['wt'] = 'json';
  // Additional default params.
  $params += array(
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  if ($query) {
    $params['q'] = $query;
  }
  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $queryString = $this->httpBuildQuery($params);
  // Check string length of the query string, change method to POST
  $len = strlen($queryString);
  // Fetch our threshold to find out when to flip to POST
  $max_len = solrsearch_environment_variable_get($this->env_id, 'solrsearch_search_post_threshold', 3600);

  // if longer than $max_len (default 3600) characters
  // we should switch to POST (a typical server handles 4096 max).
  // If this class is used independently (without environments), we switch
  // automatically to POST at a limit of 1800 chars.
  if (($len > 1800) && (empty($this->env_id) || ($len > $max_len))) {
    $method = 'POST';
  }

  if ($method == 'GET') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET, array(), $queryString);

    return $this->_sendRawGet($searchUrl);
  }
  elseif ($method == 'POST') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
    $options = array(
      'data' => $queryString,
      'headers' => array(
        'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
      ),
    );
    return $this->_sendRawPost($searchUrl, $options);
  }
  else {
    throw new Exception("Unsupported method '$method' for search(), use GET or POST");
  }
}
| 913 } |
