0
|
1 <?php
|
|
2
|
|
3 /**
|
|
4 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
|
|
5 * All rights reserved.
|
|
6 *
|
|
7 * Redistribution and use in source and binary forms, with or without
|
|
8 * modification, are permitted provided that the following conditions are met:
|
|
9 *
|
|
10 * - Redistributions of source code must retain the above copyright notice,
|
|
11 * this list of conditions and the following disclaimer.
|
|
12 * - Redistributions in binary form must reproduce the above copyright
|
|
13 * notice, this list of conditions and the following disclaimer in the
|
|
14 * documentation and/or other materials provided with the distribution.
|
|
15 * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
|
|
16 * its contributors may be used to endorse or promote products derived from
|
|
17 * this software without specific prior written permission.
|
|
18 *
|
|
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
29 * POSSIBILITY OF SUCH DAMAGE.
|
|
30 *
|
|
31 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
|
|
32 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
|
|
33 * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
|
|
34 *
|
|
35 * @package Apache
|
|
36 * @subpackage Solr
|
|
37 * @author Donovan Jimenez <djimenez@conduit-it.com>
|
|
38 */
|
|
39
|
|
40 /**
|
|
41 * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
|
|
42 * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
|
|
43 *
|
|
44 * This program is free software; you can redistribute it and/or modify
|
|
45 * it under the terms of the GNU General Public License as published by
|
|
46 * the Free Software Foundation; either version 2 of the License, or (at
|
|
47 * your option) any later version.
|
|
48
|
|
49 *
|
|
50 * This program is distributed in the hope that it will be useful, but
|
|
51 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
52 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
53 * for more details.
|
|
54 *
|
|
55 * You should have received a copy of the GNU General Public License
|
|
56 * along with this program as the file LICENSE.txt; if not, please see
|
|
57 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
|
|
58 */
|
|
59
|
|
60 /**
|
|
61 * Starting point for the Solr API. Represents a Solr server resource and has
|
|
62 * methods for pinging, adding, deleting, committing, optimizing and searching.
|
|
63 */
|
|
64
|
|
65 class DrupalApacheSolrService implements DrupalApacheSolrServiceInterface {
|
|
66 /**
|
|
67 * How NamedLists should be formatted in the output. This specifically affects facet counts. Valid values
|
|
68 * are 'map' (default) or 'flat'.
|
|
69 *
|
|
70 */
|
|
71 const NAMED_LIST_FORMAT = 'map';
|
|
72
|
|
73 /**
|
|
74 * Servlet mappings
|
|
75 */
|
|
76 const PING_SERVLET = 'admin/ping';
|
|
77 const UPDATE_SERVLET = 'update';
|
|
78 const SEARCH_SERVLET = 'select';
|
|
79 const LUKE_SERVLET = 'admin/luke';
|
|
80 const SYSTEM_SERVLET = 'admin/system';
|
|
81 const STATS_SERVLET = 'admin/stats.jsp';
|
|
82 const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';
|
|
83
|
|
84 /**
|
|
85 * Server url
|
|
86 *
|
|
87 * @var array
|
|
88 */
|
|
89 protected $parsed_url;
|
|
90
|
|
91 /**
|
|
92 * Constructed servlet full path URLs
|
|
93 *
|
|
94 * @var string
|
|
95 */
|
|
96 protected $update_url;
|
|
97
|
|
98 /**
|
|
99 * Default HTTP timeout when one is not specified (initialized to default_socket_timeout ini setting)
|
|
100 *
|
|
101 * @var float
|
|
102 */
|
|
103 protected $_defaultTimeout;
|
|
104 protected $env_id;
|
|
105 protected $luke;
|
|
106 protected $stats;
|
|
107 protected $system_info;
|
|
108
|
|
109 /**
|
|
110 * Flag that denotes whether to use soft commits for Solr 4.x, defaults to FALSE.
|
|
111 *
|
|
112 * @var bool
|
|
113 */
|
|
114 protected $soft_commit = FALSE;
|
|
115
|
|
/**
 * Pings the Solr server through the admin/ping servlet.
 *
 * @param $timeout
 *   Maximum number of seconds to wait for the ping (default 2). Zero or a
 *   negative value is normalized to -1 before being handed to the HTTP layer.
 *
 * @return
 *   (float) Elapsed seconds when the server answers with HTTP 200, or FALSE
 *   for any other response code.
 */
public function ping($timeout = 2) {
  $started = microtime(TRUE);

  // A HEAD request is sufficient: only the status code is inspected.
  $response = $this->_makeHttpRequest(
    $this->_constructUrl(self::PING_SERVLET),
    array(
      'method' => 'HEAD',
      'timeout' => ($timeout <= 0.0) ? -1 : $timeout,
    )
  );

  if ($response->code != 200) {
    return FALSE;
  }
  // Pad by 0.1 ms so a successful ping never evaluates to 0.0.
  return microtime(TRUE) - $started + 0.0001;
}
|
|
146
|
|
/**
 * Stores the soft-commit preference used when building Solr 4.x commands.
 *
 * @param bool $soft_commit
 *   Any value; it is reduced to a strict boolean before being stored.
 */
public function setSoftCommit($soft_commit) {
  $this->soft_commit = $soft_commit ? TRUE : FALSE;
}
|
|
156
|
|
/**
 * Reports the stored soft-commit preference for Solr 4.x.
 *
 * @return bool
 *   The current value of the soft-commit flag.
 */
public function getSoftCommit() {
  $use_soft_commit = $this->soft_commit;
  return $use_soft_commit;
}
|
|
166
|
|
/**
 * Populates $this->system_info from the /admin/system servlet.
 *
 * When an environment ID is set, the raw JSON response is read from and
 * written to the 'cache_apachesolr' bin; otherwise Solr is always queried.
 * Nothing is returned: the decoded response is stored on the object.
 *
 * @throws Exception
 *   If the request to Solr does not return HTTP 200.
 */
protected function setSystemInfo() {
  $url = $this->_constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));

  // The cache ID is only needed within this call, so it is kept in a local
  // variable instead of being written to an undeclared object property.
  $cid = NULL;
  if ($this->env_id) {
    $cid = $this->env_id . ":system:" . drupal_hash_base64($url);
    $cache = cache_get($cid, 'cache_apachesolr');
    if (isset($cache->data)) {
      $this->system_info = json_decode($cache->data);
    }
  }

  // Second pass: query Solr when nothing usable came out of the cache.
  if (empty($this->system_info)) {
    $response = $this->_sendRawGet($url);
    $this->system_info = json_decode($response->data);
    if ($cid !== NULL) {
      // The raw JSON is cached; it is decoded again on every cache hit.
      cache_set($cid, $response->data, 'cache_apachesolr');
    }
  }
}
|
|
191
|
|
/**
 * Returns the system information of the Solr core, loading it on first use.
 *
 * @return
 *   The value stored in $this->system_info, which setSystemInfo() fills with
 *   the json_decode()'d response of the /admin/system servlet.
 */
public function getSystemInfo() {
  if (isset($this->system_info)) {
    return $this->system_info;
  }
  $this->setSystemInfo();
  return $this->system_info;
}
|
|
204
|
|
/**
 * Sets $this->luke[$num_terms] with the index meta-data from admin/luke.
 *
 * The response is looked up in the 'cache_apachesolr' bin first (only when an
 * environment ID is set) and requested from Solr when the cache has nothing
 * for this number of terms.
 *
 * @param $num_terms
 *   Value sent as the 'numTerms' request parameter; also the key under which
 *   the response is stored in $this->luke.
 *
 * @throws Exception
 *   If the request to Solr does not return HTTP 200.
 */
protected function setLuke($num_terms = 0) {
  if (!empty($this->luke[$num_terms])) {
    // Already loaded during this request.
    return;
  }

  $params = array(
    'numTerms' => "$num_terms",
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  $url = $this->_constructUrl(self::LUKE_SERVLET, $params);

  $cid = NULL;
  if ($this->env_id) {
    $cid = $this->env_id . ":luke:" . drupal_hash_base64($url);
    $cache = cache_get($cid, 'cache_apachesolr');
    if (isset($cache->data) && is_array($cache->data)) {
      // The cached value is a whole numTerms => response map. Merge it into
      // what is already in memory (cached entries win) instead of replacing
      // the map, so responses loaded for other numTerms values are kept.
      $loaded = is_array($this->luke) ? $this->luke : array();
      $this->luke = $cache->data + $loaded;
    }
  }

  // Second pass to populate the cache if necessary.
  if (empty($this->luke[$num_terms])) {
    $this->luke[$num_terms] = $this->_sendRawGet($url);
    if ($cid !== NULL) {
      cache_set($cid, $this->luke, 'cache_apachesolr');
    }
  }
}
|
|
232
|
|
/**
 * Returns only the 'fields' member of the Luke meta-data.
 *
 * @param $num_terms
 *   Passed through to getLuke().
 */
public function getFields($num_terms = 0) {
  $luke = $this->getLuke($num_terms);
  return $luke->fields;
}
|
|
239
|
|
/**
 * Returns the Luke meta-data of the index, loading it on first use.
 *
 * @param $num_terms
 *   Key of the requested entry in $this->luke; forwarded to setLuke() when
 *   that entry has not been loaded yet.
 */
public function getLuke($num_terms = 0) {
  $already_loaded = isset($this->luke[$num_terms]);
  if (!$already_loaded) {
    $this->setLuke($num_terms);
  }
  return $this->luke[$num_terms];
}
|
|
249
|
|
/**
 * Get the major version of the Solr server.
 *
 * @return int
 *   The leading integer of the 'solr-spec-version' reported in the system
 *   info (e.g. 1, 3 or 4), or 0 when that value is not available. For the
 *   detailed version string, inspect getSystemInfo() directly.
 */
public function getSolrVersion() {
  $system_info = $this->getSystemInfo();
  if (!isset($system_info->lucene->{'solr-spec-version'})) {
    return 0;
  }
  $version = $system_info->lucene->{'solr-spec-version'};
  if (!is_scalar($version)) {
    return 0;
  }
  // An integer cast keeps the whole leading number ("4.10.2" => 4,
  // "10.0.0" => 10), where taking only the first character would return a
  // string and break on two-digit major versions.
  return (int) $version;
}
|
|
265
|
|
/**
 * Sets $this->stats with the statistics of the Solr core.
 *
 * The statistics are read from admin/mbeans for Solr 4 and later and from
 * admin/stats.jsp otherwise. Nothing is requested when the Luke data carries
 * no document count, i.e. when the index could not be reached. The raw XML is
 * cached in the 'cache_apachesolr' bin when an environment ID is set.
 *
 * @throws Exception
 *   If a request to Solr does not return HTTP 200.
 */
protected function setStats() {
  $data = $this->getLuke();
  $solr_version = $this->getSolrVersion();

  // Only try to get stats if we have connected to the index.
  if (!empty($this->stats) || !isset($data->index->numDocs)) {
    return;
  }

  $servlet = ($solr_version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
  $url = $this->_constructUrl($servlet);

  // The cache ID is only needed within this call, so it is kept in a local
  // variable instead of being written to an undeclared object property.
  $cid = NULL;
  if ($this->env_id) {
    $cid = $this->env_id . ":stats:" . drupal_hash_base64($url);
    $cache = cache_get($cid, 'cache_apachesolr');
    if (isset($cache->data)) {
      $this->stats = simplexml_load_string($cache->data);
    }
  }

  // Second pass to populate the cache if necessary.
  if (empty($this->stats)) {
    $response = $this->_sendRawGet($url);
    $this->stats = simplexml_load_string($response->data);
    if ($cid !== NULL) {
      cache_set($cid, $response->data, 'cache_apachesolr');
    }
  }
}
|
|
297
|
|
/**
 * Returns the statistics of the Solr core, loading them on first use.
 *
 * @return
 *   The value stored in $this->stats; setStats() fills it with the result of
 *   simplexml_load_string() on the statistics response.
 */
public function getStats() {
  if (isset($this->stats)) {
    return $this->stats;
  }
  $this->setStats();
  return $this->stats;
}
|
|
309
|
|
/**
 * Get summary information about the Solr Core.
 *
 * @return array
 *   An array keyed by '@pending_docs', '@autocommit_time_seconds',
 *   '@autocommit_time', '@deletes_by_id', '@deletes_by_query',
 *   '@deletes_total', '@schema_version', '@core_name' and '@index_size'.
 *   Every value is an empty string when no statistics are available.
 */
public function getStatsSummary() {
  $stats = $this->getStats();
  $solr_version = $this->getSolrVersion();

  $summary = array(
    '@pending_docs' => '',
    '@autocommit_time_seconds' => '',
    '@autocommit_time' => '',
    '@deletes_by_id' => '',
    '@deletes_by_query' => '',
    '@deletes_total' => '',
    '@schema_version' => '',
    '@core_name' => '',
    '@index_size' => '',
  );

  if (empty($stats)) {
    return $summary;
  }

  if ($solr_version <= 3) {
    // Layout of admin/stats.jsp.
    $paths = array(
      'pending' => '//stat[@name="docsPending"]',
      'max_time' => '//stat[@name="autocommit maxTime"]',
      'deletes_by_id' => '//stat[@name="deletesById"]',
      'deletes_by_query' => '//stat[@name="deletesByQuery"]',
      'core' => '/solr/core[1]',
      'size' => '//stat[@name="indexSize"]',
    );
    $schema_version = $this->_firstXpathValue($stats, '/solr/schema[1]');
  }
  else {
    // Layout of admin/mbeans.
    // NOTE(review): the ["stats"] / ["core"] predicates are string literals,
    // not attribute tests, so they do not restrict which <lst> elements
    // match. They are left untouched because narrowing them could change
    // which values are found - confirm against a real mbeans response.
    $paths = array(
      'pending' => '//lst["stats"]/long[@name="docsPending"]',
      'max_time' => '//lst["stats"]/str[@name="autocommit maxTime"]',
      'deletes_by_id' => '//lst["stats"]/long[@name="deletesById"]',
      'deletes_by_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
      'core' => '//lst["core"]/str[@name="coreName"]',
      'size' => '//lst["core"]/str[@name="indexSize"]',
    );
    // Here the schema name comes from the system info, not the stats XML.
    $system_info = $this->getSystemInfo();
    $schema_version = isset($system_info->core->schema) ? $system_info->core->schema : '';
  }

  $summary['@pending_docs'] = (int) $this->_firstXpathValue($stats, $paths['pending']);
  // The max time is divided by 1000 to convert it to seconds.
  $max_time = (int) $this->_firstXpathValue($stats, $paths['max_time']);
  $summary['@autocommit_time_seconds'] = $max_time / 1000;
  $summary['@autocommit_time'] = format_interval($max_time / 1000);
  $summary['@deletes_by_id'] = (int) $this->_firstXpathValue($stats, $paths['deletes_by_id']);
  $summary['@deletes_by_query'] = (int) $this->_firstXpathValue($stats, $paths['deletes_by_query']);
  $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];
  $summary['@schema_version'] = $schema_version;
  $summary['@core_name'] = $this->_firstXpathValue($stats, $paths['core']);
  $summary['@index_size'] = $this->_firstXpathValue($stats, $paths['size']);

  return $summary;
}

/**
 * Returns the trimmed text of the first node matched by an XPath query.
 *
 * @param $xml
 *   Object exposing an xpath() method, e.g. a SimpleXMLElement.
 * @param string $query
 *   The XPath expression to evaluate.
 *
 * @return string
 *   The trimmed string value of the first match, or '' when the query
 *   matches nothing or fails.
 */
private function _firstXpathValue($xml, $query) {
  $matches = $xml->xpath($query);
  if (empty($matches)) {
    return '';
  }
  return trim((string) reset($matches));
}
|
|
375
|
|
/**
 * Clears the cached Solr data, provided the server answers a ping.
 *
 * @throws Exception
 *   If the ping fails; cached data is left untouched in that case.
 */
public function clearCache() {
  // Don't clear cached data if the server is unavailable.
  if (!@$this->ping()) {
    throw new Exception('No Solr instance available when trying to clear the cache.');
  }
  $this->_clearCache();
}
|
|
388
|
|
/**
 * Drops the cached stats and Luke data without checking the server first.
 *
 * Cache entries are only cleared when an environment ID is set; the
 * in-memory copies are always reset.
 */
protected function _clearCache() {
  if ($this->env_id) {
    foreach (array('stats', 'luke') as $bucket) {
      // Wildcard clear of every cache ID starting with "<env_id>:<bucket>:".
      cache_clear_all($this->env_id . ':' . $bucket . ':', 'cache_apachesolr', TRUE);
    }
  }
  $this->luke = array();
  $this->stats = NULL;
}
|
|
397
|
|
/**
 * Constructor.
 *
 * @param $url
 *   The URL to the Solr server, possibly including a core name, e.g.
 *   http://localhost:8983/solr/ or https://search.example.com/solr/core99/
 * @param $env_id
 *   The machine name of a corresponding saved configuration used for loading
 *   data like which facets are enabled.
 */
public function __construct($url, $env_id = NULL) {
  $this->env_id = $env_id;
  $this->setUrl($url);

  // The default HTTP timeout comes from the ini settings; fall back to 60
  // seconds when the setting is missing, zero or negative.
  $ini_timeout = (int) ini_get('default_socket_timeout');
  $this->_defaultTimeout = ($ini_timeout > 0) ? $ini_timeout : 60;
}
|
|
420
|
|
/**
 * Returns the environment ID this service was constructed with.
 */
public function getId() {
  return $this->env_id;
}
|
|
424
|
|
/**
 * Checks the response code and throws an exception if it is not 200.
 *
 * @param stdClass $response
 *   Response object; its 'code', 'status_message' and 'data' members are
 *   read.
 *
 * @return
 *   The same response object, when its code is 200.
 *
 * @throws Exception
 *   For every other code. For codes of 400 and above, except 403 and 404,
 *   the response body is appended to the status message first.
 */
protected function checkResponse($response) {
  $code = (int) $response->code;
  if ($code == 200) {
    return $response;
  }

  $wants_details = $code >= 400 && !in_array($code, array(403, 404));
  if ($wants_details) {
    // Add details, like Solr's exception message.
    $response->status_message .= $response->data;
  }
  throw new Exception('"' . $code . '" Status: ' . $response->status_message);
}
|
|
446
|
|
/**
 * Makes a request to a servlet (a path) that is not one of the standard ones.
 *
 * @param string $servlet
 *   A path to be added to the base Solr path, e.g. 'extract/tika'.
 * @param array $params
 *   Request parameters used when constructing the URL. 'wt' and 'json.nl'
 *   are filled in with defaults when absent.
 * @param array $options
 *   Options handed to the HTTP request.
 *
 * @see drupal_http_request()
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If the response code is not 200.
 */
public function makeServletRequest($servlet, $params = array(), $options = array()) {
  // Caller-supplied values win over the defaults.
  $defaults = array(
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  $params = $params + $defaults;

  $response = $this->_makeHttpRequest($this->_constructUrl($servlet, $params), $options);
  return $this->checkResponse($response);
}
|
|
475
|
|
/**
 * Central method for making a GET operation against this Solr server.
 *
 * @param $url
 *   The full URL to request.
 * @param $options
 *   Options handed to the HTTP request.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If the response code is not 200.
 */
protected function _sendRawGet($url, $options = array()) {
  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
|
|
483
|
|
/**
 * Central method for making a POST operation against this Solr server.
 *
 * @param $url
 *   The full URL to post to.
 * @param $options
 *   Options handed to the HTTP request. The method is forced to POST, and
 *   the Content-Type header defaults to XML when the caller set none.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If the response code is not 200.
 */
protected function _sendRawPost($url, $options = array()) {
  $options['method'] = 'POST';

  // Normally we use POST to send XML documents.
  $has_content_type = isset($options['headers']['Content-Type']);
  if (!$has_content_type) {
    $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
  }

  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
|
|
496
|
|
/**
 * Central method for making the actual HTTP request to the Solr server.
 *
 * This wraps drupal_http_request() and normalizes its result: a missing or
 * negative code becomes 0 with a generic status message, any error text is
 * appended to the status message, and the members of a JSON object body are
 * copied onto the result object.
 *
 * @param $url
 *   The full URL to request.
 * @param array $options
 *   Options handed to drupal_http_request().
 *
 * @return
 *   The normalized result object.
 */
protected function _makeHttpRequest($url, array $options = array()) {
  $method = isset($options['method']) ? $options['method'] : 'GET';
  if ($method == 'GET' || $method == 'HEAD') {
    // Make sure we are not sending a request body.
    $options['data'] = NULL;
  }

  $result = drupal_http_request($url, $options);

  $request_failed = !isset($result->code) || $result->code < 0;
  if ($request_failed) {
    $result->code = 0;
    $result->status_message = 'Request failed';
    $result->protocol = 'HTTP/1.0';
  }
  // Additional information may be in the error property.
  if (isset($result->error)) {
    $result->status_message .= ': ' . check_plain($result->error);
  }

  if (isset($result->data)) {
    $decoded = json_decode($result->data);
    if (is_object($decoded)) {
      // Expose each top-level JSON member directly on the result object.
      foreach ($decoded as $name => $member) {
        $result->$name = $member;
      }
    }
    return $result;
  }

  $result->data = '';
  $result->response = NULL;
  return $result;
}
|
|
534
|
|
535
|
|
/**
 * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
 *
 * Each of + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / is prefixed with a
 * backslash. The forward slash is included because Solr 4 and later treat it
 * as the delimiter of a regular expression query.
 *
 * NOTE: inside a phrase fewer characters need to be escaped; use
 * {@link DrupalApacheSolrService::escapePhrase()} for that instead.
 *
 * @param string $value
 * @return string
 */
static public function escape($value)
{
  // '&&' and '||' are matched as pairs (a single '&' or '|' is left alone);
  // everything else is a single character from the class.
  $pattern = '#(&&|\|\||[+\-!(){}\[\]^"~*?:/\\\\])#';
  $replace = '\\\\$1';

  return preg_replace($pattern, $replace, $value);
}
|
|
552
|
|
/**
 * Escapes a value that will be placed inside a quoted phrase.
 *
 * Only the double quote and the backslash are prefixed with a backslash.
 *
 * @param string $value
 * @return string
 */
static public function escapePhrase($value)
{
  return preg_replace('/(["\\\\])/', '\\\\$1', $value);
}
|
|
566
|
|
/**
 * Wraps a value in double quotes after escaping it for use in a phrase.
 *
 * @param string $value
 * @return string
 */
static public function phrase($value)
{
  $escaped = self::escapePhrase($value);
  return '"' . $escaped . '"';
}
|
|
577
|
|
/**
 * Builds a full URL from the stored server URL and a servlet name.
 *
 * @param $servlet
 *   A string path to a Solr request handler.
 * @param $params
 *   Query parameters, encoded with httpBuildQuery().
 * @param $added_query_string
 *   An already encoded query string that is appended as is, after any
 *   encoded $params.
 *
 * @return string
 */
protected function _constructUrl($servlet, $params = array(), $added_query_string = NULL) {
  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $encoded_params = $this->httpBuildQuery($params);

  // Keep only the non-empty pieces and join them into one query string.
  $pieces = array_filter(array($encoded_params, $added_query_string));
  $query_string = $pieces ? '?' . implode('&', $pieces) : $encoded_params;

  $url = $this->parsed_url;
  return implode('', array(
    $url['scheme'],
    $url['user'],
    $url['pass'],
    $url['host'],
    $url['port'],
    $url['path'],
    $servlet,
    $query_string,
  ));
}
|
|
606
|
|
/**
 * Returns the base Solr URL, i.e. the stored URL with no servlet appended.
 *
 * @return string
 */
public function getUrl() {
  $base_url = $this->_constructUrl('');
  return $base_url;
}
|
|
615
|
|
/**
 * Set the Solr url.
 *
 * The URL is split with parse_url() and every component is stored in the
 * form _constructUrl() concatenates: the scheme with a trailing '://', the
 * password and port with a leading ':', the host with a leading '@' when a
 * user is present, and the path with exactly one leading and trailing slash.
 * Query and fragment are ignored.
 *
 * @param $url
 *   The URL of the Solr server. A missing scheme defaults to 'http'.
 *
 * @return $this
 */
public function setUrl($url) {
  $parsed_url = parse_url($url);
  if (!is_array($parsed_url)) {
    // parse_url() returns FALSE for seriously malformed URLs; continue with
    // defaults instead of treating FALSE as an array.
    $parsed_url = array();
  }
  // 'host' is defaulted as well so that _constructUrl() can always read it.
  $parsed_url += array(
    'scheme' => 'http',
    'host' => '',
    'path' => '',
  );
  $parsed_url['scheme'] .= '://';

  if (isset($parsed_url['user'])) {
    $parsed_url['host'] = '@' . $parsed_url['host'];
  }
  else {
    $parsed_url['user'] = '';
  }
  $parsed_url['pass'] = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
  $parsed_url['port'] = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';

  // Make sure the path has a single leading/trailing slash.
  $path = trim($parsed_url['path'], '/');
  $parsed_url['path'] = ($path === '') ? '/' : '/' . $path . '/';

  // For now we ignore query and fragment.
  $this->parsed_url = $parsed_url;
  // Force the update url to be rebuilt.
  unset($this->update_url);
  return $this;
}
|
|
654
|
|
/**
 * Raw update method: posts a raw body to the update servlet.
 *
 * The post body should be a complete and well formed XML document.
 *
 * @param string $rawPost
 *   The request body.
 * @param float $timeout
 *   Maximum expected duration (in seconds). No timeout option is passed on
 *   when this is empty.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function update($rawPost, $timeout = FALSE) {
  // @todo: throw exception if updates are disabled.
  if (empty($this->update_url)) {
    // Built once and kept on the instance, since many updates may be sent
    // through a single instance of this class.
    $this->update_url = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
  }

  $options = array('data' => $rawPost);
  if ($timeout) {
    $options['timeout'] = $timeout;
  }
  return $this->_sendRawPost($this->update_url, $options);
}
|
|
679
|
|
/**
 * Add an array of Solr Documents to the index all at once.
 *
 * @param array $documents
 *   Should be an array of ApacheSolrDocument instances; any other element is
 *   silently skipped.
 * @param boolean $overwrite
 *   When not NULL, adds overwrite="true" or overwrite="false" to the <add>
 *   element depending on the truthiness of the value.
 * @param int $commitWithin
 *   When not NULL, adds a commitWithin attribute holding the integer value.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function addDocuments($documents, $overwrite = NULL, $commitWithin = NULL) {
  $attr = '';

  if (isset($overwrite)) {
    // The ternary must be parenthesized: '.' binds tighter than '?:', so
    // without the parentheses the whole attribute collapsed to 'false"'.
    $attr .= ' overwrite="' . (empty($overwrite) ? 'false' : 'true') . '"';
  }
  if (isset($commitWithin)) {
    $attr .= ' commitWithin="' . intval($commitWithin) . '"';
  }

  $rawPost = "<add{$attr}>";
  foreach ($documents as $document) {
    if (is_object($document) && ($document instanceof ApacheSolrDocument)) {
      $rawPost .= ApacheSolrDocument::documentToXml($document);
    }
  }
  $rawPost .= '</add>';

  return $this->update($rawPost);
}
|
|
712
|
|
/**
 * Sends a commit command and then clears the cached Solr data.
 *
 * For Solr versions <= 3 the command carries the waitSearcher, waitFlush and
 * optimize attributes; for later versions it carries waitSearcher and
 * softCommit (taken from the soft-commit flag of this object).
 *
 * @param boolean $optimize
 *   Value of the optimize attribute. Only sent for Solr versions <= 3.
 * @param boolean $waitFlush
 *   Block until index changes are flushed to disk. Only sent for Solr
 *   versions <= 3.
 * @param boolean $waitSearcher
 *   Block until a new searcher is opened and registered as the main query
 *   searcher, making the changes visible.
 * @param float $timeout
 *   Maximum expected duration of the commit operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  if ($this->getSolrVersion() <= 3) {
    $attributes = array(
      'waitSearcher' => $waitSearcher,
      'waitFlush' => $waitFlush,
      'optimize' => $optimize,
    );
  }
  else {
    $attributes = array(
      'waitSearcher' => $waitSearcher,
      'softCommit' => $this->soft_commit,
    );
  }

  // Render every flag as a name="true|false" attribute, in the order above.
  $rawPost = '<commit';
  foreach ($attributes as $name => $enabled) {
    $rawPost .= ' ' . $name . '="' . ($enabled ? 'true' : 'false') . '"';
  }
  $rawPost .= ' />';

  $response = $this->update($rawPost, $timeout);
  $this->_clearCache();
  return $response;
}
|
|
747
|
|
/**
 * Deletes a single document by ID.
 *
 * This delegates to deleteByMultipleIds() with a one-element list.
 *
 * @param string $id
 *   Expected to be utf-8 encoded.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteById($id, $timeout = 3600) {
  $ids = array($id);
  return $this->deleteByMultipleIds($ids, $timeout);
}
|
|
761
|
|
/**
 * Creates and posts a delete document based on multiple document IDs.
 *
 * @param array $ids
 *   Expected to be utf-8 encoded strings. Each one is escaped with
 *   htmlspecialchars() before being placed in the XML.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteByMultipleIds($ids, $timeout = 3600) {
  $id_elements = array();
  foreach ($ids as $id) {
    $id_elements[] = '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
  }

  return $this->update('<delete>' . implode('', $id_elements) . '</delete>', $timeout);
}
|
|
782
|
|
/**
 * Creates a delete document based on a query and submits it.
 *
 * @param string $rawQuery
 *   Expected to be utf-8 encoded. It is escaped with htmlspecialchars()
 *   before being placed in the XML.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return stdClass
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteByQuery($rawQuery, $timeout = 3600) {
  $query_xml = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
  return $this->update('<delete><query>' . $query_xml . '</query></delete>', $timeout);
}
|
|
797
|
|
/**
 * Sends an optimize command.
 *
 * For Solr versions <= 3 the command carries the waitSearcher and waitFlush
 * attributes; for later versions it carries waitSearcher and softCommit
 * (taken from the soft-commit flag of this object).
 *
 * @param boolean $waitFlush
 *   Block until index changes are flushed to disk. Only sent for Solr
 *   versions <= 3.
 * @param boolean $waitSearcher
 *   Block until a new searcher is opened and registered as the main query
 *   searcher, making the changes visible.
 * @param float $timeout
 *   Maximum expected duration of the operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function optimize($waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  if ($this->getSolrVersion() <= 3) {
    $attributes = array(
      'waitSearcher' => $waitSearcher,
      'waitFlush' => $waitFlush,
    );
  }
  else {
    $attributes = array(
      'waitSearcher' => $waitSearcher,
      'softCommit' => $this->soft_commit,
    );
  }

  // Render every flag as a name="true|false" attribute, in the order above.
  $rawPost = '<optimize';
  foreach ($attributes as $name => $enabled) {
    $rawPost .= ' ' . $name . '="' . ($enabled ? 'true' : 'false') . '"';
  }
  $rawPost .= ' />';

  return $this->update($rawPost, $timeout);
}
|
|
828
|
|
/**
 * Like PHP's built in http_build_query(), but uses rawurlencode() and no [] for repeated params.
 *
 * @param array $query
 *   Parameters to encode. An array value repeats its key once per element
 *   (e.g. fq=a&fq=b); a NULL value is emitted as the bare key.
 * @param $parent
 *   Already encoded key to use for every element; set by the recursion for
 *   nested arrays.
 *
 * @return string
 *   The encoded pairs joined with '&', or '' when there is nothing to encode.
 */
protected function httpBuildQuery(array $query, $parent = '') {
  $params = array();

  foreach ($query as $key => $value) {
    $key = ($parent ? $parent : rawurlencode($key));

    // Recurse into children.
    if (is_array($value)) {
      $nested = $this->httpBuildQuery($value, $key);
      // An empty nested array encodes to nothing; skipping it avoids empty
      // segments such as a leading '&' or '&&' in the result.
      if ($nested !== '') {
        $params[] = $nested;
      }
    }
    // If a query parameter value is NULL, only append its key.
    elseif (!isset($value)) {
      $params[] = $key;
    }
    else {
      $params[] = $key . '=' . rawurlencode($value);
    }
  }

  return implode('&', $params);
}
|
|
853
|
|
/**
 * Simple Search interface
 *
 * @param string $query
 *   The raw query string. It is sent as the 'q' parameter unless it is empty;
 *   the literal string '0' counts as a query.
 * @param array $params
 *   Key / value pairs for other query parameters (see Solr documentation);
 *   use arrays for parameter keys used more than once (e.g. facet.field).
 *   'wt' is always forced to 'json'.
 * @param string $method
 *   'GET' or 'POST'. The request is switched to POST regardless of this
 *   value when the encoded query string exceeds the length threshold.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call, or if $method is neither
 *   'GET' nor 'POST'.
 */
public function search($query = '', array $params = array(), $method = 'GET') {
  // Always use JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
  $params['wt'] = 'json';
  // Additional default params.
  $params += array(
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  // A plain truthiness test would silently drop a search for "0".
  if ($query || $query === '0') {
    $params['q'] = $query;
  }
  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $queryString = $this->httpBuildQuery($params);
  // Check string length of the query string, change method to POST
  $len = strlen($queryString);
  // Fetch our threshold to find out when to flip to POST
  $max_len = apachesolr_environment_variable_get($this->env_id, 'apachesolr_search_post_threshold', 3600);

  // if longer than $max_len (default 3600) characters
  // we should switch to POST (a typical server handles 4096 max).
  // If this class is used independently (without environments), we switch automatically to POST at an
  // limit of 1800 chars.
  if (($len > 1800) && (empty($this->env_id) || ($len > $max_len))) {
    $method = 'POST';
  }

  if ($method == 'GET') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET, array(), $queryString);
    return $this->_sendRawGet($searchUrl);
  }
  else if ($method == 'POST') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
    // The encoded parameters travel in the request body as form data.
    $options = array(
      'data' => $queryString,
      'headers' => array(
        'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
      ),
    );
    return $this->_sendRawPost($searchUrl, $options);
  }
  else {
    throw new Exception("Unsupported method '$method' for search(), use GET or POST");
  }
}
|
|
903 }
|