Mercurial > hg > MPIWG-drupal-modules
comparison sites/all/modules/custom/solrconnect/Drupal_Apache_Solr_Service.php @ 0:015d06b10d37 default tip
initial
author | dwinter |
---|---|
date | Wed, 31 Jul 2013 13:49:13 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:015d06b10d37 |
---|---|
1 <?php | |
2 | |
3 /** | |
4 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc. | |
5 * All rights reserved. | |
6 * | |
7 * Redistribution and use in source and binary forms, with or without | |
8 * modification, are permitted provided that the following conditions are met: | |
9 * | |
10 * - Redistributions of source code must retain the above copyright notice, | |
11 * this list of conditions and the following disclaimer. | |
12 * - Redistributions in binary form must reproduce the above copyright | |
13 * notice, this list of conditions and the following disclaimer in the | |
14 * documentation and/or other materials provided with the distribution. | |
15 * - Neither the name of Conduit Internet Technologies, Inc. nor the names of | |
16 * its contributors may be used to endorse or promote products derived from | |
17 * this software without specific prior written permission. | |
18 * | |
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 * POSSIBILITY OF SUCH DAMAGE. | |
30 * | |
31 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com) | |
32 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING) | |
33 * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $ | |
34 * | |
35 * @package Apache | |
36 * @subpackage Solr | |
37 * @author Donovan Jimenez <djimenez@conduit-it.com> | |
38 */ | |
39 | |
40 /** | |
41 * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney, | |
42 * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors. | |
43 * | |
44 * This program is free software; you can redistribute it and/or modify | |
45 * it under the terms of the GNU General Public License as published by | |
46 * the Free Software Foundation; either version 2 of the License, or (at | |
47 * your option) any later version. | |
48 | |
49 * | |
50 * This program is distributed in the hope that it will be useful, but | |
51 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
52 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
53 * for more details. | |
54 * | |
55 * You should have received a copy of the GNU General Public License | |
56 * along with this program as the file LICENSE.txt; if not, please see | |
57 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. | |
58 */ | |
59 | |
60 /** | |
61 * Starting point for the Solr API. Represents a Solr server resource and has | |
62 * methods for pinging, adding, deleting, committing, optimizing and searching. | |
63 */ | |
64 | |
65 class DrupalApacheSolrService implements DrupalApacheSolrServiceInterface { | |
66 /** | |
67 * How NamedLists should be formatted in the output. This specifically affects facet counts. Valid values | |
68 * are 'map' (default) or 'flat'. | |
69 * | |
70 */ | |
71 const NAMED_LIST_FORMAT = 'map'; | |
72 | |
73 /** | |
74 * Servlet mappings | |
75 */ | |
76 const PING_SERVLET = 'admin/ping'; | |
77 const UPDATE_SERVLET = 'update'; | |
78 const SEARCH_SERVLET = 'select'; | |
79 const LUKE_SERVLET = 'admin/luke'; | |
80 const SYSTEM_SERVLET = 'admin/system'; | |
81 const STATS_SERVLET = 'admin/stats.jsp'; | |
82 const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true'; | |
83 | |
84 /** | |
85 * Server url | |
86 * | |
87 * @var array | |
88 */ | |
89 protected $parsed_url; | |
90 | |
91 /** | |
92 * Constructed servlet full path URLs | |
93 * | |
94 * @var string | |
95 */ | |
96 protected $update_url; | |
97 | |
98 /** | |
99 * Default HTTP timeout when one is not specified (initialized to default_socket_timeout ini setting) | |
100 * | |
101 * @var float | |
102 */ | |
103 protected $_defaultTimeout; | |
104 protected $env_id; | |
105 protected $luke; | |
106 protected $stats; | |
107 protected $system_info; | |
108 | |
109 /** | |
110 * Flag that denotes whether to use soft commits for Solr 4.x, defaults to FALSE. | |
111 * | |
112 * @var bool | |
113 */ | |
114 protected $soft_commit = FALSE; | |
115 | |
/**
 * Tests the connection by sending a HEAD request to the admin/ping servlet.
 *
 * @param $timeout
 *   Maximum number of seconds to wait for the ping. Any value of zero or
 *   less is replaced by -1 before it is handed to the HTTP layer.
 *
 * @return
 *   (float) Seconds elapsed when the server answered with HTTP 200,
 *   FALSE for any other response code.
 */
public function ping($timeout = 2) {
  $started_at = microtime(TRUE);

  // A HEAD request is enough to learn whether the servlet answers.
  $request_options = array(
    'method' => 'HEAD',
    'timeout' => ($timeout <= 0.0) ? -1 : $timeout,
  );
  $ping_url = $this->_constructUrl(self::PING_SERVLET);
  $response = $this->_makeHttpRequest($ping_url, $request_options);

  if ($response->code != 200) {
    return FALSE;
  }
  // Pad the result by 0.1 ms so a successful ping never evaluates to 0.0.
  return microtime(TRUE) - $started_at + 0.0001;
}
146 | |
/**
 * Stores the soft-commit flag that commit() and optimize() send to Solr 4.x.
 *
 * @param bool $soft_commit
 *   Any value; it is normalised to a strict boolean before being stored.
 */
public function setSoftCommit($soft_commit) {
  $this->soft_commit = $soft_commit ? TRUE : FALSE;
}
156 | |
/**
 * Reports the currently stored soft-commit flag.
 *
 * @return bool
 *   The value last stored by setSoftCommit(), FALSE by default.
 */
public function getSoftCommit() {
  return $this->soft_commit;
}
166 | |
/**
 * Populates $this->system_info from the admin/system servlet.
 *
 * When an environment id is set, the raw JSON response is read from and
 * written to the 'cache_apachesolr' bin; otherwise the server is always
 * queried. Nothing is returned: use getSystemInfo() to read the result.
 *
 * @throws Exception
 *   Propagated from _sendRawGet() when the server does not answer with 200.
 */
protected function setSystemInfo() {
  $url = $this->_constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));

  // Keep the cache id local (as setLuke() does) instead of creating an
  // undeclared property on the object.
  $cid = NULL;
  if ($this->env_id) {
    $cid = $this->env_id . ":system:" . drupal_hash_base64($url);
    $cache = cache_get($cid, 'cache_apachesolr');
    if (isset($cache->data)) {
      $this->system_info = json_decode($cache->data);
    }
  }

  // Second pass to populate the cache if necessary.
  if (empty($this->system_info)) {
    $response = $this->_sendRawGet($url);
    $this->system_info = json_decode($response->data);
    if ($cid !== NULL) {
      cache_set($cid, $response->data, 'cache_apachesolr');
    }
  }
}
191 | |
/**
 * Returns the decoded admin/system information, loading it on first use.
 *
 * @return
 *   Whatever json_decode() produced for the system servlet response.
 */
public function getSystemInfo() {
  if (isset($this->system_info)) {
    return $this->system_info;
  }
  $this->setSystemInfo();
  return $this->system_info;
}
204 | |
/**
 * Loads the admin/luke index meta-data into $this->luke[$num_terms].
 *
 * The cache ('cache_apachesolr') is consulted first when an environment id
 * is set; the server is only queried when the entry is still missing.
 *
 * @param $num_terms
 *   Value sent as the 'numTerms' request parameter; also the array key
 *   under which the response is stored.
 */
protected function setLuke($num_terms = 0) {
  // Nothing to do when this entry has already been loaded.
  if (!empty($this->luke[$num_terms])) {
    return;
  }

  $url = $this->_constructUrl(self::LUKE_SERVLET, array(
    'numTerms' => (string) $num_terms,
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  ));

  if ($this->env_id) {
    $cid = $this->env_id . ":luke:" . drupal_hash_base64($url);
    $cached = cache_get($cid, 'cache_apachesolr');
    if (isset($cached->data)) {
      // The cached value is the whole luke array, not a single entry.
      $this->luke = $cached->data;
    }
  }

  // Fall back to the server when the cache did not supply this entry.
  if (empty($this->luke[$num_terms])) {
    $this->luke[$num_terms] = $this->_sendRawGet($url);
    if ($this->env_id) {
      cache_set($cid, $this->luke, 'cache_apachesolr');
    }
  }
}
232 | |
/**
 * Returns only the 'fields' part of the luke meta-data.
 *
 * @param $num_terms
 *   Forwarded unchanged to getLuke().
 */
public function getFields($num_terms = 0) {
  $luke = $this->getLuke($num_terms);
  return $luke->fields;
}
239 | |
/**
 * Returns the luke meta-data for the given term count, loading it on demand.
 *
 * @param $num_terms
 *   Key of the luke entry to return; passed to setLuke() when not loaded yet.
 */
public function getLuke($num_terms = 0) {
  if (isset($this->luke[$num_terms])) {
    return $this->luke[$num_terms];
  }
  $this->setLuke($num_terms);
  return $this->luke[$num_terms];
}
249 | |
/**
 * Returns the major Solr version reported by the system info servlet.
 *
 * @return int
 *   The leading integer of the 'solr-spec-version' value (for example 1, 3
 *   or 4), or 0 when the system info does not contain that value. For the
 *   detailed version string use getSystemInfo().
 */
public function getSolrVersion() {
  $system_info = $this->getSystemInfo();
  if (isset($system_info->lucene->{'solr-spec-version'})) {
    // Presumably a dotted version string such as "4.3.0" (confirm against the
    // server response). Casting keeps the whole leading number, so the result
    // is a real integer and multi-digit major versions are not truncated to
    // their first character.
    return (int) $system_info->lucene->{'solr-spec-version'};
  }
  return 0;
}
265 | |
/**
 * Populates $this->stats with the statistics document of the Solr core.
 *
 * Statistics are only requested when the luke data shows a document count,
 * i.e. when the index could be reached. Solr 4 and later use the mbeans
 * servlet, earlier versions use stats.jsp. The raw XML is cached in the
 * 'cache_apachesolr' bin when an environment id is set.
 *
 * @throws Exception
 *   Propagated from _sendRawGet() when the server does not answer with 200.
 */
protected function setStats() {
  $data = $this->getLuke();
  $solr_version = $this->getSolrVersion();

  // Only try to get stats if we have connected to the index.
  if (empty($this->stats) && isset($data->index->numDocs)) {
    $servlet = ($solr_version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
    $url = $this->_constructUrl($servlet);

    // Keep the cache id local (as setLuke() does) instead of creating an
    // undeclared property on the object.
    $cid = NULL;
    if ($this->env_id) {
      $cid = $this->env_id . ":stats:" . drupal_hash_base64($url);
      $cache = cache_get($cid, 'cache_apachesolr');
      if (isset($cache->data)) {
        $this->stats = simplexml_load_string($cache->data);
      }
    }

    // Second pass to populate the cache if necessary.
    if (empty($this->stats)) {
      $response = $this->_sendRawGet($url);
      $this->stats = simplexml_load_string($response->data);
      if ($cid !== NULL) {
        cache_set($cid, $response->data, 'cache_apachesolr');
      }
    }
  }
}
297 | |
/**
 * Returns the statistics of the Solr core, loading them on first use.
 *
 * @return
 *   The value produced by simplexml_load_string() in setStats(), or NULL
 *   when setStats() did not load anything.
 */
public function getStats() {
  if (isset($this->stats)) {
    return $this->stats;
  }
  $this->setStats();
  return $this->stats;
}
309 | |
/**
 * Builds a flat summary of selected statistics of the Solr core.
 *
 * @return array
 *   Array keyed by placeholder names ('@pending_docs', '@autocommit_time',
 *   ...). Every value is an empty string when no statistics are available.
 */
public function getStatsSummary() {
  $stats = $this->getStats();
  $solr_version = $this->getSolrVersion();

  $summary = array(
    '@pending_docs' => '',
    '@autocommit_time_seconds' => '',
    '@autocommit_time' => '',
    '@deletes_by_id' => '',
    '@deletes_by_query' => '',
    '@deletes_total' => '',
    '@schema_version' => '',
    '@core_name' => '',
    '@index_size' => '',
  );

  // Without statistics there is nothing to fill in.
  if (empty($stats)) {
    return $summary;
  }

  if ($solr_version <= 3) {
    // stats.jsp layout used by Solr versions up to 3.
    $pending = $stats->xpath('//stat[@name="docsPending"]');
    $summary['@pending_docs'] = (int) trim(current($pending));

    $autocommit = $stats->xpath('//stat[@name="autocommit maxTime"]');
    // The value is divided by 1000 to obtain seconds.
    $autocommit_seconds = ((int) trim(current($autocommit))) / 1000;
    $summary['@autocommit_time_seconds'] = $autocommit_seconds;
    $summary['@autocommit_time'] = format_interval($autocommit_seconds);

    $by_id = $stats->xpath('//stat[@name="deletesById"]');
    $summary['@deletes_by_id'] = (int) trim(current($by_id));
    $by_query = $stats->xpath('//stat[@name="deletesByQuery"]');
    $summary['@deletes_by_query'] = (int) trim(current($by_query));
    $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];

    $schema_nodes = $stats->xpath('/solr/schema[1]');
    $summary['@schema_version'] = trim($schema_nodes[0]);
    $core_nodes = $stats->xpath('/solr/core[1]');
    $summary['@core_name'] = trim($core_nodes[0]);

    $index_size = $stats->xpath('//stat[@name="indexSize"]');
    $summary['@index_size'] = trim(current($index_size));
  }
  else {
    // mbeans layout; the schema name is taken from the system info instead.
    $system_info = $this->getSystemInfo();

    $pending = $stats->xpath('//lst["stats"]/long[@name="docsPending"]');
    $summary['@pending_docs'] = (int) trim(current($pending));

    $autocommit = $stats->xpath('//lst["stats"]/str[@name="autocommit maxTime"]');
    // The value is divided by 1000 to obtain seconds.
    $autocommit_seconds = ((int) trim(current($autocommit))) / 1000;
    $summary['@autocommit_time_seconds'] = $autocommit_seconds;
    $summary['@autocommit_time'] = format_interval($autocommit_seconds);

    $by_id = $stats->xpath('//lst["stats"]/long[@name="deletesById"]');
    $summary['@deletes_by_id'] = (int) trim(current($by_id));
    $by_query = $stats->xpath('//lst["stats"]/long[@name="deletesByQuery"]');
    $summary['@deletes_by_query'] = (int) trim(current($by_query));
    $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];

    $summary['@schema_version'] = $system_info->core->schema;

    $core_nodes = $stats->xpath('//lst["core"]/str[@name="coreName"]');
    $summary['@core_name'] = trim(current($core_nodes));

    $index_size = $stats->xpath('//lst["core"]/str[@name="indexSize"]');
    $summary['@index_size'] = trim(current($index_size));
  }

  return $summary;
}
375 | |
/**
 * Clears the cached Solr data, but only while the server can be pinged.
 *
 * @throws Exception
 *   When ping() does not succeed; the cached data is then left untouched.
 */
public function clearCache() {
  // Keep the cached data when the server is unavailable.
  if (!@$this->ping()) {
    throw new Exception('No Solr instance available when trying to clear the cache.');
  }
  $this->_clearCache();
}
388 | |
/**
 * Drops the stats and luke data, both from the cache bin and from memory.
 *
 * Cache entries are only cleared when an environment id is set; the
 * in-memory copies are always reset.
 */
protected function _clearCache() {
  $environment = $this->env_id;
  if ($environment) {
    // TRUE enables wildcard matching, so every entry with the prefix goes.
    cache_clear_all($environment . ":stats:", 'cache_apachesolr', TRUE);
    cache_clear_all($environment . ":luke:", 'cache_apachesolr', TRUE);
  }
  $this->luke = array();
  $this->stats = NULL;
}
397 | |
/**
 * Creates a service object for one Solr server (or core).
 *
 * @param $url
 *   The URL to the Solr server, possibly including a core name, e.g.
 *   http://localhost:8983/solr/ or https://search.example.com/solr/core99/
 * @param $env_id
 *   The machine name of a corresponding saved configuration. It is also used
 *   as the prefix of the cache ids of this object.
 */
public function __construct($url, $env_id = NULL) {
  $this->env_id = $env_id;
  $this->setUrl($url);

  // Take the default HTTP timeout from the ini settings, falling back to 60
  // seconds when the setting yields zero or a negative number.
  $ini_timeout = (int) ini_get('default_socket_timeout');
  $this->_defaultTimeout = ($ini_timeout > 0) ? $ini_timeout : 60;
}
420 | |
/**
 * Returns the environment id this service was constructed with.
 *
 * @return
 *   The $env_id value passed to the constructor (NULL when none was given).
 */
public function getId() {
  return $this->env_id;
}
424 | |
/**
 * Checks the response code and throws an exception if it is not 200.
 *
 * @param stdClass $response
 *   Response object with at least 'code' and 'status_message' properties.
 *
 * @return
 *   The unchanged response object when the code is 200.
 *
 * @throws Exception
 *   For every other code. For codes of 400 and above, except 403 and 404,
 *   the response body is appended to the status message first.
 */
protected function checkResponse($response) {
  $code = (int) $response->code;
  if ($code == 200) {
    return $response;
  }

  $has_details = $code >= 400 && !($code == 403 || $code == 404);
  if ($has_details) {
    // Add details, like Solr's exception message.
    $response->status_message .= $response->data;
  }
  throw new Exception('"' . $code . '" Status: ' . $response->status_message);
}
446 | |
/**
 * Makes a request to a servlet (a path) that is not one of the standard ones.
 *
 * @param string $servlet
 *   A path to be added to the base Solr path, e.g. 'extract/tika'.
 * @param array $params
 *   Request parameters for the URL. 'wt' and 'json.nl' are added with
 *   default values unless the caller supplies them.
 * @param array $options
 *   Options passed through to the HTTP request.
 *   @see drupal_http_request()
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
public function makeServletRequest($servlet, $params = array(), $options = array()) {
  $defaults = array(
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  // The array union keeps any value the caller already provided.
  $url = $this->_constructUrl($servlet, $params + $defaults);
  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
475 | |
/**
 * Central method for GET operations against this Solr server.
 *
 * @param $url
 *   Complete URL to request.
 * @param $options
 *   Options passed through to _makeHttpRequest().
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
protected function _sendRawGet($url, $options = array()) {
  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
483 | |
/**
 * Central method for POST operations against this Solr server.
 *
 * @param $url
 *   Complete URL to post to.
 * @param $options
 *   Options passed through to _makeHttpRequest(). The method is forced to
 *   POST and a Content-Type header is added when none is present.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
protected function _sendRawPost($url, $options = array()) {
  $options['method'] = 'POST';
  $has_content_type = isset($options['headers']['Content-Type']);
  if (!$has_content_type) {
    // Posts normally carry XML documents, so that is the default type.
    $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
  }
  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
496 | |
/**
 * Central method for making the actual HTTP request to the Solr server.
 *
 * This wraps drupal_http_request() and normalises its result: a missing or
 * negative code becomes 0, an 'error' property is appended to the status
 * message, and the top-level properties of a JSON object body are copied
 * onto the result object.
 *
 * @param $url
 *   Complete URL to request.
 * @param array $options
 *   Options for drupal_http_request(). The request body is dropped when the
 *   method is missing, GET or HEAD.
 *
 * @return
 *   The normalised result object.
 */
protected function _makeHttpRequest($url, array $options = array()) {
  $method = isset($options['method']) ? $options['method'] : 'GET';
  if ($method == 'GET' || $method == 'HEAD') {
    // Make sure we are not sending a request body.
    $options['data'] = NULL;
  }

  $result = drupal_http_request($url, $options);

  if (!isset($result->code) || $result->code < 0) {
    $result->code = 0;
    $result->status_message = 'Request failed';
    $result->protocol = 'HTTP/1.0';
  }
  // Additional information may be in the error property.
  if (isset($result->error)) {
    $result->status_message .= ': ' . check_plain($result->error);
  }

  if (isset($result->data)) {
    $decoded = json_decode($result->data);
    if (is_object($decoded)) {
      // Expose the decoded top-level properties directly on the result.
      foreach ($decoded as $property => $value) {
        $result->$property = $value;
      }
    }
  }
  else {
    $result->data = '';
    $result->response = NULL;
  }
  return $result;
}
534 | |
535 | |
/**
 * Escapes special query characters such as ':', '(', ')', '*' and '?'.
 *
 * Every match is prefixed with a backslash. Inside a phrase fewer characters
 * need escaping; use DrupalApacheSolrService::escapePhrase() there instead.
 *
 * @param string $value
 *   Raw value.
 *
 * @return string
 *   The value with each special character (or '&&' / '||' pair) escaped.
 */
static public function escape($value) {
  // List taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
  return preg_replace('/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/', '\\\$1', $value);
}
552 | |
/**
 * Escapes a value that will be placed inside a quoted phrase.
 *
 * Only double quotes and backslashes are prefixed with a backslash.
 *
 * @param string $value
 *   Raw value.
 *
 * @return string
 *   The escaped value.
 */
static public function escapePhrase($value) {
  return preg_replace('/("|\\\)/', '\\\$1', $value);
}
566 | |
/**
 * Convenience function that turns a value into phrase syntax.
 *
 * @param string $value
 *   Raw value.
 *
 * @return string
 *   The value escaped with escapePhrase() and wrapped in double quotes.
 */
static public function phrase($value) {
  $escaped = self::escapePhrase($value);
  return '"' . $escaped . '"';
}
577 | |
/**
 * Builds a URL from the stored server URL parts and a servlet path.
 *
 * @param $servlet
 *   A string path to a Solr request handler.
 * @param $params
 *   Request parameters, encoded with httpBuildQuery().
 * @param $added_query_string
 *   An already encoded query string that is appended after the parameters.
 *
 * @return string
 *   The complete URL.
 */
protected function _constructUrl($servlet, $params = array(), $added_query_string = NULL) {
  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $query_string = $this->httpBuildQuery($params);

  // Join whichever of the two parts are non-empty behind a single '?'.
  $segments = array_filter(array($query_string, $added_query_string));
  if ($segments) {
    $query_string = '?' . implode('&', $segments);
  }

  $url = $this->parsed_url;
  $base = $url['scheme'] . $url['user'] . $url['pass'] . $url['host'] . $url['port'] . $url['path'];
  return $base . $servlet . $query_string;
}
606 | |
/**
 * Returns the base Solr URL, i.e. the stored URL without servlet or query.
 *
 * @return string
 *   The URL as assembled by _constructUrl() for an empty servlet name.
 */
public function getUrl() {
  $base_url = $this->_constructUrl('');
  return $base_url;
}
615 | |
/**
 * Sets the Solr URL.
 *
 * The URL is split with parse_url() and stored as pre-formatted parts
 * (scheme with '://', password and port with a leading ':', host with a
 * leading '@' when a user is present, path with exactly one leading and one
 * trailing slash) so _constructUrl() can simply concatenate them. Query and
 * fragment are ignored.
 *
 * @param $url
 *   The URL of the Solr server, possibly including a core name.
 *
 * @return $this
 */
public function setUrl($url) {
  $parsed_url = parse_url($url);
  // parse_url() returns FALSE for seriously malformed URLs; continue with
  // defaults rather than relying on array auto-vivification from FALSE.
  if (!is_array($parsed_url)) {
    $parsed_url = array();
  }

  if (!isset($parsed_url['scheme'])) {
    $parsed_url['scheme'] = 'http';
  }
  $parsed_url['scheme'] .= '://';

  // A URL without an authority part has no host; default it so the later
  // concatenation does not hit an undefined index.
  if (!isset($parsed_url['host'])) {
    $parsed_url['host'] = '';
  }

  if (!isset($parsed_url['user'])) {
    $parsed_url['user'] = '';
  }
  else {
    $parsed_url['host'] = '@' . $parsed_url['host'];
  }
  $parsed_url['pass'] = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
  $parsed_url['port'] = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';

  if (isset($parsed_url['path'])) {
    // Make sure the path has a single leading/trailing slash.
    $parsed_url['path'] = '/' . ltrim($parsed_url['path'], '/');
    $parsed_url['path'] = rtrim($parsed_url['path'], '/') . '/';
  }
  else {
    $parsed_url['path'] = '/';
  }
  // For now we ignore query and fragment.
  $this->parsed_url = $parsed_url;
  // Force the update url to be rebuilt. Resetting to NULL (instead of
  // unset()) keeps the declared property on the object; update() checks it
  // with empty().
  $this->update_url = NULL;
  return $this;
}
654 | |
/**
 * Raw update method: posts a raw body to the update servlet.
 *
 * The post body should be a complete and well formed XML document.
 *
 * @param string $rawPost
 *   Body to send.
 * @param float $timeout
 *   Maximum expected duration (in seconds); only passed on when truthy.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function update($rawPost, $timeout = FALSE) {
  // @todo: throw exception if updates are disabled.
  if (empty($this->update_url)) {
    // The URL is kept on the instance because many updates may be sent via
    // a single instance of this class.
    $this->update_url = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
  }

  $options = array('data' => $rawPost);
  if ($timeout) {
    $options['timeout'] = $timeout;
  }
  return $this->_sendRawPost($this->update_url, $options);
}
679 | |
/**
 * Adds an array of Solr documents to the index all at once.
 *
 * @param array $documents
 *   Should be an array of ApacheSolrDocument instances; any other element is
 *   silently skipped.
 * @param boolean $overwrite
 *   When not NULL, sent as the overwrite="true|false" attribute of <add>.
 * @param int $commitWithin
 *   When not NULL, sent (as an integer) in the commitWithin attribute of
 *   <add>.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function addDocuments($documents, $overwrite = NULL, $commitWithin = NULL) {
  $attr = '';

  if (isset($overwrite)) {
    // The ternary must be parenthesised: '.' binds tighter than '?:', so the
    // unparenthesised form always appended just 'false"' and produced the
    // malformed tag <addfalse">.
    $attr .= ' overwrite="' . (empty($overwrite) ? 'false' : 'true') . '"';
  }
  if (isset($commitWithin)) {
    $attr .= ' commitWithin="' . intval($commitWithin) . '"';
  }

  $rawPost = "<add{$attr}>";
  foreach ($documents as $document) {
    if (is_object($document) && ($document instanceof ApacheSolrDocument)) {
      $rawPost .= ApacheSolrDocument::documentToXml($document);
    }
  }
  $rawPost .= '</add>';

  return $this->update($rawPost);
}
712 | |
/**
 * Sends a commit command and afterwards drops the cached stats/luke data.
 *
 * @param boolean $optimize
 *   Sent as the 'optimize' attribute. Only used for Solr versions <= 3.
 * @param boolean $waitFlush
 *   Sent as the 'waitFlush' attribute. Only used for Solr versions <= 3.
 * @param boolean $waitSearcher
 *   Sent as the 'waitSearcher' attribute for every Solr version.
 * @param float $timeout
 *   Maximum expected duration of the commit operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  $wait_searcher = $waitSearcher ? 'true' : 'false';

  if ($this->getSolrVersion() <= 3) {
    $rawPost = sprintf(
      '<commit waitSearcher="%s" waitFlush="%s" optimize="%s" />',
      $wait_searcher,
      $waitFlush ? 'true' : 'false',
      $optimize ? 'true' : 'false'
    );
  }
  else {
    // Later versions get the soft-commit flag instead of flush/optimize.
    $rawPost = sprintf(
      '<commit waitSearcher="%s" softCommit="%s" />',
      $wait_searcher,
      $this->soft_commit ? 'true' : 'false'
    );
  }

  $response = $this->update($rawPost, $timeout);
  $this->_clearCache();
  return $response;
}
747 | |
/**
 * Deletes a single document by its ID.
 *
 * @param string $id
 *   Document ID, expected to be UTF-8 encoded.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteById($id, $timeout = 3600) {
  $ids = array($id);
  return $this->deleteByMultipleIds($ids, $timeout);
}
761 | |
/**
 * Creates and posts a delete document for several document IDs.
 *
 * @param array $ids
 *   Document IDs, expected to be UTF-8 encoded strings.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteByMultipleIds($ids, $timeout = 3600) {
  $id_tags = array();
  foreach ($ids as $id) {
    // Escape XML markup characters in the ID; quotes are left untouched.
    $id_tags[] = '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
  }
  $rawPost = '<delete>' . implode('', $id_tags) . '</delete>';

  return $this->update($rawPost, $timeout);
}
782 | |
/**
 * Creates a delete document based on a query and submits it.
 *
 * @param string $rawQuery
 *   Query selecting the documents to delete, expected to be UTF-8 encoded.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return stdClass
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteByQuery($rawQuery, $timeout = 3600) {
  // Escape XML markup characters in the query; quotes are left untouched.
  $escaped_query = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
  $rawPost = '<delete><query>' . $escaped_query . '</query></delete>';

  return $this->update($rawPost, $timeout);
}
797 | |
/**
 * Sends an optimize command.
 *
 * @param boolean $waitFlush
 *   Sent as the 'waitFlush' attribute. Only used for Solr versions <= 3.
 * @param boolean $waitSearcher
 *   Sent as the 'waitSearcher' attribute for every Solr version.
 * @param float $timeout
 *   Maximum expected duration of the operation on the server.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function optimize($waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  $wait_searcher = $waitSearcher ? 'true' : 'false';

  if ($this->getSolrVersion() <= 3) {
    $rawPost = sprintf(
      '<optimize waitSearcher="%s" waitFlush="%s" />',
      $wait_searcher,
      $waitFlush ? 'true' : 'false'
    );
  }
  else {
    // Later versions get the soft-commit flag instead of waitFlush.
    $rawPost = sprintf(
      '<optimize waitSearcher="%s" softCommit="%s" />',
      $wait_searcher,
      $this->soft_commit ? 'true' : 'false'
    );
  }

  return $this->update($rawPost, $timeout);
}
828 | |
/**
 * Builds a query string the way Solr expects it.
 *
 * Like PHP's built in http_build_query(), but it uses rawurlencode() and
 * repeats the plain key (no []) for every element of an array value.
 *
 * @param array $query
 *   Parameters; values may be scalars, NULL or nested arrays.
 * @param $parent
 *   Already encoded key used for all entries during recursion.
 *
 * @return string
 *   The encoded pairs joined with '&'.
 */
protected function httpBuildQuery(array $query, $parent = '') {
  $pairs = array();

  foreach ($query as $name => $value) {
    // Nested entries all share the key of the parent they belong to.
    $name = $parent ? $parent : rawurlencode($name);

    if (is_array($value)) {
      $pairs[] = $this->httpBuildQuery($value, $name);
      continue;
    }
    // A NULL value contributes only its key.
    $pairs[] = isset($value) ? $name . '=' . rawurlencode($value) : $name;
  }

  return implode('&', $pairs);
}
853 | |
/**
 * Simple search interface.
 *
 * @param string $query
 *   The raw query string; only sent (as 'q') when it is not empty.
 * @param array $params
 *   Key / value pairs for other query parameters (see Solr documentation).
 *   Use arrays for parameter keys used more than once (e.g. facet.field).
 *   'wt' is always overwritten with 'json'.
 * @param $method
 *   'GET' or 'POST'. Long query strings are switched to POST automatically.
 *
 * @return
 *   The response object.
 *
 * @throws Exception
 *   If an error occurs during the service call or the method is unsupported.
 */
public function search($query = '', array $params = array(), $method = 'GET') {
  // Always use JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
  $params['wt'] = 'json';
  // Additional default params.
  $params += array('json.nl' => self::NAMED_LIST_FORMAT);
  if ($query) {
    $params['q'] = $query;
  }

  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $queryString = $this->httpBuildQuery($params);
  $len = strlen($queryString);
  // Threshold (default 3600) above which an environment switches to POST.
  $max_len = apachesolr_environment_variable_get($this->env_id, 'apachesolr_search_post_threshold', 3600);

  // Query strings of up to 1800 characters never switch. Longer ones switch
  // to POST when no environment is set, or when they also exceed the
  // environment's threshold (a typical server handles 4096 max).
  $exceeds_threshold = empty($this->env_id) || ($len > $max_len);
  if (($len > 1800) && $exceeds_threshold) {
    $method = 'POST';
  }

  if ($method == 'GET') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET, array(), $queryString);
    return $this->_sendRawGet($searchUrl);
  }
  if ($method == 'POST') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
    $options = array(
      'data' => $queryString,
      'headers' => array(
        'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
      ),
    );
    return $this->_sendRawPost($searchUrl, $options);
  }
  throw new Exception("Unsupported method '$method' for search(), use GET or POST");
}
903 } |