comparison sites/all/modules/custom/solrconnect/Drupal_Apache_Solr_Service.php @ 0:015d06b10d37 default tip

initial
author dwinter
date Wed, 31 Jul 2013 13:49:13 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:015d06b10d37
1 <?php
2
3 /**
4 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * - Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
16 * its contributors may be used to endorse or promote products derived from
17 * this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
32 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
33 * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
34 *
35 * @package Apache
36 * @subpackage Solr
37 * @author Donovan Jimenez <djimenez@conduit-it.com>
38 */
39
40 /**
41 * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
42 * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
43 *
44 * This program is free software; you can redistribute it and/or modify
45 * it under the terms of the GNU General Public License as published by
46 * the Free Software Foundation; either version 2 of the License, or (at
47 * your option) any later version.
48
49 *
50 * This program is distributed in the hope that it will be useful, but
51 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
52 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
53 * for more details.
54 *
55 * You should have received a copy of the GNU General Public License
56 * along with this program as the file LICENSE.txt; if not, please see
57 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
58 */
59
60 /**
61 * Starting point for the Solr API. Represents a Solr server resource and has
62 * methods for pinging, adding, deleting, committing, optimizing and searching.
63 */
64
65 class DrupalApacheSolrService implements DrupalApacheSolrServiceInterface {
/**
 * How NamedLists should be formatted in the output. This specifically affects
 * facet counts. Valid values are 'map' (default) or 'flat'.
 */
const NAMED_LIST_FORMAT = 'map';

/**
 * Servlet mappings: paths appended to the base Solr URL.
 */
const PING_SERVLET = 'admin/ping';
const UPDATE_SERVLET = 'update';
const SEARCH_SERVLET = 'select';
const LUKE_SERVLET = 'admin/luke';
const SYSTEM_SERVLET = 'admin/system';
const STATS_SERVLET = 'admin/stats.jsp';
const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';

/**
 * Components of the server URL, as prepared by setUrl().
 *
 * @var array
 */
protected $parsed_url;

/**
 * Full URL of the update servlet, built on first use by update().
 *
 * @var string
 */
protected $update_url;

/**
 * Default HTTP timeout in seconds; the constructor initializes it from the
 * default_socket_timeout ini setting (cast to int, 60 when not positive).
 *
 * @var int
 */
protected $_defaultTimeout;

/**
 * Environment ID handed to the constructor; NULL disables caching.
 */
protected $env_id;

/**
 * Luke responses keyed by the numTerms value they were requested with.
 */
protected $luke;

/**
 * Parsed stats document, populated by setStats().
 */
protected $stats;

/**
 * Decoded system info, populated by setSystemInfo().
 */
protected $system_info;

/**
 * Cache ID used by setSystemInfo().
 *
 * Declared explicitly because it used to be created on the fly; it stays
 * public so that its visibility does not change.
 *
 * @var string
 */
public $system_info_cid;

/**
 * Cache ID used by setStats().
 *
 * Declared explicitly because it used to be created on the fly; it stays
 * public so that its visibility does not change.
 *
 * @var string
 */
public $stats_cid;

/**
 * Flag that denotes whether to use soft commits for Solr 4.x, defaults to FALSE.
 *
 * @var bool
 */
protected $soft_commit = FALSE;
115
/**
 * Pings the Solr server through the admin/ping servlet.
 *
 * @param $timeout
 *   Maximum time to wait for the ping, in seconds (default 2). Zero and
 *   negative values are forwarded to the HTTP layer as -1.
 *
 * @return
 *   (float) Seconds taken by the ping, or FALSE when the response code is
 *   not 200.
 */
public function ping($timeout = 2) {
  $started = microtime(TRUE);

  // Every non-positive timeout is normalized to -1.
  $effective_timeout = ($timeout <= 0.0) ? -1 : $timeout;

  // A HEAD request is sufficient: only the status code is inspected.
  $response = $this->_makeHttpRequest(
    $this->_constructUrl(self::PING_SERVLET),
    array(
      'method' => 'HEAD',
      'timeout' => $effective_timeout,
    )
  );

  if ($response->code != 200) {
    return FALSE;
  }
  // Add 0.1 ms to the ping time so we never return 0.0.
  return microtime(TRUE) - $started + 0.0001;
}
146
/**
 * Sets the flag that selects soft commits for Solr 4.x.
 *
 * @param bool $soft_commit
 *   Any value; it is stored by its truthiness.
 */
public function setSoftCommit($soft_commit) {
  $this->soft_commit = $soft_commit ? TRUE : FALSE;
}
156
/**
 * Tells whether soft commits are enabled for Solr 4.x.
 *
 * @return bool
 *   The flag last stored by setSoftCommit(), FALSE by default.
 */
public function getSoftCommit() {
  return $this->soft_commit;
}
166
/**
 * Populates $this->system_info from the admin/system servlet.
 *
 * When an environment ID is set, the raw JSON response is looked up in (and
 * written to) the 'cache_apachesolr' bin. Nothing is returned; read the
 * result through getSystemInfo().
 */
protected function setSystemInfo() {
  $url = $this->_constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));

  // First pass: try the cache.
  if ($this->env_id) {
    $this->system_info_cid = $this->env_id . ":system:" . drupal_hash_base64($url);
    $cached = cache_get($this->system_info_cid, 'cache_apachesolr');
    if (isset($cached->data)) {
      $this->system_info = json_decode($cached->data);
    }
  }

  if (!empty($this->system_info)) {
    return;
  }

  // Second pass: ask the server and store the raw response in the cache.
  $response = $this->_sendRawGet($url);
  $this->system_info = json_decode($response->data);
  if ($this->env_id) {
    cache_set($this->system_info_cid, $response->data, 'cache_apachesolr');
  }
}
191
/**
 * Gets information about the Solr core, loading it on first use.
 *
 * @return
 *   The JSON-decoded response of the admin/system servlet, as stored by
 *   setSystemInfo().
 */
public function getSystemInfo() {
  $already_loaded = isset($this->system_info);
  if (!$already_loaded) {
    $this->setSystemInfo();
  }
  return $this->system_info;
}
204
/**
 * Populates $this->luke[$num_terms] with index meta-data from admin/luke.
 *
 * @param $num_terms
 *   Value sent as the 'numTerms' request parameter; also the key under which
 *   the response is stored.
 */
protected function setLuke($num_terms = 0) {
  // Nothing to do when this variant has already been loaded.
  if (!empty($this->luke[$num_terms])) {
    return;
  }

  $url = $this->_constructUrl(self::LUKE_SERVLET, array(
    'numTerms' => "$num_terms",
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  ));

  // First pass: the cache entry holds the whole luke array, not one variant.
  if ($this->env_id) {
    $cid = $this->env_id . ":luke:" . drupal_hash_base64($url);
    $cached = cache_get($cid, 'cache_apachesolr');
    if (isset($cached->data)) {
      $this->luke = $cached->data;
    }
  }

  // Second pass to populate the cache if necessary.
  if (empty($this->luke[$num_terms])) {
    $this->luke[$num_terms] = $this->_sendRawGet($url);
    if ($this->env_id) {
      cache_set($cid, $this->luke, 'cache_apachesolr');
    }
  }
}
232
/**
 * Gets just the field meta-data about the index.
 *
 * @param $num_terms
 *   Passed through to getLuke().
 *
 * @return
 *   The 'fields' property of the luke response.
 */
public function getFields($num_terms = 0) {
  $luke = $this->getLuke($num_terms);
  return $luke->fields;
}
239
/**
 * Gets meta-data about the index, loading it on first use.
 *
 * @param $num_terms
 *   Selects which luke variant to return; see setLuke().
 *
 * @return
 *   The response stored in $this->luke[$num_terms].
 */
public function getLuke($num_terms = 0) {
  $already_loaded = isset($this->luke[$num_terms]);
  if (!$already_loaded) {
    $this->setLuke($num_terms);
  }
  return $this->luke[$num_terms];
}
249
/**
 * Gets the major Solr version.
 *
 * @return
 *   Element 0 of the 'solr-spec-version' value from the system info
 *   (presumably the first character of a version string such as 1, 3 or 4),
 *   or integer 0 when that value is not available. For more detail use
 *   getSystemInfo().
 */
public function getSolrVersion() {
  $info = $this->getSystemInfo();
  if (!isset($info->lucene->{'solr-spec-version'})) {
    return 0;
  }
  $spec_version = $info->lucene->{'solr-spec-version'};
  return $spec_version[0];
}
265
/**
 * Populates $this->stats with statistics about the Solr core.
 *
 * The stats servlet depends on the Solr version. The raw XML response is
 * cached in the 'cache_apachesolr' bin when an environment ID is set.
 */
protected function setStats() {
  $luke = $this->getLuke();
  $version = $this->getSolrVersion();

  // Only try to get stats if we have connected to the index and have not
  // loaded them yet.
  if (!empty($this->stats) || !isset($luke->index->numDocs)) {
    return;
  }

  $servlet = ($version >= 4) ? self::STATS_SERVLET_4 : self::STATS_SERVLET;
  $url = $this->_constructUrl($servlet);

  // First pass: try the cache.
  if ($this->env_id) {
    $this->stats_cid = $this->env_id . ":stats:" . drupal_hash_base64($url);
    $cached = cache_get($this->stats_cid, 'cache_apachesolr');
    if (isset($cached->data)) {
      $this->stats = simplexml_load_string($cached->data);
    }
  }

  if (!empty($this->stats)) {
    return;
  }

  // Second pass to populate the cache if necessary.
  $response = $this->_sendRawGet($url);
  $this->stats = simplexml_load_string($response->data);
  if ($this->env_id) {
    cache_set($this->stats_cid, $response->data, 'cache_apachesolr');
  }
}
297
/**
 * Gets statistics about the Solr core, loading them on first use.
 *
 * @return
 *   Whatever setStats() stored: the result of simplexml_load_string() on the
 *   stats response, or the unset/NULL value when stats were not loaded.
 */
public function getStats() {
  $already_loaded = isset($this->stats);
  if (!$already_loaded) {
    $this->setStats();
  }
  return $this->stats;
}
309
/**
 * Gets summary information about the Solr core.
 *
 * @return array
 *   Placeholder-keyed summary ('@pending_docs', '@autocommit_time', ...).
 *   Every value is an empty string when no stats are available.
 */
public function getStatsSummary() {
  $stats = $this->getStats();
  $solr_version = $this->getSolrVersion();

  $summary = array(
    '@pending_docs' => '',
    '@autocommit_time_seconds' => '',
    '@autocommit_time' => '',
    '@deletes_by_id' => '',
    '@deletes_by_query' => '',
    '@deletes_total' => '',
    '@schema_version' => '',
    '@core_name' => '',
    '@index_size' => '',
  );

  if (empty($stats)) {
    return $summary;
  }

  if ($solr_version <= 3) {
    // Layout of admin/stats.jsp.
    $paths = array(
      'pending' => '//stat[@name="docsPending"]',
      'max_time' => '//stat[@name="autocommit maxTime"]',
      'deletes_id' => '//stat[@name="deletesById"]',
      'deletes_query' => '//stat[@name="deletesByQuery"]',
      'core' => '/solr/core[1]',
      'size' => '//stat[@name="indexSize"]',
    );
    $schema_version = $this->_firstXpathValue($stats, '/solr/schema[1]');
  }
  else {
    // Layout of admin/mbeans.
    // NOTE(review): the predicates ["stats"] and ["core"] are constant string
    // tests that match every <lst>; presumably [@name="..."] was intended.
    // Left unchanged to keep the values that are selected today.
    $paths = array(
      'pending' => '//lst["stats"]/long[@name="docsPending"]',
      'max_time' => '//lst["stats"]/str[@name="autocommit maxTime"]',
      'deletes_id' => '//lst["stats"]/long[@name="deletesById"]',
      'deletes_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
      'core' => '//lst["core"]/str[@name="coreName"]',
      'size' => '//lst["core"]/str[@name="indexSize"]',
    );
    // The schema name is not read from the stats document for 4.x.
    $system_info = $this->getSystemInfo();
    $schema_version = isset($system_info->core->schema) ? $system_info->core->schema : '';
  }

  // maxTime is reported in milliseconds; convert to seconds.
  $max_time = (int) $this->_firstXpathValue($stats, $paths['max_time']);

  $summary['@pending_docs'] = (int) $this->_firstXpathValue($stats, $paths['pending']);
  $summary['@autocommit_time_seconds'] = $max_time / 1000;
  $summary['@autocommit_time'] = format_interval($max_time / 1000);
  $summary['@deletes_by_id'] = (int) $this->_firstXpathValue($stats, $paths['deletes_id']);
  $summary['@deletes_by_query'] = (int) $this->_firstXpathValue($stats, $paths['deletes_query']);
  $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];
  $summary['@schema_version'] = $schema_version;
  $summary['@core_name'] = $this->_firstXpathValue($stats, $paths['core']);
  $summary['@index_size'] = $this->_firstXpathValue($stats, $paths['size']);

  return $summary;
}

/**
 * Returns the trimmed text of the first node matching an XPath expression.
 *
 * xpath() may yield an empty array, NULL or FALSE; all of those produce an
 * empty string here instead of a notice or a TypeError from current().
 *
 * @param SimpleXMLElement $xml
 *   Document to query.
 * @param string $path
 *   XPath expression.
 *
 * @return string
 *   Trimmed text of the first match, or '' when nothing matched.
 */
protected function _firstXpathValue($xml, $path) {
  $matches = $xml->xpath($path);
  if (!is_array($matches) || empty($matches)) {
    return '';
  }
  return trim((string) reset($matches));
}
375
/**
 * Clears cached Solr data, but only while the server answers a ping.
 *
 * @throws Exception
 *   When the ping fails; the cached data is left untouched in that case.
 */
public function clearCache() {
  // Don't clear cached data if the server is unavailable.
  if (!@$this->ping()) {
    throw new Exception('No Solr instance available when trying to clear the cache.');
  }
  $this->_clearCache();
}
388
/**
 * Drops the cached stats and luke data without pinging the server.
 *
 * Clears the matching 'cache_apachesolr' entries (wildcard on the cache ID
 * prefix) when an environment ID is set, then resets the in-memory copies.
 */
protected function _clearCache() {
  if ($this->env_id) {
    foreach (array(":stats:", ":luke:") as $segment) {
      cache_clear_all($this->env_id . $segment, 'cache_apachesolr', TRUE);
    }
  }
  $this->luke = array();
  $this->stats = NULL;
}
397
/**
 * Constructor.
 *
 * @param $url
 *   The URL to the Solr server, possibly including a core name. E.g.
 *   http://localhost:8983/solr/ or https://search.example.com/solr/core99/
 * @param $env_id
 *   The machine name of a corresponding saved configuration used for loading
 *   data like which facets are enabled.
 */
public function __construct($url, $env_id = NULL) {
  $this->env_id = $env_id;
  $this->setUrl($url);

  // Take the default HTTP timeout from the ini settings, falling back to 60
  // seconds when the setting is missing, zero or negative.
  $ini_timeout = (int) ini_get('default_socket_timeout');
  $this->_defaultTimeout = ($ini_timeout > 0) ? $ini_timeout : 60;
}
420
/**
 * Gets the environment ID this service was constructed with.
 *
 * @return
 *   The $env_id constructor argument, NULL when none was given.
 */
public function getId() {
  return $this->env_id;
}
424
/**
 * Checks the response code and throws an exception if it is not 200.
 *
 * @param stdClass $response
 *   Response object with 'code', 'status_message' and 'data' properties.
 *
 * @return
 *   The same response object, when its code is 200.
 *
 * @throws Exception
 *   For any other code. For codes >= 400 other than 403 and 404 the response
 *   body is appended to the status message first.
 */
protected function checkResponse($response) {
  $status = (int) $response->code;
  if ($status == 200) {
    return $response;
  }

  // Add details, like Solr's exception message.
  $wants_details = $status >= 400 && !in_array($status, array(403, 404), TRUE);
  if ($wants_details) {
    $response->status_message .= $response->data;
  }
  throw new Exception('"' . $status . '" Status: ' . $response->status_message);
}
446
/**
 * Makes a request to a servlet (a path) that is not a standard path.
 *
 * @param string $servlet
 *   A path to be added to the base Solr path. e.g. 'extract/tika'
 * @param array $params
 *   Any request parameters when constructing the URL. 'wt' and 'json.nl'
 *   are added unless already present.
 * @param array $options
 *   @see drupal_http_request() $options.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
public function makeServletRequest($servlet, $params = array(), $options = array()) {
  // Caller-supplied parameters win over these defaults.
  $defaults = array(
    'wt' => 'json',
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  $url = $this->_constructUrl($servlet, $params + $defaults);
  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
475
/**
 * Central method for making a GET operation against this Solr server.
 *
 * @param $url
 *   Full URL to request.
 * @param $options
 *   Options passed on to _makeHttpRequest().
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
protected function _sendRawGet($url, $options = array()) {
  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
483
/**
 * Central method for making a POST operation against this Solr server.
 *
 * @param $url
 *   Full URL to post to.
 * @param $options
 *   Options passed on to _makeHttpRequest(); 'method' is forced to POST and
 *   a Content-Type header is added when none is given.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   When the response code is not 200.
 */
protected function _sendRawPost($url, $options = array()) {
  $options['method'] = 'POST';

  // Normally we use POST to send XML documents.
  $has_content_type = isset($options['headers']['Content-Type']);
  if (!$has_content_type) {
    $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
  }

  return $this->checkResponse($this->_makeHttpRequest($url, $options));
}
496
/**
 * Central method for making the actual HTTP request to the Solr server.
 *
 * This is a wrapper around drupal_http_request() that normalizes the result:
 * a missing or negative code becomes 0 with a generic status message, error
 * details are appended to the status message, and the top-level keys of a
 * JSON object body are copied onto the result.
 *
 * @param $url
 *   Full URL to request.
 * @param array $options
 *   @see drupal_http_request() $options.
 *
 * @return
 *   The normalized result object.
 */
protected function _makeHttpRequest($url, array $options = array()) {
  if (!isset($options['method']) || $options['method'] == 'GET' || $options['method'] == 'HEAD') {
    // Make sure we are not sending a request body.
    $options['data'] = NULL;
  }

  $result = drupal_http_request($url, $options);

  if (!isset($result->code) || $result->code < 0) {
    $result->code = 0;
    $result->status_message = 'Request failed';
    $result->protocol = 'HTTP/1.0';
  }
  // Additional information may be in the error property. The status message
  // is not guaranteed to exist when the code is non-negative, so start from
  // an empty string instead of appending to an undefined property.
  if (isset($result->error)) {
    $status_message = isset($result->status_message) ? $result->status_message : '';
    $result->status_message = $status_message . ': ' . check_plain($result->error);
  }

  if (!isset($result->data)) {
    $result->data = '';
    $result->response = NULL;
  }
  else {
    // Copy the decoded JSON keys onto the result. A key that matches an
    // existing property overwrites it.
    $response = json_decode($result->data);
    if (is_object($response)) {
      foreach ($response as $key => $value) {
        $result->$key = $value;
      }
    }
  }
  return $result;
}
534
535
/**
 * Escapes special query characters such as ':', '(', ')', '*', '?', etc.
 *
 * Each match is prefixed with a backslash. Inside a phrase fewer characters
 * need escaping; use {@link DrupalApacheSolrService::escapePhrase()} there.
 *
 * @param string $value
 * @return string
 */
static public function escape($value)
{
  // List taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
  return preg_replace('/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/', '\\\$1', $value);
}
552
/**
 * Escapes a value meant to be contained in a phrase.
 *
 * Only double quotes and backslashes are prefixed with a backslash.
 *
 * @param string $value
 * @return string
 */
static public function escapePhrase($value)
{
  return preg_replace('/("|\\\)/', '\\\$1', $value);
}
566
/**
 * Convenience function that turns a value into phrase syntax.
 *
 * The value is escaped with escapePhrase() and wrapped in double quotes.
 *
 * @param string $value
 * @return string
 */
static public function phrase($value)
{
  $escaped = self::escapePhrase($value);
  return '"' . $escaped . '"';
}
577
/**
 * Builds a URL from the stored server URL parts and a servlet name.
 *
 * @param $servlet
 *   A string path to a Solr request handler.
 * @param $params
 *   Request parameters, encoded with httpBuildQuery().
 * @param $added_query_string
 *   An already-encoded query string appended after the parameters.
 *
 * @return string
 */
protected function _constructUrl($servlet, $params = array(), $added_query_string = NULL) {
  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $query_string = $this->httpBuildQuery($params);
  $has_params = (bool) $query_string;
  $has_extra = (bool) $added_query_string;

  if ($has_params && $has_extra) {
    $query_string = '?' . $query_string . '&' . $added_query_string;
  }
  elseif ($has_params) {
    $query_string = '?' . $query_string;
  }
  elseif ($has_extra) {
    $query_string = '?' . $added_query_string;
  }

  // The parts already carry their separators (see setUrl()).
  $parts = $this->parsed_url;
  $base = $parts['scheme'] . $parts['user'] . $parts['pass'] . $parts['host'] . $parts['port'] . $parts['path'];
  return $base . $servlet . $query_string;
}
606
/**
 * Gets the Solr base URL, i.e. the stored URL with no servlet appended.
 *
 * @return string
 */
public function getUrl() {
  $no_servlet = '';
  return $this->_constructUrl($no_servlet);
}
615
/**
 * Sets the Solr URL.
 *
 * The URL is split with parse_url() and every part is stored together with
 * its separator ('://', ':', '@'), so that _constructUrl() can simply
 * concatenate the parts. Query and fragment are ignored.
 *
 * @param $url
 *   The URL to the Solr server. A missing scheme defaults to 'http'.
 *
 * @return $this
 */
public function setUrl($url) {
  $parsed_url = parse_url($url);
  // parse_url() returns FALSE for seriously malformed URLs; continue with
  // the defaults below instead of using a boolean as an array.
  if (!is_array($parsed_url)) {
    $parsed_url = array();
  }
  // Guarantee the keys that are read unconditionally further down and in
  // _constructUrl().
  $parsed_url += array(
    'scheme' => 'http',
    'host' => '',
  );
  $parsed_url['scheme'] .= '://';

  if (!isset($parsed_url['user'])) {
    $parsed_url['user'] = '';
  }
  else {
    // Credentials precede the host, separated by '@'.
    $parsed_url['host'] = '@' . $parsed_url['host'];
  }
  $parsed_url['pass'] = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
  $parsed_url['port'] = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';

  if (isset($parsed_url['path'])) {
    // Make sure the path has a single leading/trailing slash.
    $parsed_url['path'] = '/' . ltrim($parsed_url['path'], '/');
    $parsed_url['path'] = rtrim($parsed_url['path'], '/') . '/';
  }
  else {
    $parsed_url['path'] = '/';
  }
  // For now we ignore query and fragment.
  $this->parsed_url = $parsed_url;
  // Force the update url to be rebuilt.
  unset($this->update_url);
  return $this;
}
654
/**
 * Raw update method. Takes a raw post body and sends it to the update
 * service. The post body should be a complete and well formed XML document.
 *
 * @param string $rawPost
 *   The body to post.
 * @param float $timeout
 *   Maximum expected duration (in seconds); a falsy value leaves the timeout
 *   option unset.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function update($rawPost, $timeout = FALSE) {
  // @todo: throw exception if updates are disabled.
  if (empty($this->update_url)) {
    // Store the URL in an instance variable since many updates may be sent
    // via a single instance of this class.
    $this->update_url = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
  }

  $options = array('data' => $rawPost);
  if ($timeout) {
    $options['timeout'] = $timeout;
  }
  return $this->_sendRawPost($this->update_url, $options);
}
679
/**
 * Adds an array of Solr documents to the index all at once.
 *
 * @param array $documents
 *   Should be an array of ApacheSolrDocument instances; anything else in the
 *   array is skipped.
 * @param boolean $overwrite
 *   When not NULL, sent as the 'overwrite' attribute of the <add> element
 *   ("true" or "false" by truthiness).
 * @param int $commitWithin
 *   When not NULL, sent as the 'commitWithin' attribute (cast to int).
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function addDocuments($documents, $overwrite = NULL, $commitWithin = NULL) {
  $attr = '';

  if (isset($overwrite)) {
    // The parentheses are required: '.' binds tighter than '?:', so without
    // them the concatenated string itself becomes the ternary condition and
    // only 'false"' is ever appended.
    $attr .= ' overwrite="' . (empty($overwrite) ? 'false' : 'true') . '"';
  }
  if (isset($commitWithin)) {
    $attr .= ' commitWithin="' . intval($commitWithin) . '"';
  }

  $rawPost = "<add{$attr}>";
  foreach ($documents as $document) {
    if (is_object($document) && ($document instanceof ApacheSolrDocument)) {
      $rawPost .= ApacheSolrDocument::documentToXml($document);
    }
  }
  $rawPost .= '</add>';

  return $this->update($rawPost);
}
712
/**
 * Sends a commit command and clears the cached luke/stats data afterwards.
 *
 * @param boolean $optimize
 *   Sent as the 'optimize' attribute. Only sent for Solr versions <= 3.
 * @param boolean $waitFlush
 *   Sent as the 'waitFlush' attribute. Only sent for Solr versions <= 3.
 * @param boolean $waitSearcher
 *   Sent as the 'waitSearcher' attribute for every version.
 * @param float $timeout
 *   Maximum expected duration of the commit operation on the server.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  $wait_searcher_attr = $waitSearcher ? 'true' : 'false';

  if ($this->getSolrVersion() <= 3) {
    $wait_flush_attr = $waitFlush ? 'true' : 'false';
    $optimize_attr = $optimize ? 'true' : 'false';
    $rawPost = '<commit waitSearcher="' . $wait_searcher_attr . '" waitFlush="' . $wait_flush_attr . '" optimize="' . $optimize_attr . '" />';
  }
  else {
    // Newer versions get the soft commit flag instead of waitFlush/optimize.
    $soft_commit_attr = $this->soft_commit ? 'true' : 'false';
    $rawPost = '<commit waitSearcher="' . $wait_searcher_attr . '" softCommit="' . $soft_commit_attr . '" />';
  }

  $response = $this->update($rawPost, $timeout);
  $this->_clearCache();
  return $response;
}
747
/**
 * Creates and posts a delete document for a single document ID.
 *
 * @param string $id
 *   Expected to be utf-8 encoded.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteById($id, $timeout = 3600) {
  $ids = array($id);
  return $this->deleteByMultipleIds($ids, $timeout);
}
761
/**
 * Creates and posts a delete document based on multiple document IDs.
 *
 * @param array $ids
 *   Expected to be utf-8 encoded strings.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteByMultipleIds($ids, $timeout = 3600) {
  // One XML-escaped <id> element per document.
  $id_elements = array();
  foreach ($ids as $id) {
    $id_elements[] = '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
  }
  $rawPost = '<delete>' . implode('', $id_elements) . '</delete>';

  return $this->update($rawPost, $timeout);
}
782
/**
 * Creates a delete document based on a query and submits it.
 *
 * @param string $rawQuery
 *   Expected to be utf-8 encoded; it is XML-escaped before being sent.
 * @param float $timeout
 *   Maximum expected duration of the delete operation on the server.
 *
 * @return stdClass
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function deleteByQuery($rawQuery, $timeout = 3600) {
  $escaped_query = htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8');
  return $this->update('<delete><query>' . $escaped_query . '</query></delete>', $timeout);
}
797
/**
 * Sends an optimize command.
 *
 * @param boolean $waitFlush
 *   Sent as the 'waitFlush' attribute. Only sent for Solr versions <= 3.
 * @param boolean $waitSearcher
 *   Sent as the 'waitSearcher' attribute for every version.
 * @param float $timeout
 *   Maximum expected duration of the operation on the server.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call.
 */
public function optimize($waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
  $wait_searcher_attr = $waitSearcher ? 'true' : 'false';

  if ($this->getSolrVersion() <= 3) {
    $wait_flush_attr = $waitFlush ? 'true' : 'false';
    $rawPost = '<optimize waitSearcher="' . $wait_searcher_attr . '" waitFlush="' . $wait_flush_attr . '" />';
  }
  else {
    // Newer versions get the soft commit flag instead of waitFlush.
    $soft_commit_attr = $this->soft_commit ? 'true' : 'false';
    $rawPost = '<optimize waitSearcher="' . $wait_searcher_attr . '" softCommit="' . $soft_commit_attr . '" />';
  }

  return $this->update($rawPost, $timeout);
}
828
/**
 * Like PHP's built in http_build_query(), but uses rawurlencode() and no []
 * for repeated params.
 *
 * @param array $query
 *   Parameters to encode. An array value repeats its key once per element; a
 *   NULL value emits the key alone.
 * @param $parent
 *   Already-encoded key to use for every element; set when recursing into an
 *   array value.
 *
 * @return string
 *   The encoded pairs joined with '&'.
 */
protected function httpBuildQuery(array $query, $parent = '') {
  $pairs = array();

  foreach ($query as $name => $value) {
    $encoded_name = $parent ? $parent : rawurlencode($name);

    if (is_array($value)) {
      // Recurse into children, which all share this key.
      $pairs[] = $this->httpBuildQuery($value, $encoded_name);
      continue;
    }
    // If a query parameter value is NULL, only append its key.
    $pairs[] = isset($value) ? $encoded_name . '=' . rawurlencode($value) : $encoded_name;
  }

  return implode('&', $pairs);
}
853
/**
 * Simple search interface.
 *
 * @param string $query
 *   The raw query string; sent as 'q' only when truthy.
 * @param array $params
 *   Key / value pairs for other query parameters (see Solr documentation),
 *   use arrays for parameter keys used more than once (e.g. facet.field).
 * @param $method
 *   'GET' or 'POST'. Long query strings are switched to POST automatically.
 *
 * @return
 *   Response object.
 *
 * @throws Exception
 *   If an error occurs during the service call, or for any other method.
 */
public function search($query = '', array $params = array(), $method = 'GET') {
  // Always use JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
  $params['wt'] = 'json';
  // Additional default params.
  $params += array(
    'json.nl' => self::NAMED_LIST_FORMAT,
  );
  if ($query) {
    $params['q'] = $query;
  }

  // PHP's built in http_build_query() doesn't give us the format Solr wants.
  $queryString = $this->httpBuildQuery($params);
  $length = strlen($queryString);
  // Fetch our threshold to find out when to flip to POST.
  $threshold = apachesolr_environment_variable_get($this->env_id, 'apachesolr_search_post_threshold', 3600);

  // Above 1800 characters the request is switched to POST: always when this
  // class is used without an environment, otherwise only once the query
  // string also exceeds the configured threshold (default 3600; a typical
  // server handles 4096 max).
  $too_long_for_get = ($length > 1800) && (empty($this->env_id) || ($length > $threshold));
  if ($too_long_for_get) {
    $method = 'POST';
  }

  if ($method == 'GET') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET, array(), $queryString);
    return $this->_sendRawGet($searchUrl);
  }
  if ($method == 'POST') {
    $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
    $options = array(
      'data' => $queryString,
      'headers' => array(
        'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
      ),
    );
    return $this->_sendRawPost($searchUrl, $options);
  }
  throw new Exception("Unsupported method '$method' for search(), use GET or POST");
}
903 }