Mercurial > hg > MPIWG-drupal-modules
diff sites/all/modules/custom/solrsearch/Drupal_Apache_Solr_Service.php @ 0:015d06b10d37 default tip
initial
author | dwinter |
---|---|
date | Wed, 31 Jul 2013 13:49:13 +0200 |
parents | |
children |
line wrap: on
line diff
<?php

/**
 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *  - Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *  - Neither the name of Conduit Internet Technologies, Inc. nor the names of
 *    its contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
 * @version $Id: Service.php 22 2009-11-09 22:46:54Z donovan.jimenez $
 *
 * @package Apache
 * @subpackage Solr
 * @author Donovan Jimenez <djimenez@conduit-it.com>
 */

/**
 * Additional code Copyright (c) 2008-2011 by Robert Douglass, James McKinney,
 * Jacob Singh, Alejandro Garza, Peter Wolanin, and additional contributors.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program as the file LICENSE.txt; if not, please see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 */

/**
 * Starting point for the Solr API. Represents a Solr server resource and has
 * methods for pinging, adding, deleting, committing, optimizing and searching.
 */
class DrupalsolrsearchService implements DrupalApacheSolrServiceInterface {
  /**
   * How NamedLists should be formatted in the output.
   *
   * This specifically affects facet counts. Valid values are 'map' (default)
   * or 'flat'.
   */
  const NAMED_LIST_FORMAT = 'map';

  /**
   * Servlet mappings
   */
  const PING_SERVLET = 'admin/ping';
  const UPDATE_SERVLET = 'update';
  const SEARCH_SERVLET = 'select';
  const LUKE_SERVLET = 'admin/luke';
  const SYSTEM_SERVLET = 'admin/system';
  const STATS_SERVLET = 'admin/stats.jsp';
  const STATS_SERVLET_4 = 'admin/mbeans?wt=xml&stats=true';

  /**
   * Server url, split into the pieces produced by setUrl().
   *
   * @var array
   */
  protected $parsed_url;

  /**
   * Constructed servlet full path URL for the update servlet.
   *
   * @var string
   */
  protected $update_url;

  /**
   * Default HTTP timeout when one is not specified (initialized to
   * default_socket_timeout ini setting).
   *
   * @var int
   */
  protected $_defaultTimeout;

  /**
   * Environment id passed to the constructor; NULL when used standalone.
   */
  protected $env_id;

  /**
   * Responses from admin/luke, keyed by the requested number of terms.
   */
  protected $luke;

  /**
   * Parsed statistics document (SimpleXML), see setStats().
   */
  protected $stats;

  /**
   * Decoded JSON from admin/system, see setSystemInfo().
   */
  protected $system_info;

  /**
   * Cache id used for the system info entry in 'cache_solrsearch'.
   *
   * @var string
   */
  protected $system_info_cid;

  /**
   * Cache id used for the stats entry in 'cache_solrsearch'.
   *
   * @var string
   */
  protected $stats_cid;

  /**
   * Flag that denotes whether to use soft commits for Solr 4.x, defaults to FALSE.
   *
   * @var bool
   */
  protected $soft_commit = FALSE;

  /**
   * Call the /admin/ping servlet, to test the connection to the server.
   *
   * @param $timeout
   *   maximum time to wait for ping in seconds, -1 for unlimited (default 2).
   * @return
   *   (float) seconds taken to ping the server, FALSE if the response code
   *   is not 200 (e.g. a timeout occurred).
   */
  public function ping($timeout = 2) {
    $start = microtime(TRUE);

    if ($timeout <= 0.0) {
      $timeout = -1;
    }
    $pingUrl = $this->_constructUrl(self::PING_SERVLET);
    // Attempt a HEAD request to the solr ping url.
    $options = array(
      'method' => 'HEAD',
      'timeout' => $timeout,
    );
    $response = $this->_makeHttpRequest($pingUrl, $options);

    if ($response->code == 200) {
      // Add 0.1 ms to the ping time so we never return 0.0.
      return microtime(TRUE) - $start + 0.0001;
    }
    return FALSE;
  }

  /**
   * Flags whether to use soft commits for Solr 4.x.
   *
   * @param bool $soft_commit
   *   Whether or not to use soft commits for Solr 4.x.
   */
  public function setSoftCommit($soft_commit) {
    $this->soft_commit = (bool) $soft_commit;
  }

  /**
   * Returns the flag that denotes whether to use soft commits for Solr 4.x.
   *
   * @return bool
   *   Whether to use soft commits for Solr 4.x.
   */
  public function getSoftCommit() {
    return $this->soft_commit;
  }

  /**
   * Call the /admin/system servlet and store the decoded result.
   *
   * When an environment id is set, the raw JSON response is read from and
   * written to the 'cache_solrsearch' cache bin.
   */
  protected function setSystemInfo() {
    $url = $this->_constructUrl(self::SYSTEM_SERVLET, array('wt' => 'json'));
    if ($this->env_id) {
      $this->system_info_cid = $this->env_id . ":system:" . drupal_hash_base64($url);
      $cache = cache_get($this->system_info_cid, 'cache_solrsearch');
      if (isset($cache->data)) {
        $this->system_info = json_decode($cache->data);
      }
    }
    // Second pass to populate the cache if necessary.
    if (empty($this->system_info)) {
      $response = $this->_sendRawGet($url);
      $this->system_info = json_decode($response->data);
      if ($this->env_id) {
        cache_set($this->system_info_cid, $response->data, 'cache_solrsearch');
      }
    }
  }

  /**
   * Get information about the Solr Core.
   *
   * @return
   *   The system info as decoded by json_decode() (not the raw JSON string).
   */
  public function getSystemInfo() {
    if (!isset($this->system_info)) {
      $this->setSystemInfo();
    }
    return $this->system_info;
  }

  /**
   * Sets $this->luke with the meta-data about the index from admin/luke.
   *
   * @param $num_terms
   *   Value sent as the 'numTerms' request parameter; also the key under
   *   which the response is stored in $this->luke.
   */
  protected function setLuke($num_terms = 0) {
    if (empty($this->luke[$num_terms])) {
      $params = array(
        'numTerms' => "$num_terms",
        'wt' => 'json',
        'json.nl' => self::NAMED_LIST_FORMAT,
      );
      $url = $this->_constructUrl(self::LUKE_SERVLET, $params);
      if ($this->env_id) {
        $cid = $this->env_id . ":luke:" . drupal_hash_base64($url);
        $cache = cache_get($cid, 'cache_solrsearch');
        if (isset($cache->data)) {
          $this->luke = $cache->data;
        }
      }
    }
    // Second pass to populate the cache if necessary. $url and $cid are
    // always defined here: this condition can only hold if the one above did.
    if (empty($this->luke[$num_terms])) {
      $this->luke[$num_terms] = $this->_sendRawGet($url);
      if ($this->env_id) {
        cache_set($cid, $this->luke, 'cache_solrsearch');
      }
    }
  }

  /**
   * Get just the field meta-data about the index.
   */
  public function getFields($num_terms = 0) {
    return $this->getLuke($num_terms)->fields;
  }

  /**
   * Get meta-data about the index.
   */
  public function getLuke($num_terms = 0) {
    if (!isset($this->luke[$num_terms])) {
      $this->setLuke($num_terms);
    }
    return $this->luke[$num_terms];
  }

  /**
   * Get the current solr major version.
   *
   * @return int
   *   The major version taken from the 'solr-spec-version' system info value,
   *   or 0 when that value is not available. Does not give a more detailed
   *   version; for that you need to get the system info.
   */
  public function getSolrVersion() {
    $system_info = $this->getSystemInfo();
    // Get our solr version number.
    if (isset($system_info->lucene->{'solr-spec-version'})) {
      // Cast the whole version string rather than taking its first character,
      // so the result is an integer and multi-digit major versions work.
      return (int) $system_info->lucene->{'solr-spec-version'};
    }
    return 0;
  }

  /**
   * Sets $this->stats with the information about the Solr Core.
   *
   * The stats servlet is chosen by Solr version. When an environment id is
   * set, the raw XML response is cached in 'cache_solrsearch'.
   */
  protected function setStats() {
    $data = $this->getLuke();
    $solr_version = $this->getSolrVersion();
    // Only try to get stats if we have connected to the index.
    if (empty($this->stats) && isset($data->index->numDocs)) {
      if ($solr_version >= 4) {
        $url = $this->_constructUrl(self::STATS_SERVLET_4);
      }
      else {
        $url = $this->_constructUrl(self::STATS_SERVLET);
      }
      if ($this->env_id) {
        $this->stats_cid = $this->env_id . ":stats:" . drupal_hash_base64($url);
        $cache = cache_get($this->stats_cid, 'cache_solrsearch');
        if (isset($cache->data)) {
          $this->stats = simplexml_load_string($cache->data);
        }
      }
      // Second pass to populate the cache if necessary.
      if (empty($this->stats)) {
        $response = $this->_sendRawGet($url);
        $this->stats = simplexml_load_string($response->data);
        if ($this->env_id) {
          cache_set($this->stats_cid, $response->data, 'cache_solrsearch');
        }
      }
    }
  }

  /**
   * Get information about the Solr Core.
   *
   * Returns a SimpleXML document.
   */
  public function getStats() {
    if (!isset($this->stats)) {
      $this->setStats();
    }
    return $this->stats;
  }

  /**
   * Returns the trimmed text of the first node matching an XPath expression.
   *
   * @param SimpleXMLElement $xml
   *   Document to query.
   * @param string $path
   *   XPath expression.
   *
   * @return string
   *   The trimmed string value of the first match, or '' when nothing matches
   *   or the expression could not be evaluated.
   */
  protected function xpathFirstValue($xml, $path) {
    $nodes = $xml->xpath($path);
    if (empty($nodes)) {
      return '';
    }
    return trim((string) reset($nodes));
  }

  /**
   * Get summary information about the Solr Core.
   *
   * @return array
   *   Values keyed by placeholder name ('@pending_docs', '@index_size', ...).
   *   Every value is '' when no stats are available.
   */
  public function getStatsSummary() {
    $stats = $this->getStats();
    $solr_version = $this->getSolrVersion();

    $summary = array(
      '@pending_docs' => '',
      '@autocommit_time_seconds' => '',
      '@autocommit_time' => '',
      '@deletes_by_id' => '',
      '@deletes_by_query' => '',
      '@deletes_total' => '',
      '@schema_version' => '',
      '@core_name' => '',
      '@index_size' => '',
    );

    if (empty($stats)) {
      return $summary;
    }

    if ($solr_version <= 3) {
      $paths = array(
        'pending' => '//stat[@name="docsPending"]',
        'max_time' => '//stat[@name="autocommit maxTime"]',
        'deletes_id' => '//stat[@name="deletesById"]',
        'deletes_query' => '//stat[@name="deletesByQuery"]',
        'core' => '/solr/core[1]',
        'size' => '//stat[@name="indexSize"]',
      );
      $summary['@schema_version'] = $this->xpathFirstValue($stats, '/solr/schema[1]');
    }
    else {
      // NOTE(review): the ["stats"] / ["core"] predicates are string literals,
      // which XPath evaluates as always true; presumably [@name="..."] was
      // intended. Left unchanged to keep the same matches - confirm.
      $paths = array(
        'pending' => '//lst["stats"]/long[@name="docsPending"]',
        'max_time' => '//lst["stats"]/str[@name="autocommit maxTime"]',
        'deletes_id' => '//lst["stats"]/long[@name="deletesById"]',
        'deletes_query' => '//lst["stats"]/long[@name="deletesByQuery"]',
        'core' => '//lst["core"]/str[@name="coreName"]',
        'size' => '//lst["core"]/str[@name="indexSize"]',
      );
      // The mbeans output is not queried for the schema; use the system info.
      $system_info = $this->getSystemInfo();
      $summary['@schema_version'] = isset($system_info->core->schema) ? $system_info->core->schema : '';
    }

    $summary['@pending_docs'] = (int) $this->xpathFirstValue($stats, $paths['pending']);
    $max_time = (int) $this->xpathFirstValue($stats, $paths['max_time']);
    // Convert to seconds.
    $summary['@autocommit_time_seconds'] = $max_time / 1000;
    $summary['@autocommit_time'] = format_interval($max_time / 1000);
    $summary['@deletes_by_id'] = (int) $this->xpathFirstValue($stats, $paths['deletes_id']);
    $summary['@deletes_by_query'] = (int) $this->xpathFirstValue($stats, $paths['deletes_query']);
    $summary['@deletes_total'] = $summary['@deletes_by_id'] + $summary['@deletes_by_query'];
    $summary['@core_name'] = $this->xpathFirstValue($stats, $paths['core']);
    $summary['@index_size'] = $this->xpathFirstValue($stats, $paths['size']);

    return $summary;
  }

  /**
   * Clear cached Solr data.
   *
   * @throws Exception
   *   If the server does not answer a ping.
   */
  public function clearCache() {
    // Don't clear cached data if the server is unavailable.
    if (@$this->ping()) {
      $this->_clearCache();
    }
    else {
      throw new Exception('No Solr instance available when trying to clear the cache.');
    }
  }

  /**
   * Clears the cached stats and luke data, both persistent and in-memory.
   */
  protected function _clearCache() {
    if ($this->env_id) {
      cache_clear_all($this->env_id . ":stats:", 'cache_solrsearch', TRUE);
      cache_clear_all($this->env_id . ":luke:", 'cache_solrsearch', TRUE);
    }
    $this->luke = array();
    $this->stats = NULL;
  }

  /**
   * Constructor
   *
   * @param $url
   *   The URL to the Solr server, possibly including a core name.  E.g. http://localhost:8983/solr/
   *   or https://search.example.com/solr/core99/
   * @param $env_id
   *   The machine name of a corresponding saved configuration used for loading
   *   data like which facets are enabled.
   */
  public function __construct($url, $env_id = NULL) {
    $this->env_id = $env_id;
    $this->setUrl($url);

    // determine our default http timeout from ini settings
    $this->_defaultTimeout = (int) ini_get('default_socket_timeout');

    // double check we didn't get 0 for a timeout
    if ($this->_defaultTimeout <= 0) {
      $this->_defaultTimeout = 60;
    }
  }

  /**
   * Returns the environment id given to the constructor.
   */
  public function getId() {
    return $this->env_id;
  }

  /**
   * Check the response code and throw an exception if it's not 200.
   *
   * @param stdClass $response
   *   response object.
   *
   * @return
   *   response object
   * @throws Exception
   */
  protected function checkResponse($response) {
    $code = (int) $response->code;
    if ($code != 200) {
      if ($code >= 400 && $code != 403 && $code != 404) {
        // Add details, like Solr's exception message.
        $response->status_message .= $response->data;
      }
      throw new Exception('"' . $code . '" Status: ' . $response->status_message);
    }
    return $response;
  }

  /**
   * Make a request to a servlet (a path) that's not a standard path.
   *
   * @param string $servlet
   *   A path to be added to the base Solr path. e.g. 'extract/tika'
   *
   * @param array $params
   *   Any request parameters when constructing the URL.
   *
   * @param array $options
   *   @see drupal_http_request() $options.
   *
   * @return
   *  response object
   *
   * @throws Exception
   */
  public function makeServletRequest($servlet, $params = array(), $options = array()) {
    // Add default params.
    $params += array(
      'wt' => 'json',
      'json.nl' => self::NAMED_LIST_FORMAT,
    );

    $url = $this->_constructUrl($servlet, $params);
    $response = $this->_makeHttpRequest($url, $options);
    return $this->checkResponse($response);
  }

  /**
   * Central method for making a GET operation against this Solr Server
   */
  protected function _sendRawGet($url, $options = array()) {
    $response = $this->_makeHttpRequest($url, $options);
    return $this->checkResponse($response);
  }

  /**
   * Central method for making a POST operation against this Solr Server
   */
  protected function _sendRawPost($url, $options = array()) {
    $options['method'] = 'POST';
    // Normally we use POST to send XML documents.
    if (!isset($options['headers']['Content-Type'])) {
      $options['headers']['Content-Type'] = 'text/xml; charset=UTF-8';
    }
    $response = $this->_makeHttpRequest($url, $options);
    return $this->checkResponse($response);
  }

  /**
   * Central method for making the actual http request to the Solr Server
   *
   * This is just a wrapper around drupal_http_request(). When the response
   * body decodes to a JSON object, its top-level members are copied onto the
   * returned result object.
   */
  protected function _makeHttpRequest($url, array $options = array()) {
    if (!isset($options['method']) || $options['method'] == 'GET' || $options['method'] == 'HEAD') {
      // Make sure we are not sending a request body.
      $options['data'] = NULL;
    }

    $result = drupal_http_request($url, $options);

    if (!isset($result->code) || $result->code < 0) {
      $result->code = 0;
      $result->status_message = 'Request failed';
      $result->protocol = 'HTTP/1.0';
    }
    // Additional information may be in the error property.
    if (isset($result->error)) {
      $result->status_message .= ': ' . check_plain($result->error);
    }

    if (!isset($result->data)) {
      $result->data = '';
      $result->response = NULL;
    }
    else {
      $response = json_decode($result->data);
      if (is_object($response)) {
        foreach ($response as $key => $value) {
          $result->$key = $value;
        }
      }
    }
    return $result;
  }

  /**
   * Escape a value for special query characters such as ':', '(', ')', '*', '?', etc.
   *
   * NOTE: inside a phrase fewer characters need escaped, use {@link DrupalsolrsearchService::escapePhrase()} instead
   *
   * @param string $value
   * @return string
   */
  public static function escape($value) {
    //list taken from http://lucene.apache.org/java/docs/queryparsersyntax.html#Escaping%20Special%20Characters
    $pattern = '/(\+|-|&&|\|\||!|\(|\)|\{|}|\[|]|\^|"|~|\*|\?|:|\\\)/';
    $replace = '\\\$1';

    return preg_replace($pattern, $replace, $value);
  }

  /**
   * Escape a value meant to be contained in a phrase for special query characters
   *
   * @param string $value
   * @return string
   */
  public static function escapePhrase($value) {
    $pattern = '/("|\\\)/';
    $replace = '\\\$1';

    return preg_replace($pattern, $replace, $value);
  }

  /**
   * Convenience function for creating phrase syntax from a value
   *
   * @param string $value
   * @return string
   */
  public static function phrase($value) {
    return '"' . self::escapePhrase($value) . '"';
  }

  /**
   * Return a valid http URL given this server's host, port and path and a provided servlet name
   *
   * @param $servlet
   *   A string path to a Solr request handler.
   * @param $params
   *   Request parameters; encoded with httpBuildQuery().
   * @param $added_query_string
   *   An already-encoded query string appended after the encoded $params.
   *
   * @return string
   */
  protected function _constructUrl($servlet, $params = array(), $added_query_string = NULL) {
    // PHP's built in http_build_query() doesn't give us the format Solr wants.
    $query_string = $this->httpBuildQuery($params);

    if ($query_string) {
      $query_string = '?' . $query_string;
      if ($added_query_string) {
        $query_string = $query_string . '&' . $added_query_string;
      }
    }
    elseif ($added_query_string) {
      $query_string = '?' . $added_query_string;
    }

    $url = $this->parsed_url;
    return $url['scheme'] . $url['user'] . $url['pass'] . $url['host'] . $url['port'] . $url['path'] . $servlet . $query_string;
  }

  /**
   * Get the Solr url
   *
   * @return string
   */
  public function getUrl() {
    return $this->_constructUrl('');
  }

  /**
   * Set the Solr url.
   *
   * The URL is split into pieces that already carry their separators
   * ('://', ':', '@', surrounding slashes) so _constructUrl() can simply
   * concatenate them.
   *
   * @param $url
   *
   * @return $this
   */
  public function setUrl($url) {
    $parsed_url = parse_url($url);
    // parse_url() returns FALSE for seriously malformed URLs and omits the
    // host for relative ones; fall back to empty pieces instead of notices.
    if (!is_array($parsed_url)) {
      $parsed_url = array();
    }
    $parsed_url += array('host' => '');

    if (!isset($parsed_url['scheme'])) {
      $parsed_url['scheme'] = 'http';
    }
    $parsed_url['scheme'] .= '://';

    if (!isset($parsed_url['user'])) {
      $parsed_url['user'] = '';
    }
    else {
      $parsed_url['host'] = '@' . $parsed_url['host'];
    }
    $parsed_url['pass'] = isset($parsed_url['pass']) ? ':' . $parsed_url['pass'] : '';
    $parsed_url['port'] = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';

    if (isset($parsed_url['path'])) {
      // Make sure the path has a single leading/trailing slash.
      $parsed_url['path'] = '/' . ltrim($parsed_url['path'], '/');
      $parsed_url['path'] = rtrim($parsed_url['path'], '/') . '/';
    }
    else {
      $parsed_url['path'] = '/';
    }
    // For now we ignore query and fragment.
    $this->parsed_url = $parsed_url;
    // Force the update url to be rebuilt on the next update() call.
    $this->update_url = NULL;
    return $this;
  }

  /**
   * Raw update Method. Takes a raw post body and sends it to the update service.  Post body
   * should be a complete and well formed xml document.
   *
   * @param string $rawPost
   * @param float $timeout Maximum expected duration (in seconds)
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function update($rawPost, $timeout = FALSE) {
    // @todo: throw exception if updates are disabled.
    if (empty($this->update_url)) {
      // Store the URL in an instance variable since many updates may be sent
      // via a single instance of this class.
      $this->update_url = $this->_constructUrl(self::UPDATE_SERVLET, array('wt' => 'json'));
    }
    $options = array('data' => $rawPost);
    if ($timeout) {
      $options['timeout'] = $timeout;
    }
    return $this->_sendRawPost($this->update_url, $options);
  }

  /**
   * Add an array of Solr Documents to the index all at once
   *
   * @param array $documents
   *   Should be an array of solrsearchDocument instances; other values are
   *   skipped.
   * @param boolean $overwrite
   *   When not NULL, sent as the overwrite="true|false" attribute of <add>.
   * @param int $commitWithin
   *   When not NULL, sent as the commitWithin attribute of <add>.
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function addDocuments($documents, $overwrite = NULL, $commitWithin = NULL) {
    $attr = '';

    if (isset($overwrite)) {
      // The parentheses matter: '.' binds tighter than '?:', so without them
      // the whole concatenation becomes the (always true) ternary condition.
      $attr .= ' overwrite="' . (empty($overwrite) ? 'false' : 'true') . '"';
    }
    if (isset($commitWithin)) {
      $attr .= ' commitWithin="' . intval($commitWithin) . '"';
    }

    $rawPost = "<add{$attr}>";
    foreach ($documents as $document) {
      if (is_object($document) && ($document instanceof solrsearchDocument)) {
        $rawPost .= solrsearchDocument::documentToXml($document);
      }
    }
    $rawPost .= '</add>';

    return $this->update($rawPost);
  }

  /**
   * Send a commit command.  Will be synchronous unless both wait parameters are set to false.
   *
   * @param boolean $optimize Defaults to true
   *   optimizes the index files. Only valid for solr versions <= 3
   * @param boolean $waitFlush
   *   block until index changes are flushed to disk. Only valid for solr versions <= 3
   * @param boolean $waitSearcher
   *   block until a new searcher is opened and registered as the main query searcher, making the changes visible.
   * @param float $timeout
   *   Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception)
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function commit($optimize = TRUE, $waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
    $optimizeValue = $optimize ? 'true' : 'false';
    $flushValue = $waitFlush ? 'true' : 'false';
    $searcherValue = $waitSearcher ? 'true' : 'false';
    $softCommit = $this->soft_commit ? 'true' : 'false';

    $solr_version = $this->getSolrVersion();
    if ($solr_version <= 3) {
      $rawPost = '<commit waitSearcher="' . $searcherValue . '" waitFlush="' . $flushValue . '" optimize="' . $optimizeValue . '" />';
    }
    else {
      $rawPost = '<commit waitSearcher="' . $searcherValue . '" softCommit="' . $softCommit . '" />';
    }

    $response = $this->update($rawPost, $timeout);
    // The index changed, so cached stats and luke data are stale.
    $this->_clearCache();
    return $response;
  }

  /**
   * Create a delete document based on document ID
   *
   * @param string $id Expected to be utf-8 encoded
   * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function deleteById($id, $timeout = 3600) {
    return $this->deleteByMultipleIds(array($id), $timeout);
  }

  /**
   * Create and post a delete document based on multiple document IDs.
   *
   * @param array $ids Expected to be utf-8 encoded strings
   * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function deleteByMultipleIds($ids, $timeout = 3600) {
    $rawPost = '<delete>';

    foreach ($ids as $id) {
      $rawPost .= '<id>' . htmlspecialchars($id, ENT_NOQUOTES, 'UTF-8') . '</id>';
    }
    $rawPost .= '</delete>';

    return $this->update($rawPost, $timeout);
  }

  /**
   * Create a delete document based on a query and submit it
   *
   * @param string $rawQuery Expected to be utf-8 encoded
   * @param float $timeout Maximum expected duration of the delete operation on the server (otherwise, will throw a communication exception)
   * @return stdClass response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function deleteByQuery($rawQuery, $timeout = 3600) {
    $rawPost = '<delete><query>' . htmlspecialchars($rawQuery, ENT_NOQUOTES, 'UTF-8') . '</query></delete>';

    return $this->update($rawPost, $timeout);
  }

  /**
   * Send an optimize command.  Will be synchronous unless both wait parameters are set
   * to false.
   *
   * @param boolean $waitFlush
   *   block until index changes are flushed to disk. Removed in Solr 4.0
   * @param boolean $waitSearcher
   *   block until a new searcher is opened and registered as the main query searcher, making the changes visible.
   * @param float $timeout
   *   Maximum expected duration of the commit operation on the server (otherwise, will throw a communication exception)
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function optimize($waitFlush = TRUE, $waitSearcher = TRUE, $timeout = 3600) {
    $flushValue = $waitFlush ? 'true' : 'false';
    $searcherValue = $waitSearcher ? 'true' : 'false';
    $softCommit = $this->soft_commit ? 'true' : 'false';

    $solr_version = $this->getSolrVersion();
    if ($solr_version <= 3) {
      $rawPost = '<optimize waitSearcher="' . $searcherValue . '" waitFlush="' . $flushValue . '" />';
    }
    else {
      $rawPost = '<optimize waitSearcher="' . $searcherValue . '" softCommit="' . $softCommit . '" />';
    }

    return $this->update($rawPost, $timeout);
  }

  /**
   * Like PHP's built in http_build_query(), but uses rawurlencode() and no [] for repeated params.
   *
   * @param array $query
   *   Parameters to encode. Array values are emitted as the same key repeated
   *   once per element; NULL values are emitted as the bare key.
   * @param string $parent
   *   Already-encoded key to use for every element; set when recursing.
   *
   * @return string
   *   The encoded query string, without a leading '?'.
   */
  protected function httpBuildQuery(array $query, $parent = '') {
    $params = array();

    foreach ($query as $key => $value) {
      $key = ($parent ? $parent : rawurlencode($key));

      // Recurse into children.
      if (is_array($value)) {
        $nested = $this->httpBuildQuery($value, $key);
        // An empty array would otherwise leave an empty '&&' segment.
        if ($nested !== '') {
          $params[] = $nested;
        }
      }
      // If a query parameter value is NULL, only append its key.
      elseif (!isset($value)) {
        $params[] = $key;
      }
      else {
        $params[] = $key . '=' . rawurlencode($value);
      }
    }

    return implode('&', $params);
  }

  /**
   * Simple Search interface
   *
   * Users without the "view restricted content" permission always get an
   * additional 'access-type:free' filter query.
   *
   * @param string $query The raw query string
   * @param array $params key / value pairs for other query parameters (see Solr documentation), use arrays for parameter keys used more than once (e.g. facet.field)
   * @param string $method 'GET' or 'POST'; long query strings are switched to POST automatically.
   *
   * @return response object
   *
   * @throws Exception If an error occurs during the service call
   */
  public function search($query = '', array $params = array(), $method = 'GET') {
    if (!user_access("view restricted content")) {
      // A caller may pass a single filter as a plain string; appending with
      // [] to a string is a fatal error, so normalize to an array first.
      $params['fq'] = isset($params['fq']) ? (array) $params['fq'] : array();
      $params['fq'][] = 'access-type:free';
    }

    // Always use JSON. See http://code.google.com/p/solr-php-client/issues/detail?id=6#c1 for reasoning
    $params['wt'] = 'json';
    // Additional default params.
    $params += array(
      'json.nl' => self::NAMED_LIST_FORMAT,
    );
    if ($query) {
      $params['q'] = $query;
    }
    // PHP's built in http_build_query() doesn't give us the format Solr wants.
    $queryString = $this->httpBuildQuery($params);
    // Check string length of the query string, change method to POST
    $len = strlen($queryString);
    // Fetch our threshold to find out when to flip to POST
    $max_len = solrsearch_environment_variable_get($this->env_id, 'solrsearch_search_post_threshold', 3600);

    // if longer than $max_len (default 3600) characters
    // we should switch to POST (a typical server handles 4096 max).
    // If this class is used independently (without environments), we switch automatically to POST at an
    // limit of 1800 chars.
    if (($len > 1800) && (empty($this->env_id) || ($len > $max_len))) {
      $method = 'POST';
    }

    if ($method == 'GET') {
      $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET, array(), $queryString);

      return $this->_sendRawGet($searchUrl);
    }
    elseif ($method == 'POST') {
      $searchUrl = $this->_constructUrl(self::SEARCH_SERVLET);
      $options = array(
        'data' => $queryString,
        'headers' => array(
          'Content-Type' => 'application/x-www-form-urlencoded; charset=UTF-8',
        ),
      );
      return $this->_sendRawPost($searchUrl, $options);
    }
    else {
      throw new Exception("Unsupported method '$method' for search(), use GET or POST");
    }
  }
}