comparison sites/all/modules/custom/solrsearch/Apache_Solr_Document.php @ 0:015d06b10d37 default tip

initial
author dwinter
date Wed, 31 Jul 2013 13:49:13 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:015d06b10d37
1 <?php
2 /**
3 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc.
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * - Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - Neither the name of Conduit Internet Technologies, Inc. nor the names of
15 * its contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGE.
29 *
30 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com)
31 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING)
32 * @version $Id: Document.php 15 2009-08-04 17:53:08Z donovan.jimenez $
33 *
34 * @package Apache
35 * @subpackage Solr
36 * @author Donovan Jimenez <djimenez@conduit-it.com>
37 */
38
39 /**
40 * Additional code Copyright (c) 2011 by Peter Wolanin, and
41 * additional contributors.
42 *
43 * This program is free software; you can redistribute it and/or modify
44 * it under the terms of the GNU General Public License as published by
45 * the Free Software Foundation; either version 2 of the License, or (at
46 * your option) any later version.
47
48 *
49 * This program is distributed in the hope that it will be useful, but
50 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
51 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
52 * for more details.
53 *
54 * You should have received a copy of the GNU General Public License
55 * along with this program as the file LICENSE.txt; if not, please see
56 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
57 */
58
/**
 * Holds key / value pairs that represent a Solr document along with any
 * associated boost values. Field values can be accessed by direct
 * dereferencing such as:
 *
 * @code
 * $document->title = 'Something';
 * echo $document->title;
 * @endcode
 *
 * Additionally, the field values can be iterated with foreach:
 *
 * @code
 * foreach ($document as $fieldName => $fieldValue) {
 *   ...
 * }
 * @endcode
 */
class ApacheSolrDocument implements IteratorAggregate {

  /**
   * Document boost value.
   *
   * @var mixed
   *   FALSE for the default boost, otherwise a float greater than 0.
   */
  protected $_documentBoost = FALSE;

  /**
   * Document field values, indexed by name.
   *
   * @var array
   */
  protected $_fields = array();

  /**
   * Document field boost values, indexed by name.
   *
   * @var array
   *   Each entry is either a float greater than 0 or FALSE (no boost).
   */
  protected $_fieldBoosts = array();

  /**
   * Clear all boosts and fields from this document.
   */
  public function clear() {
    $this->_documentBoost = FALSE;

    $this->_fields = array();
    $this->_fieldBoosts = array();
  }

  /**
   * Get the current document boost.
   *
   * @return mixed
   *   FALSE for the default boost, or else a float.
   */
  public function getBoost() {
    return $this->_documentBoost;
  }

  /**
   * Set the document boost factor.
   *
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than 0 is treated as FALSE.
   */
  public function setBoost($boost) {
    $boost = (float) $boost;

    $this->_documentBoost = ($boost > 0.0) ? $boost : FALSE;
  }

  /**
   * Add a value to a multi-valued field.
   *
   * NOTE: the solr XML format allows you to specify boosts
   * PER value even though the underlying Lucene implementation
   * only allows a boost per field. To remedy this, the final
   * field boost value will be the product of all specified boosts
   * on field values - this is similar to SolrJ's functionality.
   *
   * @code
   * $doc = new ApacheSolrDocument();
   * $doc->addField('foo', 'bar', 2.0);
   * $doc->addField('foo', 'baz', 3.0);
   * // resultant field boost will be 6!
   * echo $doc->getFieldBoost('foo');
   * @endcode
   *
   * @param string $key
   *   Field name.
   * @param mixed $value
   *   Value to append to the field.
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than 0 is treated as FALSE.
   */
  public function addField($key, $value, $boost = FALSE) {
    if (!isset($this->_fields[$key])) {
      // Create the holding array if this is the first value.
      $this->_fields[$key] = array();
    }
    elseif (!is_array($this->_fields[$key])) {
      // Move the existing scalar value into an array.
      $this->_fields[$key] = array($this->_fields[$key]);
    }

    if ($this->getFieldBoost($key) === FALSE) {
      // Boost not already set, set it now.
      $this->setFieldBoost($key, $boost);
    }
    elseif ((float) $boost > 0.0) {
      // Multiply the passed boost with the current field boost - similar to
      // the SolrJ implementation.
      $this->_fieldBoosts[$key] *= (float) $boost;
    }

    // Add the value to the array.
    $this->_fields[$key][] = $value;
  }

  /**
   * Handle the array manipulation for a multi-valued field.
   *
   * @param string $key
   *   Field name.
   * @param string $value
   *   Value to append to the field.
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than 0 is treated as FALSE.
   *
   * @deprecated Use addField(...) instead
   */
  public function setMultiValue($key, $value, $boost = FALSE) {
    $this->addField($key, $value, $boost);
  }

  /**
   * Get field information.
   *
   * @param string $key
   *   Field name.
   *
   * @return mixed
   *   Associative array with the keys 'name', 'value' and 'boost' if the
   *   field is set, FALSE otherwise.
   */
  public function getField($key) {
    if (isset($this->_fields[$key])) {
      return array(
        'name' => $key,
        'value' => $this->_fields[$key],
        'boost' => $this->getFieldBoost($key)
      );
    }

    return FALSE;
  }

  /**
   * Set a field value. Multi-valued fields should be set as arrays
   * or instead use the addField(...) function which will automatically
   * make sure the field is an array.
   *
   * Any previously stored boost for the field is replaced by $boost.
   *
   * @param string $key
   *   Field name.
   * @param mixed $value
   *   Field value (scalar, or array for multi-valued fields).
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than 0 is treated as FALSE.
   */
  public function setField($key, $value, $boost = FALSE) {
    $this->_fields[$key] = $value;
    $this->setFieldBoost($key, $boost);
  }

  /**
   * Get the currently set field boost for a document field.
   *
   * @param string $key
   *   Field name.
   *
   * @return mixed
   *   The currently set field boost as a float, FALSE if one is not set.
   */
  public function getFieldBoost($key) {
    return isset($this->_fieldBoosts[$key]) ? $this->_fieldBoosts[$key] : FALSE;
  }

  /**
   * Set the field boost for a document field.
   *
   * @param string $key
   *   Field name for the boost.
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than 0 is treated as FALSE.
   */
  public function setFieldBoost($key, $boost) {
    $boost = (float) $boost;

    $this->_fieldBoosts[$key] = ($boost > 0.0) ? $boost : FALSE;
  }

  /**
   * Return current field boosts, indexed by field name.
   *
   * @return array
   *   Entries are floats, or FALSE for fields without a boost.
   */
  public function getFieldBoosts() {
    return $this->_fieldBoosts;
  }

  /**
   * Get the names of all fields in this document.
   *
   * @return array
   */
  public function getFieldNames() {
    return array_keys($this->_fields);
  }

  /**
   * Get the values of all fields in this document.
   *
   * @return array
   */
  public function getFieldValues() {
    return array_values($this->_fields);
  }

  /**
   * IteratorAggregate implementation function. Allows usage:
   *
   * @code
   * foreach ($document as $key => $value) {
   *   ...
   * }
   * @endcode
   *
   * The attribute below silences the PHP 8.1+ return-type deprecation; older
   * PHP versions parse that line as a comment.
   *
   * @return Iterator
   *   Iterator over a copy of the field values, keyed by field name.
   */
  #[\ReturnTypeWillChange]
  public function getIterator() {
    $arrayObject = new ArrayObject($this->_fields);

    return $arrayObject->getIterator();
  }

  /**
   * Magic get for field values.
   *
   * @param string $key
   *   Field name.
   *
   * @return mixed
   *   The field value, or NULL when the field is not set.
   */
  public function __get($key) {
    // Guard the lookup so that reading an unknown field does not raise an
    // undefined index notice/warning.
    if (isset($this->_fields[$key])) {
      return $this->_fields[$key];
    }

    return NULL;
  }

  /**
   * Magic set for field values. Multi-valued fields should be set as arrays
   * or instead use the addField(...) function which will automatically
   * make sure the field is an array.
   *
   * @param string $key
   *   Field name.
   * @param mixed $value
   *   Field value.
   */
  public function __set($key, $value) {
    $this->setField($key, $value);
  }

  /**
   * Magic isset for fields values. Do not call directly. Allows usage:
   *
   * @code
   * isset($document->some_field);
   * @endcode
   *
   * @param string $key
   *   Field name.
   *
   * @return boolean
   *   Whether the given key is set in the document.
   */
  public function __isset($key) {
    return isset($this->_fields[$key]);
  }

  /**
   * Magic unset for field values. Do not call directly. Allows usage:
   *
   * @code
   * unset($document->some_field);
   * @endcode
   *
   * Removes both the field value and its boost.
   *
   * @param string $key
   *   Field name.
   */
  public function __unset($key) {
    unset($this->_fields[$key]);
    unset($this->_fieldBoosts[$key]);
  }

  /**
   * Create an XML fragment from a ApacheSolrDocument instance appropriate for
   * use inside a Solr add call.
   *
   * @param ApacheSolrDocument $document
   *   The document to serialize.
   *
   * @return string
   *   An XML formatted string from the given document.
   */
  public static function documentToXml(ApacheSolrDocument $document) {
    $xml = '<doc';

    $documentBoost = $document->getBoost();
    if ($documentBoost !== FALSE) {
      $xml .= ' boost="' . $documentBoost . '"';
    }

    $xml .= '>';

    foreach ($document as $key => $value) {
      // Look the boost up with the raw field name: boosts are stored under
      // the unescaped key, so escaping first would miss any field whose name
      // contains XML special characters.
      $fieldBoost = $document->getFieldBoost($key);
      $name = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');

      // Treat a single value as a one-element list so both cases share the
      // same serialization path.
      $values = is_array($value) ? $value : array($value);

      foreach ($values as $singleValue) {
        $xml .= '<field name="' . $name . '"';

        if ($fieldBoost !== FALSE) {
          $xml .= ' boost="' . $fieldBoost . '"';

          // Only set the boost for the first field in the set.
          $fieldBoost = FALSE;
        }

        $xml .= '>' . htmlspecialchars($singleValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
      }
    }

    $xml .= '</doc>';

    // Remove any control characters to avoid Solr XML parser exception.
    return self::stripCtrlChars($xml);
  }

  /**
   * Replace control (non-printable) characters from string that are invalid
   * to Solr's XML parser with a space.
   *
   * Tab (x09), line feed (x0A) and carriage return (x0D) are left untouched.
   *
   * @param string $string
   *   Input string.
   *
   * @return string
   *   The string with each matched control character replaced by a space.
   */
  public static function stripCtrlChars($string) {
    // See: http://w3.org/International/questions/qa-forms-utf-8.html
    // Printable utf-8 does not include any of these chars below x7F
    return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);
  }
}