Mercurial > hg > solrsearch
comparison Apache_Solr_Document.php @ 0:a2b4f67e73dc default tip
initial
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 08 Jun 2015 10:21:54 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a2b4f67e73dc |
---|---|
1 <?php | |
2 /** | |
3 * Copyright (c) 2007-2009, Conduit Internet Technologies, Inc. | |
4 * All rights reserved. | |
5 * | |
6 * Redistribution and use in source and binary forms, with or without | |
7 * modification, are permitted provided that the following conditions are met: | |
8 * | |
9 * - Redistributions of source code must retain the above copyright notice, | |
10 * this list of conditions and the following disclaimer. | |
11 * - Redistributions in binary form must reproduce the above copyright | |
12 * notice, this list of conditions and the following disclaimer in the | |
13 * documentation and/or other materials provided with the distribution. | |
14 * - Neither the name of Conduit Internet Technologies, Inc. nor the names of | |
15 * its contributors may be used to endorse or promote products derived from | |
16 * this software without specific prior written permission. | |
17 * | |
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
28 * POSSIBILITY OF SUCH DAMAGE. | |
29 * | |
30 * @copyright Copyright 2007-2009 Conduit Internet Technologies, Inc. (http://conduit-it.com) | |
31 * @license New BSD (http://solr-php-client.googlecode.com/svn/trunk/COPYING) | |
32 * @version $Id: Document.php 15 2009-08-04 17:53:08Z donovan.jimenez $ | |
33 * | |
34 * @package Apache | |
35 * @subpackage Solr | |
36 * @author Donovan Jimenez <djimenez@conduit-it.com> | |
37 */ | |
38 | |
39 /** | |
40 * Additional code Copyright (c) 2011 by Peter Wolanin, and | |
41 * additional contributors. | |
42 * | |
43 * This program is free software; you can redistribute it and/or modify | |
44 * it under the terms of the GNU General Public License as published by | |
45 * the Free Software Foundation; either version 2 of the License, or (at | |
46 * your option) any later version. | |
47 | |
48 * | |
49 * This program is distributed in the hope that it will be useful, but | |
50 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
51 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
52 * for more details. | |
53 * | |
54 * You should have received a copy of the GNU General Public License | |
55 * along with this program as the file LICENSE.txt; if not, please see | |
56 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. | |
57 */ | |
58 | |
/**
 * Holds key / value pairs that represent a Solr document, along with any
 * associated boost values. Field values can be accessed by direct
 * dereferencing such as:
 *
 * @code
 *   $document->title = 'Something';
 *   echo $document->title;
 * @endcode
 *
 * Additionally, the field values can be iterated with foreach:
 *
 * @code
 *   foreach ($document as $fieldName => $fieldValue) {
 *     ...
 *   }
 * @endcode
 */
class ApacheSolrDocument implements IteratorAggregate {

  /**
   * Document boost value, or FALSE when the default boost applies.
   *
   * @var float|false
   */
  protected $_documentBoost = FALSE;

  /**
   * Document field values, indexed by field name.
   *
   * A value is either a single value or an array of values (multi-valued
   * field).
   *
   * @var array
   */
  protected $_fields = array();

  /**
   * Document field boost values, indexed by field name.
   *
   * Each entry is a float greater than zero, or FALSE for the default boost.
   *
   * @var array
   */
  protected $_fieldBoosts = array();

  /**
   * Clear all boosts and fields from this document.
   */
  public function clear() {
    $this->_documentBoost = FALSE;

    $this->_fields = array();
    $this->_fieldBoosts = array();
  }

  /**
   * Get the current document boost.
   *
   * @return mixed
   *   FALSE for the default boost, or else a float.
   */
  public function getBoost() {
    return $this->_documentBoost;
  }

  /**
   * Set the document boost factor.
   *
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than zero is treated as FALSE.
   */
  public function setBoost($boost) {
    $boost = (float) $boost;

    if ($boost > 0.0) {
      $this->_documentBoost = $boost;
    }
    else {
      $this->_documentBoost = FALSE;
    }
  }

  /**
   * Add a value to a multi-valued field.
   *
   * NOTE: the Solr XML format allows you to specify boosts PER value even
   * though the underlying Lucene implementation only allows a boost per
   * field. To remedy this, the final field boost value will be the product
   * of all specified boosts on field values - this is similar to SolrJ's
   * functionality.
   *
   * @code
   *   $doc = new ApacheSolrDocument();
   *   $doc->addField('foo', 'bar', 2.0);
   *   $doc->addField('foo', 'baz', 3.0);
   *   // Resultant field boost will be 6!
   *   echo $doc->getFieldBoost('foo');
   * @endcode
   *
   * @param string $key
   *   Field name.
   * @param mixed $value
   *   Value to append to the field.
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than zero is treated as FALSE.
   */
  public function addField($key, $value, $boost = FALSE) {
    if (!isset($this->_fields[$key])) {
      // Create the holding array if this is the first value.
      $this->_fields[$key] = array();
    }
    else if (!is_array($this->_fields[$key])) {
      // Move an existing single value into an array.
      $this->_fields[$key] = array($this->_fields[$key]);
    }

    if ($this->getFieldBoost($key) === FALSE) {
      // Boost not already set, set it now.
      $this->setFieldBoost($key, $boost);
    }
    else if ((float) $boost > 0.0) {
      // Multiply the passed boost with the current field boost - similar to
      // the SolrJ implementation.
      $this->_fieldBoosts[$key] *= (float) $boost;
    }

    // Add the value to the array.
    $this->_fields[$key][] = $value;
  }

  /**
   * Handle the array manipulation for a multi-valued field.
   *
   * @param string $key
   *   Field name.
   * @param string $value
   *   Value to append to the field.
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than zero is treated as FALSE.
   *
   * @deprecated Use addField(...) instead.
   */
  public function setMultiValue($key, $value, $boost = FALSE) {
    $this->addField($key, $value, $boost);
  }

  /**
   * Get field information.
   *
   * @param string $key
   *   Field name.
   *
   * @return mixed
   *   Associative array with the keys 'name', 'value' and 'boost' if the
   *   field exists, FALSE otherwise.
   */
  public function getField($key) {
    if (isset($this->_fields[$key])) {
      return array(
        'name' => $key,
        'value' => $this->_fields[$key],
        'boost' => $this->getFieldBoost($key)
      );
    }

    return FALSE;
  }

  /**
   * Set a field value.
   *
   * Multi-valued fields should be set as arrays, or instead use the
   * addField(...) function which will automatically make sure the field is
   * an array. Any boost previously stored for the field is replaced.
   *
   * @param string $key
   *   Field name.
   * @param mixed $value
   *   Field value, or an array of values.
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than zero is treated as FALSE.
   */
  public function setField($key, $value, $boost = FALSE) {
    $this->_fields[$key] = $value;
    $this->setFieldBoost($key, $boost);
  }

  /**
   * Get the currently set field boost for a document field.
   *
   * @param string $key
   *   Field name.
   *
   * @return mixed
   *   The currently set field boost as a float, FALSE if one is not set.
   */
  public function getFieldBoost($key) {
    return isset($this->_fieldBoosts[$key]) ? $this->_fieldBoosts[$key] : FALSE;
  }

  /**
   * Set the field boost for a document field.
   *
   * @param string $key
   *   Field name for the boost.
   * @param mixed $boost
   *   Use FALSE for the default boost. Any other value is cast to float; a
   *   result that is not greater than zero is treated as FALSE.
   */
  public function setFieldBoost($key, $boost) {
    $boost = (float) $boost;

    if ($boost > 0.0) {
      $this->_fieldBoosts[$key] = $boost;
    }
    else {
      $this->_fieldBoosts[$key] = FALSE;
    }
  }

  /**
   * Return current field boosts, indexed by field name.
   *
   * @return array
   */
  public function getFieldBoosts() {
    return $this->_fieldBoosts;
  }

  /**
   * Get the names of all fields in this document.
   *
   * @return array
   */
  public function getFieldNames() {
    return array_keys($this->_fields);
  }

  /**
   * Get the values of all fields in this document.
   *
   * @return array
   */
  public function getFieldValues() {
    return array_values($this->_fields);
  }

  /**
   * IteratorAggregate implementation function. Allows usage:
   *
   * @code
   *   foreach ($document as $key => $value) {
   *     ...
   *   }
   * @endcode
   *
   * The attribute below silences the PHP 8.1+ return-type deprecation for
   * IteratorAggregate::getIterator() without changing the signature; older
   * PHP versions parse that line as a comment.
   *
   * @return ArrayIterator
   *   Iterator over a copy of the field values, keyed by field name.
   */
  #[\ReturnTypeWillChange]
  public function getIterator() {
    $arrayObject = new ArrayObject($this->_fields);

    return $arrayObject->getIterator();
  }

  /**
   * Magic get for field values.
   *
   * @param string $key
   *   Field name.
   *
   * @return mixed
   *   The field value, or NULL when the field is not set.
   */
  public function __get($key) {
    // Guard the lookup so reading an unknown field does not raise an
    // undefined-index notice.
    return isset($this->_fields[$key]) ? $this->_fields[$key] : NULL;
  }

  /**
   * Magic set for field values.
   *
   * Multi-valued fields should be set as arrays, or instead use the
   * addField(...) function which will automatically make sure the field is
   * an array.
   *
   * @param string $key
   *   Field name.
   * @param mixed $value
   *   Field value, or an array of values.
   */
  public function __set($key, $value) {
    $this->setField($key, $value);
  }

  /**
   * Magic isset for fields values. Do not call directly. Allows usage:
   *
   * @code
   *   isset($document->some_field);
   * @endcode
   *
   * @param string $key
   *   Field name.
   *
   * @return boolean
   *   Whether the given key is set in the document.
   */
  public function __isset($key) {
    return isset($this->_fields[$key]);
  }

  /**
   * Magic unset for field values. Do not call directly. Allows usage:
   *
   * @code
   *   unset($document->some_field);
   * @endcode
   *
   * @param string $key
   *   Field name; both the value and its boost are removed.
   */
  public function __unset($key) {
    unset($this->_fields[$key]);
    unset($this->_fieldBoosts[$key]);
  }

  /**
   * Create an XML fragment from an ApacheSolrDocument instance appropriate
   * for use inside a Solr add call.
   *
   * @param ApacheSolrDocument $document
   *   The document to serialize.
   *
   * @return string
   *   An XML formatted string from the given document.
   */
  public static function documentToXml(ApacheSolrDocument $document) {
    $xml = '<doc';

    if ($document->getBoost() !== FALSE) {
      $xml .= ' boost="' . $document->getBoost() . '"';
    }

    $xml .= '>';

    foreach ($document as $key => $value) {
      // Boosts are stored under the raw field name, so look the boost up
      // before the name is escaped for use as an XML attribute.
      $fieldBoost = $document->getFieldBoost($key);
      $name = htmlspecialchars($key, ENT_QUOTES, 'UTF-8');

      // Treat a single value as a one-element list so both cases share the
      // same serialization path.
      $values = is_array($value) ? $value : array($value);

      foreach ($values as $fieldValue) {
        $xml .= '<field name="' . $name . '"';

        if ($fieldBoost !== FALSE) {
          $xml .= ' boost="' . $fieldBoost . '"';

          // Only set the boost for the first field in the set.
          $fieldBoost = FALSE;
        }

        $xml .= '>' . htmlspecialchars($fieldValue, ENT_NOQUOTES, 'UTF-8') . '</field>';
      }
    }

    $xml .= '</doc>';

    // Remove any control characters to avoid Solr XML parser exception.
    return self::stripCtrlChars($xml);
  }

  /**
   * Replace control (non-printable) characters from string that are invalid
   * to Solr's XML parser with a space.
   *
   * Tab (0x09), line feed (0x0A) and carriage return (0x0D) are kept.
   *
   * @param string $string
   *   The string to clean.
   *
   * @return string
   *   The string with each matched control character replaced by a space.
   */
  public static function stripCtrlChars($string) {
    // See: http://w3.org/International/questions/qa-forms-utf-8.html
    // Printable utf-8 does not include any of these chars below x7F
    return preg_replace('@[\x00-\x08\x0B\x0C\x0E-\x1F]@', ' ', $string);
  }
}