Mercurial > hg > solrsearch
comparison solrsearch.index.inc_unused @ 0:a2b4f67e73dc default tip
initial
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Mon, 08 Jun 2015 10:21:54 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a2b4f67e73dc |
---|---|
1 <?php | |
2 | |
3 /** | |
4 * @file | |
5 * Functions related to Apache Solr indexing operations. | |
6 */ | |
7 | |
/**
 * Indexes queued entities of every entity type for one environment.
 *
 * For each entity type reported by entity_get_info(), up to $limit queue rows
 * are fetched, converted into Solr documents and submitted in one call to
 * solrsearch_index_send_to_solr(). When that call does not return FALSE, the
 * stored index position of the entity type is moved forward to the highest
 * "changed" timestamp and the highest entity ID found among the fetched rows
 * with a non-empty status, and the environment's last-updated time is set to
 * REQUEST_TIME.
 *
 * Developers can keep individual entities out of the index by implementing:
 * - hook_solrsearch_exclude()
 * - hook_solrsearch_ENTITY_TYPE_exclude()
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param int $limit
 *   The number of queue rows to process per entity type. With two indexed
 *   entity types that each have their own queue table, a limit of 50 means at
 *   most 100 rows are processed in one call.
 *
 * @return int
 *   The total number of documents sent to the Apache Solr server.
 *
 * @see solrsearch_index_get_entities_to_index()
 * @see solrsearch_index_entity_to_documents()
 * @see solrsearch_index_send_to_solr()
 */
function solrsearch_index_entities($env_id, $limit) {
  $submitted_total = 0;

  foreach (array_keys(entity_get_info()) as $entity_type) {
    // Fetch the next group of queued rows for this entity type.
    $queue_rows = solrsearch_index_get_entities_to_index($env_id, $entity_type, $limit);

    // Gather the documents produced by every queued row into one batch.
    $batch = array();
    foreach ($queue_rows as $queue_row) {
      $batch = array_merge($batch, solrsearch_index_entities_document($queue_row, $entity_type, $env_id));
    }

    if (solrsearch_index_send_to_solr($env_id, $batch) === FALSE) {
      // The submission failed: leave the stored index position untouched.
      continue;
    }
    $submitted_total += count($batch);

    // Advance the index position past every row that was indexable.
    $position = solrsearch_get_last_index_position($env_id, $entity_type);
    $newest_changed = $position['last_changed'];
    $highest_entity_id = $position['last_entity_id'];
    foreach ($queue_rows as $queue_row) {
      if (empty($queue_row->status)) {
        continue;
      }
      if ($queue_row->changed > $newest_changed) {
        $newest_changed = $queue_row->changed;
      }
      if ($queue_row->entity_id > $highest_entity_id) {
        $highest_entity_id = $queue_row->entity_id;
      }
    }
    solrsearch_set_last_index_position($env_id, $entity_type, $newest_changed, $highest_entity_id);
    solrsearch_set_last_index_updated($env_id, REQUEST_TIME);
  }

  return $submitted_total;
}
69 | |
/**
 * Converts one row of the index queue into a set of Solr documents.
 *
 * One entity can result in several documents if
 * solrsearch_index_entity_to_documents() decides to produce more than one.
 * Rows with an empty status are not converted; instead the entity is removed
 * from the index via solrsearch_remove_entity().
 *
 * @param stdClass $row
 *   A row from the indexing table. The properties read here are status,
 *   entity_id and entity_type.
 * @param string $entity_type
 *   The type of the entity.
 * @param string $env_id
 *   The machine name of the environment.
 *
 * @return array
 *   An array of solrsearchDocument objects; empty when the entity is
 *   excluded, unpublished or could not be converted.
 */
function solrsearch_index_entities_document($row, $entity_type, $env_id) {
  $documents = array();
  if (!empty($row->status)) {
    // Let any module exclude this entity from the index. Every implementation
    // is invoked even after one of them asked for exclusion.
    $build_document = TRUE;
    foreach (module_implements('solrsearch_exclude') as $module) {
      $exclude = module_invoke($module, 'solrsearch_exclude', $row->entity_id, $entity_type, $row, $env_id);
      // If the hook returns TRUE we should exclude the entity.
      if (!empty($exclude)) {
        $build_document = FALSE;
      }
    }
    foreach (module_implements('solrsearch_' . $entity_type . '_exclude') as $module) {
      $exclude = module_invoke($module, 'solrsearch_' . $entity_type . '_exclude', $row->entity_id, $row, $env_id);
      // If the hook returns TRUE we should exclude the entity.
      if (!empty($exclude)) {
        $build_document = FALSE;
      }
    }
    if ($build_document) {
      $built = solrsearch_index_entity_to_documents($row, $env_id);
      // solrsearch_index_entity_to_documents() returns FALSE when the entity
      // cannot be loaded; array_merge() only accepts arrays, so skip that.
      if (is_array($built)) {
        $documents = array_merge($documents, $built);
      }
    }
  }
  else {
    // Delete the entity from our index if the status callback returned 0.
    solrsearch_remove_entity($env_id, $row->entity_type, $row->entity_id);
  }
  // Clear the entity cache for this specific entity.
  entity_get_controller($row->entity_type)->resetCache(array($row->entity_id));
  return $documents;
}
/**
 * Returns the total number of indexable items and the number left to index.
 *
 * This is a helper function for modules that implement hook_search_status().
 * Entity types without any indexed bundle in this environment are skipped.
 *
 * @param string $env_id
 *   The machine name of the environment.
 *
 * @return array
 *   An associative array with the key-value pairs:
 *   - remaining: The number of items left to index.
 *   - total: The total number of items to index.
 *
 * @see hook_search_status()
 */
function solrsearch_index_status($env_id) {
  $remaining = 0;
  $total = 0;

  foreach (entity_get_info() as $entity_type => $info) {
    $bundles = solrsearch_get_index_bundles($env_id, $entity_type);
    if (empty($bundles)) {
      continue;
    }

    $table = solrsearch_get_indexer_table($entity_type);
    // The generic table holds rows of several entity types, so both counts
    // must be narrowed to the current type. Other, entity-specific tables
    // don't need this condition.
    $is_shared_table = ($table == 'solrsearch_index_entities');

    $total_query = db_select($table, 'asn')
      ->condition('asn.status', 1)
      ->condition('asn.bundle', $bundles);
    if ($is_shared_table) {
      $total_query->condition('asn.entity_type', $entity_type);
    }
    $total += $total_query->countQuery()->execute()->fetchField();

    // Get $last_entity_id and $last_changed.
    $last_index_position = solrsearch_get_last_index_position($env_id, $entity_type);
    $last_entity_id = $last_index_position['last_entity_id'];
    $last_changed = $last_index_position['last_changed'];

    // Find the remaining entities to index for this entity type: rows changed
    // after the stored timestamp, or rows not newer than it but with a higher
    // entity ID than the stored one.
    $remaining_query = db_select($table, 'aie')
      ->condition('aie.bundle', $bundles)
      ->condition('aie.status', 1)
      ->condition(db_or()
        ->condition('aie.changed', $last_changed, '>')
        ->condition(db_and()
          ->condition('aie.changed', $last_changed, '<=')
          ->condition('aie.entity_id', $last_entity_id, '>')))
      ->addTag('solrsearch_index_' . $entity_type);
    if ($is_shared_table) {
      $remaining_query->condition('aie.entity_type', $entity_type);
    }
    $remaining += $remaining_query->countQuery()->execute()->fetchField();
  }

  return array('remaining' => $remaining, 'total' => $total);
}
169 | |
/**
 * Worker callback for solrsearch_index_entities().
 *
 * Loads and processes the entity queued for indexing and converts it into one
 * or more documents that are sent to the Apache Solr server for indexing.
 *
 * The entity is loaded as the user specified in the "solrsearch_index_user"
 * system variable in order to prevent sensitive data from being indexed and
 * displayed to underprivileged users in search results. The index user
 * defaults to a user ID of "0", which is the anonymous user; the anonymous
 * user is also used when the configured account cannot be loaded. The
 * original user and session saving are restored before the function returns,
 * including when the entity fails to load.
 *
 * After the entity is loaded, a base document is created and handed to the
 * "document callback" callbacks of the entity type. Developers can modify the
 * resulting documents by implementing the following hooks:
 * - hook_solrsearch_index_document_build()
 * - hook_solrsearch_index_document_build_ENTITY_TYPE()
 * - hook_solrsearch_index_documents_alter()
 *
 * @param stdClass $item
 *   The data returned by the queue table containing:
 *   - entity_id: An integer containing the unique identifier of the entity,
 *     for example a node ID or comment ID.
 *   - entity_type: The unique identifier for the entity, i.e. "node", "file".
 *   - bundle: The machine-readable name of the bundle the passed entity is
 *     associated with.
 *   - status: The "published" status of the entity. The status will also be
 *     set to "0" when entity is deleted but the Apache Solr server is
 *     unavailable.
 *   - changed: A timestamp flagging when the entity was last modified.
 * @param string $env_id
 *   The machine name of the environment.
 *
 * @return array|false
 *   An array of documents that are sent to the Apache Solr server for
 *   indexing, or FALSE when the entity could not be loaded.
 */
function solrsearch_index_entity_to_documents($item, $env_id) {
  global $user;
  drupal_save_session(FALSE);
  $saved_user = $user;
  // Build the content for the index as an anonymous user to avoid exposing
  // restricted fields and such. By setting a variable, indexing can take
  // place as a different user.
  $uid = variable_get('solrsearch_index_user', 0);
  if ($uid == 0) {
    $user = drupal_anonymous_user();
  }
  else {
    $account = user_load($uid);
    // A stale uid must not leave the global user as FALSE; fall back to the
    // least privileged account instead.
    $user = $account ? $account : drupal_anonymous_user();
  }
  // Pull out all of our pertinent data.
  $entity_type = $item->entity_type;

  // Entity cache will be reset at the end of the indexing algorithm, to use
  // the cache properly whenever the code does another entity_load.
  $entity = entity_load($entity_type, array($item->entity_id));
  $entity = $entity ? reset($entity) : FALSE;

  if (empty($entity)) {
    // If the object failed to load, just stop - but undo the impersonation
    // first so the rest of the request does not run as the index user with
    // session saving disabled.
    $user = $saved_user;
    drupal_save_session(TRUE);
    return FALSE;
  }

  // Only the bundle is needed here; ID and revision ID are skipped.
  list(, , $bundle) = entity_extract_ids($entity_type, $entity);

  // Create a new document, and do the bare minimum on it.
  $document = _solrsearch_index_process_entity_get_document($entity, $entity_type);

  // Get the callback array to add stuff to the document.
  $callbacks = solrsearch_entity_get_callback($entity_type, 'document callback', $bundle);
  $documents = array();
  foreach ($callbacks as $callback) {
    // Call a type-specific callback to add stuff to the document.
    $documents = array_merge($documents, $callback($document, $entity, $entity_type, $env_id));
  }

  // Do this for all possible documents that were returned by the callbacks.
  foreach ($documents as $document) {
    // Call an all-entity hook to add stuff to the document.
    module_invoke_all('solrsearch_index_document_build', $document, $entity, $entity_type, $env_id);

    // Call a type-specific hook to add stuff to the document.
    module_invoke_all('solrsearch_index_document_build_' . $entity_type, $document, $entity, $env_id);

    // Final processing to ensure that the document is properly structured.
    // All records must have a label field, which is used for user-friendly
    // labeling.
    if (empty($document->label)) {
      $document->label = '';
    }

    // All records must have a "content" field, which is used for fulltext
    // indexing. If we don't have one, enter an empty value. This does mean
    // that the entity will not be fulltext searchable.
    if (empty($document->content)) {
      $document->content = '';
    }

    // All records must have a "teaser" field, which is used for abbreviated
    // displays when no highlighted text is available.
    if (empty($document->teaser)) {
      $document->teaser = truncate_utf8($document->content, 300, TRUE);
    }

    // Add additional indexing based on the body of each record.
    solrsearch_index_add_tags_to_document($document, $document->content);
  }

  // Now allow modules to alter each other's additions for maximum
  // flexibility. The implementations are called directly so that $documents
  // can be passed on to them as written in their own signature.
  foreach (module_implements('solrsearch_index_documents_alter') as $module) {
    $function = $module . '_solrsearch_index_documents_alter';
    $function($documents, $entity, $entity_type, $env_id);
  }

  // Restore the user.
  $user = $saved_user;
  drupal_save_session(TRUE);

  return $documents;
}
293 | |
/**
 * Indexes an array of documents to Solr.
 *
 * The server is pinged first; the documents are then submitted in chunks of
 * 20. Exceptions raised while connecting or submitting are caught here and
 * logged to watchdog instead of being propagated.
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param array $documents
 *   The documents to send.
 *
 * @return bool|int
 *   The number of documents indexed, TRUE when there was nothing to send, or
 *   FALSE on failure.
 */
function solrsearch_index_send_to_solr($env_id, array $documents) {
  try {
    // Get the $solr object.
    $solr = solrsearch_get_solr($env_id);
    // If there is no server available, don't continue.
    if (!$solr->ping(variable_get('solrsearch_ping_timeout', 4))) {
      throw new Exception(t('No Solr instance available during indexing.'));
    }
  }
  catch (Exception $e) {
    watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
    return FALSE;
  }
  // Do not index when we do not have any documents to send.
  // Send TRUE because this is not an error.
  if (empty($documents)) {
    return TRUE;
  }
  // Send the documents off to Solr.
  watchdog('Apache Solr', 'Adding @count documents.', array('@count' => count($documents)));
  // Holds the chunk currently being submitted so a failure can be attributed
  // to its entities.
  $docs = array();
  try {
    $docs_chunk = array_chunk($documents, 20);
    foreach ($docs_chunk as $docs) {
      $solr->addDocuments($docs);
    }
    watchdog('Apache Solr', 'Indexing succeeded on @count documents', array(
      '@count' => count($documents),
    ), WATCHDOG_INFO);
    return count($documents);
  }
  catch (Exception $e) {
    // Always start from a defined list so implode() below never receives an
    // undefined variable.
    $eids = array();
    foreach ($docs as $doc) {
      $eids[] = $doc->entity_type . '/' . $doc->entity_id;
    }
    watchdog('Apache Solr', 'Indexing failed on one of the following entity ids: @eids <br /> !message', array(
      '@eids' => implode(', ', $eids),
      '!message' => nl2br(strip_tags($e->getMessage())),
    ), WATCHDOG_ERROR);
    return FALSE;
  }
}
346 | |
/**
 * Copies the contents of selected HTML tags in $text into boost fields.
 *
 * The tag-to-field mapping is read from the "solrsearch_tags_to_index"
 * variable. The text found inside each mapped tag is cleaned and appended,
 * separated by a space, to the document field the tag maps to. Anchor tags
 * whose text looks like a URL are skipped.
 *
 * @param solrsearchDocument $document
 *   The document that receives the extracted text.
 * @param string $text
 *   The markup to scan. Must be stripped of control characters beforehand.
 */
function solrsearch_index_add_tags_to_document(solrsearchDocument $document, $text) {
  $tags_to_index = variable_get('solrsearch_tags_to_index', array(
    'h1' => 'tags_h1',
    'h2' => 'tags_h2_h3',
    'h3' => 'tags_h2_h3',
    'h4' => 'tags_h4_h5_h6',
    'h5' => 'tags_h4_h5_h6',
    'h6' => 'tags_h4_h5_h6',
    'u' => 'tags_inline',
    'b' => 'tags_inline',
    'i' => 'tags_inline',
    'strong' => 'tags_inline',
    'em' => 'tags_inline',
    'a' => 'tags_a'
  ));
  $tag_names = array_keys($tags_to_index);

  // Keep only the tags we index; every other tag is stripped.
  $allowed_tags = '<' . implode('><', $tag_names) . '>';
  $text = strip_tags($text, $allowed_tags);

  $tag_pattern = '@<(' . implode('|', $tag_names) . ')[^>]*>(.*)</\1>@Ui';
  preg_match_all($tag_pattern, $text, $matches);

  foreach ($matches[2] as $key => $inner_text) {
    $tag = drupal_strtolower($matches[1][$key]);

    // We don't want to index links auto-generated by the url filter.
    if ($tag == 'a' && preg_match('@(?:http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\.)[a-zA-Z0-9]+@', $inner_text)) {
      continue;
    }

    $field = $tags_to_index[$tag];
    if (!isset($document->{$field})) {
      $document->{$field} = '';
    }
    $document->{$field} .= ' ' . solrsearch_clean_text($inner_text);
  }
}
386 | |
/**
 * Builds the generic Solr document for an entity.
 *
 * Only the fields that are common to all entities are filled in here: the
 * document ID, site URL and hash, entity ID/type, bundle and bundle label,
 * language and - when the entity has a URI - its path, absolute URL and path
 * alias. Virtually all entities will need their own additional processing.
 *
 * @param object $entity
 *   The entity for which we want a document.
 * @param string $entity_type
 *   The type of entity we're processing.
 *
 * @return solrsearchDocument
 *   The newly created document.
 */
function _solrsearch_index_process_entity_get_document($entity, $entity_type) {
  $ids = entity_extract_ids($entity_type, $entity);
  $entity_id = $ids[0];
  $bundle = $ids[2];

  $document = new solrsearchDocument();

  // URLs are always absolute; when the entity carries a known language, that
  // language object is added to the URL options as well.
  $languages = language_list();
  $url_options = array('absolute' => TRUE);
  if (isset($entity->language) && isset($languages[$entity->language])) {
    $url_options['language'] = $languages[$entity->language];
  }

  $document->id = solrsearch_document_id($entity_id, $entity_type);
  $document->site = url(NULL, $url_options);
  $document->hash = solrsearch_site_hash();

  $document->entity_id = $entity_id;
  $document->entity_type = $entity_type;
  $document->bundle = $bundle;
  $document->bundle_name = entity_bundle_label($entity_type, $bundle);

  // 'und' is the language-neutral code in Drupal 7.
  $document->language = empty($entity->language) ? LANGUAGE_NONE : $entity->language;

  // A path is not a requirement of an entity.
  $uri = entity_uri($entity_type, $entity);
  if (empty($uri)) {
    return $document;
  }

  $document->path = $uri['path'];
  $document->url = url($uri['path'], $uri['options'] + $url_options);

  // Path aliases can have important information about the content, so add
  // them to the index as well. drupal_get_path_alias() is given the document
  // language; only an alias that differs from the path itself is stored.
  if (function_exists('drupal_get_path_alias')) {
    $alias = drupal_get_path_alias($document->path, $document->language);
    if ($alias && $alias != $document->path) {
      $document->path_alias = $alias;
    }
  }

  return $document;
}
448 | |
/**
 * Returns the next batch of queue rows to index for an environment.
 *
 * Nothing is returned while the "solrsearch_read_only" variable is set or
 * when no bundle of the entity type is indexed in the environment. Each
 * returned row has its status combined with the entity type's status
 * callbacks.
 *
 * @todo Remove the read only because it is not environment specific
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param string $entity_type
 *   The entity type whose queue is read.
 * @param int $limit
 *   The maximum number of rows to return.
 *
 * @return array
 *   List of rows to index.
 */
function solrsearch_index_get_entities_to_index($env_id, $entity_type, $limit) {
  if (variable_get('solrsearch_read_only', 0)) {
    return array();
  }
  $bundles = solrsearch_get_index_bundles($env_id, $entity_type);
  if (empty($bundles)) {
    return array();
  }

  $table = solrsearch_get_indexer_table($entity_type);
  // Where the previous indexing run stopped.
  $position = solrsearch_get_last_index_position($env_id, $entity_type);
  $last_entity_id = $position['last_entity_id'];
  $last_changed = $position['last_changed'];

  // A row still has to be indexed when it changed after the stored timestamp,
  // or when it is not newer than that timestamp but has a higher entity ID
  // than the stored one.
  $not_newer_but_higher_id = db_and()
    ->condition('aie.changed', $last_changed, '<=')
    ->condition('aie.entity_id', $last_entity_id, '>');
  $still_to_index = db_or()
    ->condition('aie.changed', $last_changed, '>')
    ->condition($not_newer_but_higher_id);

  // Fetch ALL fields from the indexer table, oldest rows first; the entity ID
  // is the second sort key so that the order is definitive.
  $query = db_select($table, 'aie');
  $query->fields('aie');
  $query->condition('aie.bundle', $bundles);
  $query->condition($still_to_index);
  $query->orderBy('aie.changed', 'ASC');
  $query->orderBy('aie.entity_id', 'ASC');
  $query->addTag('solrsearch_index_' . $entity_type);
  if ($table == 'solrsearch_index_entities') {
    // Other, entity-specific tables don't need this condition.
    $query->condition('aie.entity_type', $entity_type);
  }
  $query->range(0, $limit);
  $records = $query->execute();

  $status_callbacks = solrsearch_entity_get_callback($entity_type, 'status callback');
  $has_callback_list = is_array($status_callbacks);

  $rows = array();
  foreach ($records as $record) {
    // Check status and status callbacks before sending to the index.
    if ($has_callback_list) {
      foreach ($status_callbacks as $callback) {
        if (is_callable($callback)) {
          // With the current status as the left operand, no further callback
          // is invoked once the status has become false.
          $record->status = $record->status && $callback($record->entity_id, $record->entity_type);
        }
      }
    }
    $rows[] = $record;
  }
  return $rows;
}
514 | |
/**
 * Deletes the whole index for an environment, or a part of it.
 *
 * After the delete query has been committed, the affected content is queued
 * for reindexing: through the entity type's "reindex callback" when an entity
 * type is given, otherwise through solrsearch_index_mark_for_reindex().
 * Exceptions are logged to watchdog and not propagated.
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param string $entity_type
 *   (optional) specify to remove just this entity_type from the index.
 * @param string $bundle
 *   (optional) also specify a bundle to remove just the bundle from
 *   the index.
 */
function solrsearch_index_delete_index($env_id, $entity_type = NULL, $bundle = NULL) {
  // Instantiate a new Solr object.
  try {
    $solr = solrsearch_get_solr($env_id);
    $query = '*:*';

    if (!empty($entity_type) && !empty($bundle)) {
      $query = "(bundle:$bundle AND entity_type:$entity_type) OR sm_parent_entity_bundle:{$entity_type}-{$bundle}";
    }
    elseif (!empty($bundle)) {
      $query = "(bundle:$bundle)";
    }
    elseif (!empty($entity_type)) {
      // Restrict the delete to the requested entity type. Without this branch
      // the match-all query would wipe every document while only this entity
      // type gets queued for reindexing below.
      // NOTE(review): child documents are only matched through
      // sm_parent_entity_bundle when a bundle is known - confirm whether they
      // need to be removed here as well.
      $query = "(entity_type:$entity_type)";
    }

    // Allow other modules to modify the delete query.
    // For example, use the site hash so that you only delete this site's
    // content: $query = 'hash:' . solrsearch_site_hash()
    drupal_alter('solrsearch_delete_by_query', $query);
    $solr->deleteByQuery($query);
    $solr->commit();

    if (!empty($entity_type)) {
      $rebuild_callback = solrsearch_entity_get_callback($entity_type, 'reindex callback');
      if (is_callable($rebuild_callback)) {
        $rebuild_callback($env_id, $bundle);
      }
    }
    else {
      solrsearch_index_mark_for_reindex($env_id);
    }

    solrsearch_set_last_index_updated($env_id, REQUEST_TIME);
  }
  catch (Exception $e) {
    watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
  }
}
562 | |
/**
 * Removes the documents of newly excluded bundles from the index.
 *
 * For every excluded bundle, the documents with that entity type and bundle
 * are deleted, together with all documents that have the entity type and
 * bundle as a parent. A single commit is issued afterwards when at least one
 * bundle was passed.
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param string $entity_type
 *   The entity type the bundles belong to.
 * @param array $excluded_bundles
 *   The machine names of the bundles to remove.
 *
 * @return bool
 *   TRUE on success, FALSE on failure.
 */
function solrsearch_index_delete_bundles($env_id, $entity_type, array $excluded_bundles) {
  $succeeded = TRUE;
  try {
    $solr = solrsearch_get_solr($env_id);
    foreach ($excluded_bundles as $bundle) {
      $query = '(bundle:' . $bundle . ' AND entity_type:' . $entity_type . ') OR sm_parent_entity_bundle:' . $entity_type . '-' . $bundle;

      // Allow other modules to modify the delete query.
      // For example, use the site hash so that you only delete this site's
      // content: $query = 'hash:' . solrsearch_site_hash()
      drupal_alter('solrsearch_delete_by_query', $query);
      $solr->deleteByQuery($query);
    }
    if (!empty($excluded_bundles)) {
      $solr->commit();
    }
  }
  catch (Exception $e) {
    watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
    $succeeded = FALSE;
  }
  return $succeeded;
}
598 | |
/**
 * Removes a single entity from the index.
 *
 * Documents that have the deleted document as a parent are removed as well.
 * Once a delete has failed, every later call during the same request returns
 * FALSE immediately without contacting Solr again.
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param string $entity_type
 *   The type of the entity to remove.
 * @param string $entity_id
 *   The ID of the entity to remove.
 *
 * @return bool
 *   TRUE on success, FALSE on failure.
 */
function solrsearch_index_delete_entity_from_index($env_id, $entity_type, $entity_id) {
  // Remembers across calls that a delete query has already failed.
  static $failed = FALSE;

  if (!$failed) {
    try {
      $solr = solrsearch_get_solr($env_id);
      $document_id = solrsearch_document_id($entity_id, $entity_type);
      $solr->deleteByQuery('id:"' . $document_id . '" OR sm_parent_document_id:"' . $document_id . '"');
      solrsearch_set_last_index_updated($env_id, REQUEST_TIME);
      return TRUE;
    }
    catch (Exception $e) {
      watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
      // Don't keep trying queries if they are failing.
      $failed = TRUE;
    }
  }

  return FALSE;
}
631 | |
/**
 * Marks entities of an environment for reindexing.
 *
 * Invokes the "reindex callback" of every matching entity type that is
 * flagged as indexable in its entity info, then clears the stored index
 * position and the whole cache_solrsearch bin.
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param string|null $entity_type
 *   (optional) Limit the operation to this entity type. When omitted, all
 *   indexable entity types are marked.
 */
function solrsearch_index_mark_for_reindex($env_id, $entity_type = NULL) {
  foreach (entity_get_info() as $type => $entity_info) {
    if (($type == $entity_type) || ($entity_type == NULL)) {
      // !empty() also covers info arrays that have a 'solrsearch' entry
      // without an 'indexable' key, which would otherwise raise a notice.
      if (!empty($entity_info['solrsearch']['indexable'])) {
        $reindex_callback = solrsearch_entity_get_callback($type, 'reindex callback');
        // Only invoke something that can actually be called, as
        // solrsearch_index_delete_index() does for the same callback.
        if (!empty($reindex_callback) && is_callable($reindex_callback)) {
          call_user_func($reindex_callback, $env_id);
        }
      }
    }
  }
  solrsearch_clear_last_index_position($env_id, $entity_type);
  cache_clear_all('*', 'cache_solrsearch', TRUE);
}
652 | |
/**
 * Stores which bundles of an entity type are indexed in an environment.
 *
 * The existing rows for the environment and entity type are deleted and the
 * passed bundles are inserted in their place, all within one database
 * transaction.
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param string $entity_type
 *   The entity type to index.
 * @param array $bundles
 *   The machine names of the bundles to index.
 *
 * @throws Exception
 *   Any exception raised by the queries, after the transaction was rolled
 *   back.
 */
function solrsearch_index_set_bundles($env_id, $entity_type, array $bundles) {
  // Held in a local variable so the transaction stays open until return.
  $transaction = db_transaction();
  try {
    $delete = db_delete('solrsearch_index_bundles');
    $delete->condition('env_id', $env_id);
    $delete->condition('entity_type', $entity_type);
    $delete->execute();

    if (!empty($bundles)) {
      $insert = db_insert('solrsearch_index_bundles');
      $insert->fields(array('env_id', 'entity_type', 'bundle'));
      foreach ($bundles as $bundle_name) {
        $insert->values(array(
          'env_id' => $env_id,
          'entity_type' => $entity_type,
          'bundle' => $bundle_name,
        ));
      }
      $insert->execute();
    }
  }
  catch (Exception $exception) {
    $transaction->rollback();
    // Re-throw the exception so we are aware of the failure.
    throw $exception;
  }
}
693 | |
// This really should be in core, but it isn't yet. When it gets added to core,
// we can remove this version.
// @see http://drupal.org/node/969180
if (!function_exists('entity_bundle_label')) {

  /**
   * Returns the label of a bundle.
   *
   * The labels of all bundles are collected from entity_get_info() on first
   * use and kept in a drupal_static() cache.
   *
   * @param string $entity_type
   *   The entity type; e.g. 'node' or 'user'.
   * @param string $bundle_name
   *   The bundle for which we want the label from.
   *
   * @return string|false
   *   A string with the human-readable name of the bundle, or FALSE if no
   *   label is specified or the entity type/bundle is unknown.
   */
  function entity_bundle_label($entity_type, $bundle_name) {
    $labels = &drupal_static(__FUNCTION__, array());

    if (empty($labels)) {
      foreach (entity_get_info() as $type => $info) {
        // Tolerate entity info without a 'bundles' entry.
        if (empty($info['bundles'])) {
          continue;
        }
        foreach ($info['bundles'] as $bundle => $bundle_info) {
          $labels[$type][$bundle] = !empty($bundle_info['label']) ? $bundle_info['label'] : FALSE;
        }
      }
    }

    // Unknown combinations yield the documented FALSE instead of a notice.
    return isset($labels[$entity_type][$bundle_name]) ? $labels[$entity_type][$bundle_name] : FALSE;
  }

}
725 | |
726 /** | |
727 * Builds the node-specific information for a Solr document. | |
728 * | |
729 * @param solrsearchDocument $document | |
730 * The Solr document we are building up. | |
731 * @param object $node | |
732 * The entity we are indexing. | |
733 * @param string $entity_type | |
734 * The type of entity we're dealing with. | |
735 * @param string $env_id | |
736 * The machine name of the environment. | |
737 * | |
738 * @return array A set of solrsearchDocument documents | |
739 */ | |
function solrsearch_index_node_solr_document(solrsearchDocument $document, $node, $entity_type, $env_id) {
  // Solr only keeps the fields below when schema.xml declares them.
  $document->label = solrsearch_clean_text($node->title);

  // Render the node in the 'search_index' view mode to obtain its body text.
  $langcode = empty($node->language) ? LANGUAGE_NONE : $node->language;
  $render_array = node_view($node, 'search_index', $langcode);
  // Unset #theme to keep unneeded HTML out of the rendered text.
  unset($render_array['#theme']);
  $document->content = solrsearch_clean_text(drupal_render($render_array));

  // Use the node's own teaser when it has one, otherwise the first 300
  // characters of the cleaned content.
  $document->teaser = isset($node->teaser)
    ? solrsearch_clean_text($node->teaser)
    : truncate_utf8($document->content, 300, TRUE);

  // Path aliases can carry meaningful keywords, so index them as well.
  if (function_exists('drupal_get_path_alias')) {
    // A language-specific alias is requested when the node has a language;
    // passing NULL leaves the language choice to drupal_get_path_alias().
    $system_path = 'node/' . $node->nid;
    $alias = drupal_get_path_alias($system_path, empty($node->language) ? NULL : $node->language);
    if ($alias && $alias != $system_path) {
      $document->path_alias = $alias;
    }
  }

  // Author name, once as a string field and once as a searchable text field.
  $document->ss_name = $node->name;
  $document->tos_name = $node->name;

  // The formatted user name is indexed for both searching and sorting.
  $author = (object) array('uid' => $node->uid, 'name' => $node->name);
  $formatted_name = format_username($author);
  $document->ss_name_formatted = $formatted_name;
  $document->tos_name_formatted = $formatted_name;
  $document->is_uid = $node->uid;
  $document->bs_status = $node->status;
  $document->bs_sticky = $node->sticky;
  $document->bs_promote = $node->promote;
  $document->is_tnid = $node->tnid;
  $document->bs_translate = $node->translate;

  // Nodes without a language are indexed as LANGUAGE_NONE ('und' in
  // Drupal 7).
  $document->ss_language = $langcode;

  // Creation and modification timestamps.
  $document->ds_created = solrsearch_date_iso($node->created);
  $document->ds_changed = solrsearch_date_iso($node->changed);

  // Comment statistics are only indexed when the node has comments.
  $has_comments = isset($node->last_comment_timestamp) && !empty($node->comment_count);
  if ($has_comments) {
    $document->ds_last_comment_timestamp = solrsearch_date_iso($node->last_comment_timestamp);
    $document->ds_last_comment_or_change = solrsearch_date_iso(max($node->last_comment_timestamp, $node->changed));
    $document->is_comment_count = $node->comment_count;
  }
  else {
    $document->ds_last_comment_or_change = solrsearch_date_iso($node->changed);
  }

  // Collect the hook_node_update_index() output per module. The hooks are
  // called by hand so the result stays keyed by module name, which is what
  // lets the comment text be separated from the rest below.
  $extra = array();
  $excludes = variable_get('solrsearch_exclude_nodeapi_types', array());
  $skipped_modules = isset($excludes[$node->type]) ? $excludes[$node->type] : array();

  foreach (module_implements('node_update_index') as $module) {
    // Modules excluded for this node type are skipped, e.g. 'comment' to
    // leave a type's comments out of the index.
    if (!empty($skipped_modules[$module])) {
      continue;
    }
    $callback = $module . '_node_update_index';
    $result = $callback($node);
    if ($result) {
      $extra[$module] = $result;
    }
  }

  // Comment text gets its own field and is taken out of the extra output.
  if (isset($extra['comment'])) {
    $document->ts_comments = solrsearch_clean_text($extra['comment']);
    unset($extra['comment']);
    // @todo: do we want to reproduce solrsearch_add_tags_to_document() for comments?
  }

  // Whatever other modules returned goes into an omit-norms text field,
  // since it is usually short and not a real full-text field.
  if (!empty($extra)) {
    $document->tos_content_extra = solrsearch_clean_text(implode(' ', $extra));
  }

  // An array is returned because document callbacks may produce several
  // documents; this one always produces exactly one.
  return array($document);
}
852 | |
/**
 * Callback invoked when the set of indexed node bundles has changed.
 *
 * This is a deliberate no-op placeholder: none of the arguments are used and
 * nothing is returned.
 *
 * @param $env_id
 * @param $existing_bundles
 * @param $new_bundles
 */
function solrsearch_index_node_bundles_changed($env_id, $existing_bundles, $new_bundles) {
  // Intentionally empty.
}
864 | |
/**
 * Reindexing callback for nodes: rebuilds the node rows of the index queue.
 *
 * Inside one database transaction, the status-1 rows of the node indexer
 * table are deleted and re-inserted from the published rows of the {node}
 * table with 'changed' set to REQUEST_TIME. Both steps are limited to
 * $bundle when given, otherwise to the environment's indexable bundles when
 * that list is not empty.
 *
 * @param string $env_id
 *   The machine name of the environment.
 * @param string|null $bundle
 *   (optional) The bundle to reindex. When omitted, all bundles are
 *   re-queued.
 *
 * @return null
 *   NULL is returned explicitly when $bundle is not in a non-empty indexable
 *   bundles list; otherwise the function ends without a return value.
 *
 * @throws Exception
 *   Any exception raised while rebuilding the queue is re-thrown after the
 *   transaction has been rolled back.
 */
function solrsearch_index_node_solr_reindex($env_id, $bundle = NULL) {
  $indexer_table = solrsearch_get_indexer_table('node');
  $transaction = db_transaction();
  try {
    $indexable_bundles = solrsearch_get_index_bundles($env_id, 'node');
    $has_indexable_list = !empty($indexable_bundles);

    // A bundle outside the indexable list has nothing to reindex.
    if ($bundle && $has_indexable_list && !in_array($bundle, $indexable_bundles)) {
      return NULL;
    }

    // Work out the bundle restriction once; it applies to both queries.
    // Element 0 is the condition value, element 1 the operator (NULL lets
    // the query builder choose its default operator).
    $bundle_filter = NULL;
    if (!empty($bundle)) {
      $bundle_filter = array($bundle, NULL);
    }
    elseif ($has_indexable_list) {
      $bundle_filter = array($indexable_bundles, 'IN');
    }

    // Only status 1 rows are removed: status 0 rows still have to be
    // deleted from the index later on.
    $delete = db_delete($indexer_table);
    $delete->condition('status', 1);
    if ($bundle_filter) {
      $delete->condition('bundle', $bundle_filter[0], $bundle_filter[1]);
    }
    $delete->execute();

    // Select every published node, shaped like an indexer table row.
    $select = db_select('node', 'n');
    $select->condition('status', 1);
    $select->addExpression("'node'", 'entity_type');
    $select->addField('n', 'nid', 'entity_id');
    $select->addField('n', 'type', 'bundle');
    $select->addField('n', 'status', 'status');
    $select->addExpression(REQUEST_TIME, 'changed');
    if ($bundle_filter) {
      $select->condition('n.type', $bundle_filter[0], $bundle_filter[1]);
    }

    // Re-queue the selected nodes.
    db_insert($indexer_table)
      ->fields(array('entity_id', 'bundle', 'status', 'entity_type', 'changed'))
      ->from($select)
      ->execute();
  }
  catch (Exception $e) {
    $transaction->rollback();
    throw $e;
  }
}
931 | |
/**
 * Status callback for nodes.
 *
 * Loads the entity and normalises its 'status' property to 1 or 0.
 *
 * @param $entity_id
 * @param $entity_type
 *
 * @return int|false
 *   1 when the entity's status equals 1, 0 for any other status, or FALSE
 *   when the entity could not be loaded.
 */
function solrsearch_index_node_status_callback($entity_id, $entity_type) {
  $loaded = entity_load($entity_type, array($entity_id));
  $entity = $loaded ? reset($loaded) : FALSE;

  // Nothing to report on when the entity failed to load.
  if (empty($entity)) {
    return FALSE;
  }

  // Anything other than 1 counts as 0.
  return (int) ($entity->status == 1);
}
955 | |
/**
 * Indexing callback that converts a term reference field into Solr fields.
 *
 * Every referenced term is expanded with taxonomy_get_parents_all(), and each
 * term in that result is indexed under:
 * - $index_key (only when the field is multi-valued),
 * - 'tid',
 * - 'im_vid_' followed by the vocabulary ID,
 * - 'sm_vid_' followed by solrsearch_vocab_name(), holding the cleaned name.
 * Finally one 'tm_vid_VID_names' field per vocabulary receives the
 * space-separated term names.
 *
 * Returns an empty array when the field is empty or the taxonomy function
 * taxonomy_get_parents_all() is not available.
 *
 * @param object $node
 * @param string $field_name
 * @param string $index_key
 * @param array $field_info
 * @return array $fields
 *   fields that will be indexed for this term reference
 */
function solrsearch_term_reference_indexing_callback($node, $field_name, $index_key, array $field_info) {
  // Keep ancestors cached.
  $ancestors = &drupal_static(__FUNCTION__, array());

  $fields = array();
  if (empty($node->{$field_name}) || !function_exists('taxonomy_get_parents_all')) {
    return $fields;
  }

  // Only the first element of the field array (the original code named its
  // key $lang) is indexed. reset() replaces each(), which was deprecated in
  // PHP 7.2 and removed in PHP 8.0.
  $field = $node->$field_name;
  $items = is_array($field) ? reset($field) : FALSE;
  if (empty($items) || !is_array($items)) {
    return $fields;
  }

  $is_multiple = !empty($field_info['multiple']);
  $vocab_names = array();
  foreach ($items as $item) {
    if (!isset($item['tid'])) {
      // Without a term ID there is nothing to look up.
      continue;
    }
    $tid = $item['tid'];

    // Triple indexing of tids lets us do efficient searches (on tid)
    // and do accurate per field or per-vocabulary faceting.
    //
    // By including the ancestors of a term in the index, searches for
    // general categories match specific categories, e.g. Fruit -> apple:
    // a search for fruit will find content categorized with apple.
    if (!isset($ancestors[$tid])) {
      $ancestors[$tid] = taxonomy_get_parents_all($tid);
    }

    foreach ($ancestors[$tid] as $ancestor) {
      // The parent term is indexed against the field regardless of whether
      // the facet is shown as a hierarchy. A single-valued field cannot take
      // another value, so this is limited to multi-valued fields.
      if ($is_multiple) {
        $fields[] = array(
          'key' => $index_key,
          'value' => $ancestor->tid,
        );
      }
      $fields[] = array(
        'key' => 'tid',
        'value' => $ancestor->tid,
      );
      $fields[] = array(
        'key' => 'im_vid_' . $ancestor->vid,
        'value' => $ancestor->tid,
      );
      $name = solrsearch_clean_text($ancestor->name);
      $vocab_names[$ancestor->vid][] = $name;
      // Each name is also indexed as a string for cross-site faceting,
      // using the vocabulary name rather than the vid in the field name.
      $fields[] = array(
        'key' => 'sm_vid_' . solrsearch_vocab_name($ancestor->vid),
        'value' => $name,
      );
    }
  }

  // Index the term names into a text field for MLT queries and keyword
  // searching.
  foreach ($vocab_names as $vid => $names) {
    $fields[] = array(
      'key' => 'tm_vid_' . $vid . '_names',
      'value' => implode(' ', $names),
    );
  }
  return $fields;
}
1027 | |
/**
 * Helper function - returns a vocabulary name usable as a PHP identifier.
 *
 * The name is read from {taxonomy_vocabulary}, every byte outside
 * a-z, A-Z, 0-9, '_' and 0x7f-0xff is replaced by '_', and the result is
 * cached per request with drupal_static().
 *
 * @param integer $vid
 * @return string
 */
function solrsearch_vocab_name($vid) {
  $names = &drupal_static(__FUNCTION__, array());

  if (isset($names[$vid])) {
    return $names[$vid];
  }

  $raw_name = db_query('SELECT v.name FROM {taxonomy_vocabulary} v WHERE v.vid = :vid', array(':vid' => $vid))->fetchField();
  $safe_name = preg_replace('/[^a-zA-Z0-9_\x7f-\xff]/', '_', $raw_name);

  // Fall back to '_VID_' when nothing but underscores is left.
  if (!rtrim($safe_name, '_')) {
    $safe_name = '_' . $vid . '_';
  }

  $names[$vid] = $safe_name;
  return $names[$vid];
}
1048 | |
/**
 * Default indexing callback that converts a field's values into Solr fields.
 *
 * Each item's 'value' is converted according to $field_info['index_type']:
 * integer-like and boolean types through intval(), float-like types through
 * solrsearch_floatval(), everything else through solrsearch_clean_text().
 * For multi-valued numeric fields the first value is additionally indexed
 * under the single-valued index key so that it can be used for stats.
 *
 * Only the first element of the field array (the original code named its key
 * $lang) is indexed. Items without a 'value' are skipped.
 *
 * @param object $entity
 * @param string $field_name
 * @param string $index_key
 * @param array $field_info
 * @return array $fields
 *   A list of arrays with a 'key' and a 'value' element each.
 */
function solrsearch_fields_default_indexing_callback($entity, $field_name, $index_key, array $field_info) {
  $fields = array();
  if (empty($entity->{$field_name})) {
    return $fields;
  }

  // reset() replaces each(), which was deprecated in PHP 7.2 and removed in
  // PHP 8.0.
  $field = $entity->$field_name;
  $values = is_array($field) ? reset($field) : FALSE;
  if (empty($values) || !is_array($values)) {
    return $fields;
  }

  // Pick the conversion function for the Solr index type.
  $numeric = TRUE;
  $index_type = isset($field_info['index_type']) ? $field_info['index_type'] : NULL;
  switch ($index_type) {
    case 'integer':
    case 'half-int':
    case 'sint':
    case 'tint':
    case 'thalf-int':
    case 'boolean':
      $function = 'intval';
      break;

    case 'float':
    case 'double':
    case 'sfloat':
    case 'sdouble':
    case 'tfloat':
    case 'tdouble':
      $function = 'solrsearch_floatval';
      break;

    default:
      $numeric = FALSE;
      $function = 'solrsearch_clean_text';
  }

  // Iterate with foreach so that the deltas need not be 0-based and
  // sequential, and so that an item without a value is not indexed as an
  // empty value.
  foreach ($values as $item) {
    if (!isset($item['value'])) {
      continue;
    }
    $fields[] = array(
      'key' => $index_key,
      'value' => $function($item['value']),
    );
  }

  // Also store the first value of a multi-valued numeric field under the
  // single-valued index key.
  $first = reset($values);
  if (!empty($field_info['multiple']) && $numeric && !empty($first) && isset($first['value'])) {
    $singular_field_info = $field_info;
    $singular_field_info['multiple'] = FALSE;
    $fields[] = array(
      'key' => solrsearch_index_key($singular_field_info),
      'value' => $function($first['value']),
    );
  }
  return $fields;
}
1106 | |
/**
 * Indexing callback that normalizes DATE and DATETIME field values for Solr.
 *
 * Each item's 'value' (start date) is indexed under $index_key and, when
 * present, its 'value2' (end date) under $index_key . '_end'. The date
 * strings are parsed with date_create() and passed to solrsearch_date_iso()
 * as Unix timestamps; strings that cannot be parsed are skipped.
 *
 * Only the first element of the field array (the original code named its key
 * $lang) is indexed.
 *
 * @param object $entity
 * @param string $field_name
 * @param string $index_key
 * @param array $field_info
 * @return array $fields
 *   A list of arrays with a 'key' and a 'value' element each.
 */
function solrsearch_date_default_indexing_callback($entity, $field_name, $index_key, array $field_info) {
  $fields = array();
  if (empty($entity->{$field_name})) {
    return $fields;
  }

  // reset() replaces each(), which was deprecated in PHP 7.2 and removed in
  // PHP 8.0.
  $field = $entity->$field_name;
  $values = is_array($field) ? reset($field) : FALSE;
  if (empty($values) || !is_array($values)) {
    return $fields;
  }

  // Date strings are interpreted in the field's time zone, UTC by default.
  // NOTE(review): the time zone is looked up on the top level of the field
  // array, next to the per-language items - confirm that callers set it
  // there.
  $tz = new DateTimeZone(isset($field['timezone']) ? $field['timezone'] : 'UTC');

  // $fields may end up having two values per item: one for the start date
  // and one for the end date.
  foreach ($values as $value) {
    if (isset($value['value']) && ($date = date_create($value['value'], $tz))) {
      $fields[] = array(
        'key' => $index_key,
        'value' => solrsearch_date_iso($date->format('U')),
      );
    }

    if (isset($value['value2']) && ($date = date_create($value['value2'], $tz))) {
      $fields[] = array(
        // The value2 element is the end date. Therefore it gets indexed
        // into its own Solr field.
        'key' => $index_key . '_end',
        'value' => solrsearch_date_iso($date->format('U')),
      );
    }
  }
  return $fields;
}
1151 | |
/**
 * Indexing callback that normalizes DATESTAMP field values for Solr.
 *
 * Each item's non-zero 'value' (start timestamp) is indexed under $index_key
 * and its non-zero 'value2' (end timestamp) under $index_key . '_end', both
 * converted with solrsearch_date_iso().
 *
 * Only the first element of the field array (the original code named its key
 * $lang) is indexed.
 *
 * @param object $entity
 * @param string $field_name
 * @param string $index_key
 * @param array $field_info
 * @return array $fields
 *   A list of arrays with a 'key' and a 'value' element each.
 */
function solrsearch_datestamp_default_indexing_callback($entity, $field_name, $index_key, array $field_info) {
  $fields = array();
  if (empty($entity->{$field_name})) {
    return $fields;
  }

  // reset() replaces each(), which was deprecated in PHP 7.2 and removed in
  // PHP 8.0.
  $field = $entity->$field_name;
  $values = is_array($field) ? reset($field) : FALSE;
  if (empty($values) || !is_array($values)) {
    return $fields;
  }

  // $fields may end up having two values per item: one for the start date
  // and one for the end date.
  foreach ($values as $value) {
    if (isset($value['value']) && $value['value'] != 0) {
      $fields[] = array(
        'key' => $index_key,
        'value' => solrsearch_date_iso($value['value']),
      );
    }
    // The end date is checked on its own value: the previous code tested
    // 'value' here, which indexed a zero end date as 1970-01-01 and dropped
    // a valid end date whenever the start date was zero.
    if (isset($value['value2']) && $value['value2'] != 0) {
      $fields[] = array(
        // The value2 element is the end date. Therefore it gets indexed
        // into its own Solr field.
        'key' => $index_key . '_end',
        'value' => solrsearch_date_iso($value['value2']),
      );
    }
  }
  return $fields;
}
1191 | |
/**
 * Formats a number as a fixed-point decimal string with 20 decimal places.
 *
 * Used as the conversion function for float-like index types in
 * solrsearch_fields_default_indexing_callback().
 *
 * @param mixed $value
 *   The value to format; it is converted to a float by sprintf().
 *
 * @return string
 *   The formatted number, always with '.' as the decimal separator.
 */
function solrsearch_floatval($value) {
  // %F is the locale-independent variant of %f: it never emits a decimal
  // comma, whatever LC_NUMERIC is set to.
  return sprintf('%0.20F', $value);
}
1195 | |
/**
 * Indexing callback for node_reference fields (references module).
 *
 * Collects every non-empty 'nid' found in the field, across all first-level
 * keys of the field array. The passed $index_key is ignored; the key is
 * always derived from $field_info with solrsearch_index_key().
 *
 * @param object $entity
 * @param string $field_name
 * @param string $index_key
 * @param array $field_info
 * @return array $fields
 */
function solrsearch_nodereference_indexing_callback($entity, $field_name, $index_key, array $field_info) {
  $fields = array();
  if (empty($entity->{$field_name})) {
    return $fields;
  }

  $index_key = solrsearch_index_key($field_info);
  foreach ($entity->$field_name as $references) {
    foreach ($references as $reference) {
      // Skip references without a usable node ID.
      if (empty($reference['nid'])) {
        continue;
      }
      $fields[] = array(
        'key' => $index_key,
        'value' => $reference['nid'],
      );
    }
  }
  return $fields;
}
1223 | |
/**
 * Indexing callback for user_reference fields (references module).
 *
 * Collects the 'uid' of every reference in the field, across all first-level
 * keys of the field array. A reference is skipped when its uid is missing,
 * an empty string, or otherwise evaluates to FALSE (which includes '0'). The
 * passed $index_key is ignored; the key is always derived from $field_info
 * with solrsearch_index_key().
 *
 * @param object $entity
 * @param string $field_name
 * @param string $index_key
 * @param array $field_info
 * @return array $fields
 */
function solrsearch_userreference_indexing_callback($entity, $field_name, $index_key, array $field_info) {
  $fields = array();
  if (empty($entity->$field_name)) {
    return $fields;
  }

  $index_key = solrsearch_index_key($field_info);
  foreach ($entity->$field_name as $references) {
    foreach ($references as $reference) {
      $has_uid = isset($reference['uid']) && strlen($reference['uid']) && $reference['uid'];
      if (!$has_uid) {
        continue;
      }
      $fields[] = array(
        'key' => $index_key,
        'value' => $reference['uid'],
      );
    }
  }
  return $fields;
}
1251 | |
/**
 * Indexing callback for entityreference fields.
 *
 * Collects every non-empty 'target_id' found in the field, across all
 * first-level keys of the field array, and indexes it as
 * "TARGET_TYPE:TARGET_ID". The passed $index_key is ignored; the key is
 * always derived from $field_info with solrsearch_index_key().
 *
 * @param object $entity
 * @param string $field_name
 * @param string $index_key
 * @param array $field_info
 * @return array $fields
 */
function solrsearch_entityreference_indexing_callback($entity, $field_name, $index_key, $field_info) {
  $fields = array();
  if (empty($entity->{$field_name})) {
    return $fields;
  }

  // The ID is prefixed with the target entity type so that the mapping
  // callback knows which kind of entity it is dealing with.
  $target_type = $field_info['field']['settings']['target_type'];
  $index_key = solrsearch_index_key($field_info);

  foreach ($entity->$field_name as $references) {
    foreach ($references as $reference) {
      // Skip references without a usable target ID.
      if (empty($reference['target_id'])) {
        continue;
      }
      $fields[] = array(
        'key' => $index_key,
        'value' => $target_type . ':' . $reference['target_id'],
      );
    }
  }
  return $fields;
}
1285 | |
/**
 * Extracts the contents of selected HTML tags into the document's tag fields.
 *
 * The 'solrsearch_tags_to_index' variable maps tag names to document field
 * names. The cleaned content of every matching tag is appended to its field,
 * preceded by a space. Links whose text looks like a URL are left out.
 *
 * $text must be stripped of control characters beforehand.
 *
 * @param solrsearchDocument $document
 * @param string $text
 */
function solrsearch_add_tags_to_document(solrsearchDocument $document, $text) {
  $tags_to_index = variable_get('solrsearch_tags_to_index', array(
    'h1' => 'tags_h1',
    'h2' => 'tags_h2_h3',
    'h3' => 'tags_h2_h3',
    'h4' => 'tags_h4_h5_h6',
    'h5' => 'tags_h4_h5_h6',
    'h6' => 'tags_h4_h5_h6',
    'u' => 'tags_inline',
    'b' => 'tags_inline',
    'i' => 'tags_inline',
    'strong' => 'tags_inline',
    'em' => 'tags_inline',
    'a' => 'tags_a'
  ));
  $tag_names = array_keys($tags_to_index);

  // Strip every tag that is not going to be indexed.
  $text = strip_tags($text, '<' . implode('><', $tag_names) . '>');

  // Group 1 captures the tag name, group 2 the tag content.
  $tag_pattern = '@<(' . implode('|', $tag_names) . ')[^>]*>(.*)</\1>@Ui';
  preg_match_all($tag_pattern, $text, $matches);

  foreach ($matches[1] as $index => $matched_tag) {
    $tag = strtolower($matched_tag);
    $inner = $matches[2][$index];

    // We don't want to index links auto-generated by the url filter.
    if ($tag == 'a' && preg_match('@(?:http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://|www\.)[a-zA-Z0-9]+@', $inner)) {
      continue;
    }

    $target_field = $tags_to_index[$tag];
    if (!isset($document->{$target_field})) {
      $document->{$target_field} = '';
    }
    $document->{$target_field} .= ' ' . solrsearch_clean_text($inner);
  }
}
1325 | |
/**
 * hook_cron() helper that tries to bring the node indexer table back in line
 * with the {node} table.
 *
 * Two passes are made, each looking at no more than twice the
 * 'solrsearch_cron_mass_limit' variable (default 500) rows and handling them
 * in chunks of that limit:
 * - rows whose status differs from the node's status are passed to
 *   solrsearch_index_nodeapi_mass_update();
 * - rows whose node no longer exists are passed to
 *   solrsearch_index_nodeapi_mass_delete().
 * A pass stops at the first chunk whose handler reports failure.
 */
function solrsearch_index_node_check_table() {
  $table = solrsearch_get_indexer_table('node');
  // At most this many nodes are updated or deleted per Solr query, and at
  // most twice as many are checked per run.
  $limit = variable_get('solrsearch_cron_mass_limit', 500);
  $max_rows = $limit * 2;

  // Pass 1: content whose published status changed without the index
  // noticing.
  $query = db_select($table, 'aien');
  $query->fields('n', array('nid', 'status'));
  $query->where('aien.status <> n.status');
  $query->range(0, $max_rows);
  $query->addTag('solrsearch_index_node');
  $query->innerJoin('node', 'n', 'n.nid = aien.entity_id');
  $out_of_sync = $query->execute()->fetchAllAssoc('nid');

  foreach (array_chunk($out_of_sync, $limit, TRUE) as $batch) {
    watchdog('Apache Solr', 'On cron running solrsearch_nodeapi_mass_update() on nids @nids', array('@nids' => implode(',', array_keys($batch))), WATCHDOG_NOTICE);
    if (!solrsearch_index_nodeapi_mass_update($batch, $table)) {
      // Solr query failed - so stop trying.
      break;
    }
  }

  // Pass 2: deleted content that is still present in the indexer table.
  $query = db_select($table, 'aien');
  $query->isNull('n.nid');
  $query->range(0, $max_rows);
  $query->addExpression('aien.entity_id', 'nid');
  $query->leftJoin('node', 'n', 'n.nid = aien.entity_id');
  $orphaned = $query->execute()->fetchAllAssoc('nid');

  foreach (array_chunk($orphaned, $limit, TRUE) as $batch) {
    watchdog('Apache Solr', 'On cron running solrsearch_nodeapi_mass_delete() on nids @nids', array('@nids' => implode(',', array_keys($batch))), WATCHDOG_NOTICE);
    if (!solrsearch_index_nodeapi_mass_delete($batch, $table)) {
      // Solr query failed - so stop trying.
      break;
    }
  }
}
1370 | |
/**
 * Mass updates the status of nodes in the Solr indexer table.
 *
 * The documents of unpublished nodes are deleted from the default
 * environment's Solr index. After that, the indexer table rows of all passed
 * nodes get their status and a 'changed' value of REQUEST_TIME written.
 *
 * @param array $nodes
 *   Objects with a 'nid' and a 'status' property.
 * @param string $table
 *   (optional) The indexer table; defaults to the node indexer table.
 * @return boolean
 *   TRUE on success, and also when there is nothing to do or the default
 *   environment is read-only; FALSE when an exception was caught.
 */
function solrsearch_index_nodeapi_mass_update(array $nodes, $table = NULL) {
  if (empty($nodes)) {
    return TRUE;
  }
  if (empty($table)) {
    $table = solrsearch_get_indexer_table('node');
  }

  // A read-only environment must not be written to.
  $read_only = solrsearch_environment_variable_get(solrsearch_default_environment(), 'solrsearch_read_only', solrsearch_READ_WRITE);
  if ($read_only == solrsearch_READ_ONLY) {
    return TRUE;
  }

  // Split the nodes by status, keyed by nid with the Solr document ID as
  // the value.
  $published_ids = array();
  $unpublished_ids = array();
  foreach ($nodes as $node) {
    $document_id = solrsearch_document_id($node->nid);
    if ($node->status) {
      $published_ids[$node->nid] = $document_id;
    }
    else {
      $unpublished_ids[$node->nid] = $document_id;
    }
  }

  try {
    $env_id = solrsearch_default_environment();
    $solr = solrsearch_get_solr($env_id);
    $solr->deleteByMultipleIds($unpublished_ids);
    solrsearch_set_last_index_updated($env_id, REQUEST_TIME);

    // There was no exception, so update the table: published rows first,
    // then unpublished ones.
    $ids_by_status = array(1 => $published_ids, 0 => $unpublished_ids);
    foreach ($ids_by_status as $status => $ids) {
      if ($ids) {
        db_update($table)
          ->fields(array('changed' => REQUEST_TIME, 'status' => $status))
          ->condition('entity_id', array_keys($ids), 'IN')
          ->execute();
      }
    }
    return TRUE;
  }
  catch (Exception $e) {
    watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
    return FALSE;
  }
}
1427 | |
/**
 * Mass deletes nodes from the Solr index and from the indexer table.
 *
 * The documents of all passed nodes are deleted from the default
 * environment's Solr index; afterwards their rows are removed from the
 * indexer table.
 *
 * @param array $nodes
 *   Objects with a 'nid' property.
 * @param string $table
 *   (optional) The indexer table; defaults to the node indexer table.
 * @return boolean
 *   TRUE on success, and also when there is nothing to do or the default
 *   environment is read-only; FALSE when an exception was caught.
 */
function solrsearch_index_nodeapi_mass_delete(array $nodes, $table = NULL) {
  if (empty($nodes)) {
    return TRUE;
  }
  if (empty($table)) {
    $table = solrsearch_get_indexer_table('node');
  }

  // A read-only environment must not be written to.
  $read_only = solrsearch_environment_variable_get(solrsearch_default_environment(), 'solrsearch_read_only', solrsearch_READ_WRITE);
  if ($read_only == solrsearch_READ_ONLY) {
    return TRUE;
  }

  // Collect the node IDs and the matching Solr document IDs.
  $nids = array();
  $document_ids = array();
  foreach ($nodes as $node) {
    $document_ids[] = solrsearch_document_id($node->nid);
    $nids[] = $node->nid;
  }

  try {
    $env_id = solrsearch_default_environment();
    $solr = solrsearch_get_solr($env_id);
    $solr->deleteByMultipleIds($document_ids);
    solrsearch_set_last_index_updated($env_id, REQUEST_TIME);

    // There was no exception, so remove the rows from the table.
    $delete = db_delete($table);
    $delete->condition('entity_id', $nids, 'IN');
    $delete->execute();
    return TRUE;
  }
  catch (Exception $e) {
    watchdog('Apache Solr', nl2br(check_plain($e->getMessage())), NULL, WATCHDOG_ERROR);
    return FALSE;
  }
}