comparison src/main/java/edu/harvard/iq/dataverse/SearchServiceBean.java @ 10:a50cf11e5178

Rewrite LGDataverse completely upgrading to dataverse4.0
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 08 Sep 2015 17:00:21 +0200
parents
children
comparison
equal deleted inserted replaced
9:5926d6419569 10:a50cf11e5178
1 package edu.harvard.iq.dataverse;
2
3 import edu.harvard.iq.dataverse.authorization.groups.Group;
4 import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean;
5 import edu.harvard.iq.dataverse.search.SearchFields;
6 import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
7 import edu.harvard.iq.dataverse.authorization.users.GuestUser;
8 import edu.harvard.iq.dataverse.authorization.users.User;
9 import edu.harvard.iq.dataverse.search.Highlight;
10 import edu.harvard.iq.dataverse.search.SearchException;
11 import edu.harvard.iq.dataverse.search.SearchUtil;
12 import edu.harvard.iq.dataverse.util.JsfHelper;
13 import edu.harvard.iq.dataverse.util.SystemConfig;
14 import java.lang.reflect.Field;
15 import java.util.ArrayList;
16 import java.util.Arrays;
17 import java.util.Calendar;
18 import java.util.Date;
19 import java.util.HashMap;
20 import java.util.Iterator;
21 import java.util.List;
22 import java.util.ListIterator;
23 import java.util.Map;
24 import java.util.Set;
25 import java.util.logging.Level;
26 import java.util.logging.Logger;
27 import javax.ejb.EJB;
28 import javax.ejb.EJBTransactionRolledbackException;
29 import javax.ejb.Stateless;
30 import javax.ejb.TransactionRolledbackLocalException;
31 import javax.inject.Named;
32 import javax.persistence.NoResultException;
33 import org.apache.solr.client.solrj.SolrQuery;
34 import org.apache.solr.client.solrj.SolrQuery.SortClause;
35 import org.apache.solr.client.solrj.SolrServer;
36 import org.apache.solr.client.solrj.SolrServerException;
37 import org.apache.solr.client.solrj.impl.HttpSolrServer;
38 import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
39 import org.apache.solr.client.solrj.response.FacetField;
40 import org.apache.solr.client.solrj.response.QueryResponse;
41 import org.apache.solr.client.solrj.response.RangeFacet;
42 import org.apache.solr.client.solrj.response.SpellCheckResponse;
43 import org.apache.solr.common.SolrDocument;
44 import org.apache.solr.common.SolrDocumentList;
45
46 @Stateless
47 @Named
48 public class SearchServiceBean {
49
50 private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName());
51
52 /**
53 * We're trying to make the SearchServiceBean lean, mean, and fast, with as
54 * few injections of EJBs as possible.
55 */
56 /**
57 * @todo Can we do without the DatasetFieldServiceBean?
58 */
59 @EJB
60 DvObjectServiceBean dvObjectService;
61 @EJB
62 DataverseServiceBean dataverseService;
63 @EJB
64 DatasetServiceBean datasetService;
65 @EJB
66 DatasetVersionServiceBean datasetVersionService;
67 @EJB
68 DataFileServiceBean dataFileService;
69 @EJB
70 DatasetFieldServiceBean datasetFieldService;
71 @EJB
72 GroupServiceBean groupService;
73 @EJB
74 SystemConfig systemConfig;
75
76 public static final JsfHelper JH = new JsfHelper();
77
78 public SolrQueryResponse search(User user, Dataverse dataverse, String query, List<String> filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException {
79 SolrServer solrServer = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr");
80 SolrQuery solrQuery = new SolrQuery();
81 query = SearchUtil.sanitizeQuery(query);
82 solrQuery.setQuery(query);
83 // SortClause foo = new SortClause("name", SolrQuery.ORDER.desc);
84 // if (query.equals("*") || query.equals("*:*")) {
85 // solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc));
86 solrQuery.setSort(new SortClause(sortField, sortOrder));
87 // } else {
88 // solrQuery.setSort(sortClause);
89 // }
90 // solrQuery.setSort(sortClause);
91 solrQuery.setHighlight(true).setHighlightSnippets(1);
92 solrQuery.setHighlightSimplePre("<span class=\"search-term-match\">");
93 solrQuery.setHighlightSimplePost("</span>");
94 Map<String, String> solrFieldsToHightlightOnMap = new HashMap<>();
95 solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name");
96 solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation");
97 solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type");
98 solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description");
99 solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name");
100 solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label");
101 solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type");
102 solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Date");
103 solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, localize("advanced.search.datasets.persistentId"));
104 /**
105 * @todo Dataverse subject and affiliation should be highlighted but
106 * this is commented out right now because the "friendly" names are not
107 * being shown on the dataverse cards. See also
108 * https://github.com/IQSS/dataverse/issues/1431
109 */
110 // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject");
111 // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation");
112 /**
113 * @todo: show highlight on file card?
114 * https://redmine.hmdc.harvard.edu/issues/3848
115 */
116 solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension");
117 List<DatasetFieldType> datasetFields = datasetFieldService.findAllOrderedById();
118 for (DatasetFieldType datasetFieldType : datasetFields) {
119 String solrField = datasetFieldType.getSolrField().getNameSearchable();
120 String displayName = datasetFieldType.getDisplayName();
121 solrFieldsToHightlightOnMap.put(solrField, displayName);
122 }
123 for (Map.Entry<String, String> entry : solrFieldsToHightlightOnMap.entrySet()) {
124 String solrField = entry.getKey();
125 // String displayName = entry.getValue();
126 solrQuery.addHighlightField(solrField);
127 }
128 solrQuery.setParam("fl", "*,score");
129 solrQuery.setParam("qt", "/spell");
130 solrQuery.setParam("facet", "true");
131 /**
132 * @todo: do we need facet.query?
133 */
134 solrQuery.setParam("facet.query", "*");
135 for (String filterQuery : filterQueries) {
136 solrQuery.addFilterQuery(filterQuery);
137 }
138
139 /**
140 * @todo For people who are not logged in, should we show stuff indexed
141 * with "AllUsers" group or not? If so, uncomment the allUsersString
142 * stuff below.
143 */
144 // String allUsersString = IndexServiceBean.getGroupPrefix() + AllUsers.get().getAlias();
145 // String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + allUsersString + ")";
146 String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + ")";
147 // String publicOnly = "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getPublicGroupString();
148 // initialize to public only to be safe
149 String permissionFilterQuery = publicOnly;
150 if (user instanceof GuestUser) {
151 permissionFilterQuery = publicOnly;
152 } else if (user instanceof AuthenticatedUser) {
153 // Non-guests might get more than public stuff with an OR or two
154 AuthenticatedUser au = (AuthenticatedUser) user;
155 solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS);
156
157 /**
158 * @todo all this code needs cleanup and clarification.
159 */
160 /**
161 * Every AuthenticatedUser is part of a "User Private Group" (UGP),
162 * a concept we borrow from RHEL:
163 * https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups
164 */
165 /**
166 * @todo rename this from publicPlusUserPrivateGroup. Confusing
167 */
168 // safe default: public only
169 String publicPlusUserPrivateGroup = publicOnly;
170 // + (onlyDatatRelatedToMe ? "" : (publicOnly + " OR "))
171 // + "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + ")";
172
173 // /**
174 // * @todo add onlyDatatRelatedToMe option into the experimental JOIN
175 // * before enabling it.
176 // */
177 String groupsFromProviders = "";
178 /**
179 * @todo What should the value be? Is null ok? From a search
180 * perspective, we don't care about if the group was created within
181 * one dataverse or another. We just want a list of all the groups
182 * the user is part of. A JOIN on "permission documents" will
183 * determine if the user can find a given "content document"
184 * (dataset version, etc) in Solr.
185 */
186 // DvObject groupsForDvObjectParamNull = null;
187 // Set<Group> groups = groupService.groupsFor(au, groupsForDvObjectParamNull);
188 /**
189 * @todo What is the expected behavior when you pass in a dataverse?
190 * It seems like no matter what you pass in you always get the
191 * following types of groups:
192 *
193 * - BuiltIn Groups
194 *
195 * - IP Groups
196 *
197 * - Shibboleth Groups
198 *
199 * If you pass in the root dataverse it seems like you get all
200 * groups that you're part of.
201 *
202 * If you pass in a non-root dataverse, it seems like you get groups
203 * that you're part of for that dataverse. It's unclear if there is
204 * any inheritance of groups.
205 */
206 DvObject groupsForDvObjectParamCurrentDataverse = dataverse;
207 Set<Group> groups = groupService.groupsFor(au, groupsForDvObjectParamCurrentDataverse);
208 StringBuilder sb = new StringBuilder();
209 for (Group group : groups) {
210 logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias());
211 String groupAlias = group.getAlias();
212 if (groupAlias != null && !groupAlias.isEmpty()) {
213 sb.append(" OR ");
214 // i.e. group_shib/2
215 sb.append(IndexServiceBean.getGroupPrefix() + groupAlias);
216 }
217 groupsFromProviders = sb.toString();
218 }
219
220 logger.fine(groupsFromProviders);
221 if (true) {
222 /**
223 * @todo get rid of "experimental" in name
224 */
225 String experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")";
226 if (onlyDatatRelatedToMe) {
227 /**
228 * @todo make this a variable called "String
229 * dataRelatedToMeFilterQuery" or something
230 */
231 experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")";
232 }
233 publicPlusUserPrivateGroup = experimentalJoin;
234 }
235
236 permissionFilterQuery = publicPlusUserPrivateGroup;
237 logger.fine(permissionFilterQuery);
238
239 if (au.isSuperuser()) {
240 // dangerous because this user will be able to see
241 // EVERYTHING in Solr with no regard to permissions!
242 String dangerZoneNoSolrJoin = null;
243 permissionFilterQuery = dangerZoneNoSolrJoin;
244 }
245
246 } else {
247 logger.info("Should never reach here. A User must be an AuthenticatedUser or a Guest");
248 }
249
250 solrQuery.addFilterQuery(permissionFilterQuery);
251
252 // solrQuery.addFacetField(SearchFields.HOST_DATAVERSE);
253 // solrQuery.addFacetField(SearchFields.AUTHOR_STRING);
254 solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY);
255 solrQuery.addFacetField(SearchFields.AFFILIATION);
256 solrQuery.addFacetField(SearchFields.PUBLICATION_DATE);
257 // solrQuery.addFacetField(SearchFields.CATEGORY);
258 // solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME);
259 // solrQuery.addFacetField(SearchFields.DISTRIBUTOR);
260 // solrQuery.addFacetField(SearchFields.KEYWORD);
261 /**
262 * @todo when a new method on datasetFieldService is available
263 * (retrieveFacetsByDataverse?) only show the facets that the dataverse
264 * in question wants to show (and in the right order):
265 * https://redmine.hmdc.harvard.edu/issues/3490
266 *
267 * also, findAll only returns advancedSearchField = true... we should
268 * probably introduce the "isFacetable" boolean rather than caring about
269 * if advancedSearchField is true or false
270 *
271 */
272 for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) {
273 DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType();
274 solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable());
275 }
276 solrQuery.addFacetField(SearchFields.FILE_TYPE);
277 /**
278 * @todo: hide the extra line this shows in the GUI... at least it's
279 * last...
280 */
281 solrQuery.addFacetField(SearchFields.TYPE);
282 /**
283 * @todo: do sanity checking... throw error if negative
284 */
285 solrQuery.setStart(paginationStart);
286 /**
287 * @todo: decide if year CITATION_YEAR is good enough or if we should
288 * support CITATION_DATE
289 */
290 // Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.UK);
291 // calendar.set(2010, 1, 1);
292 // Date start = calendar.getTime();
293 // calendar.set(2013, 1, 1);
294 // Date end = calendar.getTime();
295 // solrQuery.addDateRangeFacet(SearchFields.CITATION_DATE, start, end, "+1MONTH");
296 /**
297 * @todo make this configurable
298 */
299 int thisYear = Calendar.getInstance().get(Calendar.YEAR);
300 /**
301 * @todo: odd or even makes a difference. Couldn't find value of 2014
302 * when this was set to 2000
303 */
304 final int citationYearRangeStart = 1901;
305 final int citationYearRangeEnd = thisYear;
306 final int citationYearRangeSpan = 2;
307 /**
308 * @todo: these are dates and should be "range facets" not "field
309 * facets"
310 *
311 * right now they are lumped in with the datasetFieldService.findAll()
312 * above
313 */
314 // solrQuery.addNumericRangeFacet(SearchFields.PRODUCTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan);
315 // solrQuery.addNumericRangeFacet(SearchFields.DISTRIBUTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan);
316 solrQuery.setRows(numResultsPerPage);
317 logger.fine("Solr query:" + solrQuery);
318
319 QueryResponse queryResponse;
320 try {
321 queryResponse = solrServer.query(solrQuery);
322 } catch (RemoteSolrException ex) {
323 String messageFromSolr = ex.getLocalizedMessage();
324 String error = "Search Syntax Error: ";
325 String stringToHide = "org.apache.solr.search.SyntaxError: ";
326 if (messageFromSolr.startsWith(stringToHide)) {
327 // hide "org.apache.solr..."
328 error += messageFromSolr.substring(stringToHide.length());
329 } else {
330 error += messageFromSolr;
331 }
332 logger.fine(error);
333 SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse();
334 exceptionSolrQueryResponse.setError(error);
335
336 // we can't show anything because of the search syntax error
337 long zeroNumResultsFound = 0;
338 long zeroGetResultsStart = 0;
339 List<SolrSearchResult> emptySolrSearchResults = new ArrayList<>();
340 List<FacetCategory> exceptionFacetCategoryList = new ArrayList<>();
341 Map<String, List<String>> emptySpellingSuggestion = new HashMap<>();
342 exceptionSolrQueryResponse.setNumResultsFound(zeroNumResultsFound);
343 exceptionSolrQueryResponse.setResultsStart(zeroGetResultsStart);
344 exceptionSolrQueryResponse.setSolrSearchResults(emptySolrSearchResults);
345 exceptionSolrQueryResponse.setFacetCategoryList(exceptionFacetCategoryList);
346 exceptionSolrQueryResponse.setTypeFacetCategories(exceptionFacetCategoryList);
347 exceptionSolrQueryResponse.setSpellingSuggestionsByToken(emptySpellingSuggestion);
348 return exceptionSolrQueryResponse;
349 } catch (SolrServerException ex) {
350 throw new SearchException("Internal Dataverse Search Engine Error", ex);
351 }
352 SolrDocumentList docs = queryResponse.getResults();
353 Iterator<SolrDocument> iter = docs.iterator();
354 List<SolrSearchResult> solrSearchResults = new ArrayList<>();
355
356 /**
357 * @todo refactor SearchFields to a hashmap (or something? put in
358 * database? internationalize?) to avoid the crazy reflection and string
359 * manipulation below
360 */
361 Object searchFieldsObject = new SearchFields();
362 Field[] staticSearchFields = searchFieldsObject.getClass().getDeclaredFields();
363 String titleSolrField = null;
364 try {
365 DatasetFieldType titleDatasetField = datasetFieldService.findByName(DatasetFieldConstant.title);
366 titleSolrField = titleDatasetField.getSolrField().getNameSearchable();
367 } catch (EJBTransactionRolledbackException ex) {
368 logger.info("Couldn't find " + DatasetFieldConstant.title);
369 if (ex.getCause() instanceof TransactionRolledbackLocalException) {
370 if (ex.getCause().getCause() instanceof NoResultException) {
371 logger.info("Caught NoResultException");
372 }
373 }
374 }
375 Map<String, String> datasetfieldFriendlyNamesBySolrField = new HashMap<>();
376 Map<String, String> staticSolrFieldFriendlyNamesBySolrField = new HashMap<>();
377 String baseUrl = systemConfig.getDataverseSiteUrl();
378 while (iter.hasNext()) {
379 SolrDocument solrDocument = iter.next();
380 String id = (String) solrDocument.getFieldValue(SearchFields.ID);
381 Long entityid = (Long) solrDocument.getFieldValue(SearchFields.ENTITY_ID);
382 String type = (String) solrDocument.getFieldValue(SearchFields.TYPE);
383 float score = (Float) solrDocument.getFieldValue(SearchFields.RELEVANCE);
384 logger.fine("score for " + id + ": " + score);
385 String identifier = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER);
386 String citation = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION);
387 String persistentUrl = (String) solrDocument.getFieldValue(SearchFields.PERSISTENT_URL);
388 String name = (String) solrDocument.getFieldValue(SearchFields.NAME);
389 String nameSort = (String) solrDocument.getFieldValue(SearchFields.NAME_SORT);
390 // ArrayList titles = (ArrayList) solrDocument.getFieldValues(SearchFields.TITLE);
391 String title = (String) solrDocument.getFieldValue(titleSolrField);
392 Long datasetVersionId = (Long) solrDocument.getFieldValue(SearchFields.DATASET_VERSION_ID);
393 String deaccessionReason = (String) solrDocument.getFieldValue(SearchFields.DATASET_DEACCESSION_REASON);
394 // logger.info("titleSolrField: " + titleSolrField);
395 // logger.info("title: " + title);
396 String filetype = (String) solrDocument.getFieldValue(SearchFields.FILE_TYPE_FRIENDLY);
397 String fileContentType = (String) solrDocument.getFieldValue(SearchFields.FILE_CONTENT_TYPE);
398 Date release_or_create_date = (Date) solrDocument.getFieldValue(SearchFields.RELEASE_OR_CREATE_DATE);
399 String dateToDisplayOnCard = (String) solrDocument.getFirstValue(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT);
400 List<String> matchedFields = new ArrayList<>();
401 List<Highlight> highlights = new ArrayList<>();
402 Map<SolrField, Highlight> highlightsMap = new HashMap<>();
403 Map<SolrField, List<String>> highlightsMap2 = new HashMap<>();
404 Map<String, Highlight> highlightsMap3 = new HashMap<>();
405 if (queryResponse.getHighlighting().get(id) != null) {
406 for (Map.Entry<String, String> entry : solrFieldsToHightlightOnMap.entrySet()) {
407 String field = entry.getKey();
408 String displayName = entry.getValue();
409
410 List<String> highlightSnippets = queryResponse.getHighlighting().get(id).get(field);
411 if (highlightSnippets != null) {
412 matchedFields.add(field);
413 /**
414 * @todo only SolrField.SolrType.STRING? that's not
415 * right... knit the SolrField object more into the
416 * highlighting stuff
417 */
418 SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true);
419 Highlight highlight = new Highlight(solrField, highlightSnippets, displayName);
420 highlights.add(highlight);
421 highlightsMap.put(solrField, highlight);
422 highlightsMap2.put(solrField, highlightSnippets);
423 highlightsMap3.put(field, highlight);
424 }
425 }
426
427 }
428 SolrSearchResult solrSearchResult = new SolrSearchResult(query, name);
429 /**
430 * @todo put all this in the constructor?
431 */
432 List<String> states = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.PUBLICATION_STATUS);
433 if (states != null) {
434 for (String state : states) {
435 if (state.equals(IndexServiceBean.getUNPUBLISHED_STRING())) {
436 solrSearchResult.setUnpublishedState(true);
437 } else if (state.equals(IndexServiceBean.getDRAFT_STRING())) {
438 solrSearchResult.setDraftState(true);
439 // } else if (state.equals(IndexServiceBean.getDEACCESSIONED_STRING())) {
440 // solrSearchResult.setDeaccessionedState(true);
441 }
442 }
443 }
444 // logger.info(id + ": " + description);
445 solrSearchResult.setId(id);
446 solrSearchResult.setEntityId(entityid);
447 solrSearchResult.setEntity(dvObjectService.findDvObject(entityid));
448 solrSearchResult.setIdentifier(identifier);
449 solrSearchResult.setPersistentUrl(persistentUrl);
450 solrSearchResult.setType(type);
451 solrSearchResult.setScore(score);
452 solrSearchResult.setNameSort(nameSort);
453 solrSearchResult.setReleaseOrCreateDate(release_or_create_date);
454 solrSearchResult.setDateToDisplayOnCard(dateToDisplayOnCard);
455 solrSearchResult.setMatchedFields(matchedFields);
456 solrSearchResult.setHighlightsAsList(highlights);
457 solrSearchResult.setHighlightsMap(highlightsMap);
458 solrSearchResult.setHighlightsAsMap(highlightsMap3);
459 Map<String, String> parent = new HashMap<>();
460 String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION);
461 solrSearchResult.setDescriptionNoSnippet(description);
462 solrSearchResult.setDeaccessionReason(deaccessionReason);
463 /**
464 * @todo start using SearchConstants class here
465 */
466 if (type.equals("dataverses")) {
467 solrSearchResult.setName(name);
468 solrSearchResult.setHtmlUrl(baseUrl + "/dataverse/" + identifier);
469 solrSearchResult.setImageUrl(baseUrl + "/api/access/dvCardImage/" + entityid);
470 /**
471 * @todo Expose this API URL after "dvs" is changed to
472 * "dataverses". Also, is an API token required for published
473 * dataverses?
474 * Michael: url changed.
475 */
476 // solrSearchResult.setApiUrl(baseUrl + "/api/dataverses/" + entityid);
477 } else if (type.equals("datasets")) {
478 solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?globalId=" + identifier);
479 solrSearchResult.setApiUrl(baseUrl + "/api/datasets/" + entityid);
480 solrSearchResult.setImageUrl(baseUrl + "/api/access/dsCardImage/" + datasetVersionId);
481 /**
482 * @todo Could use getFieldValues (plural) here.
483 */
484 ArrayList<String> datasetDescriptions = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.DATASET_DESCRIPTION);
485 if (datasetDescriptions != null) {
486 String firstDatasetDescription = datasetDescriptions.get(0);
487 if (firstDatasetDescription != null) {
488 solrSearchResult.setDescriptionNoSnippet(firstDatasetDescription);
489 }
490 }
491 solrSearchResult.setDatasetVersionId(datasetVersionId);
492
493 solrSearchResult.setCitation(citation);
494 if (title != null) {
495 // solrSearchResult.setTitle((String) titles.get(0));
496 solrSearchResult.setTitle((String) title);
497 } else {
498 logger.info("No title indexed. Setting to empty string to prevent NPE. Dataset id " + entityid + " and version id " + datasetVersionId);
499 solrSearchResult.setTitle("");
500 }
501 List<String> authors = (ArrayList) solrDocument.getFieldValues(DatasetFieldConstant.authorName);
502 if (authors != null) {
503 solrSearchResult.setDatasetAuthors(authors);
504 }
505 } else if (type.equals("files")) {
506 String parentGlobalId = null;
507 Object parentGlobalIdObject = solrDocument.getFieldValue(SearchFields.PARENT_IDENTIFIER);
508 if (parentGlobalIdObject != null) {
509 parentGlobalId = (String) parentGlobalIdObject;
510 parent.put(SolrSearchResult.PARENT_IDENTIFIER, parentGlobalId);
511 }
512 solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?persistentId=" + parentGlobalId);
513 solrSearchResult.setDownloadUrl(baseUrl + "/api/access/datafile/" + entityid);
514 /**
515 * @todo We are not yet setting the API URL for files because
516 * not all files have metadata. Only subsettable files (those
517 * with a datatable) seem to have metadata. Furthermore, the
518 * response is in XML whereas the rest of the Search API returns
519 * JSON.
520 */
521 // solrSearchResult.setApiUrl(baseUrl + "/api/meta/datafile/" + entityid);
522 solrSearchResult.setImageUrl(baseUrl + "/api/access/fileCardImage/" + entityid);
523 solrSearchResult.setName(name);
524 solrSearchResult.setFiletype(filetype);
525 solrSearchResult.setFileContentType(fileContentType);
526 Object fileSizeInBytesObject = solrDocument.getFieldValue(SearchFields.FILE_SIZE_IN_BYTES);
527 if (fileSizeInBytesObject != null) {
528 try {
529 long fileSizeInBytesLong = (long) fileSizeInBytesObject;
530 solrSearchResult.setFileSizeInBytes(fileSizeInBytesLong);
531 } catch (ClassCastException ex) {
532 logger.info("Could not cast file " + entityid + " to long for " + SearchFields.FILE_SIZE_IN_BYTES + ": " + ex.getLocalizedMessage());
533 }
534 }
535 solrSearchResult.setFileMd5((String) solrDocument.getFieldValue(SearchFields.FILE_MD5));
536 solrSearchResult.setUnf((String) solrDocument.getFieldValue(SearchFields.UNF));
537 solrSearchResult.setDatasetVersionId(datasetVersionId);
538 }
539 /**
540 * @todo store PARENT_ID as a long instead and cast as such
541 */
542 parent.put("id", (String) solrDocument.getFieldValue(SearchFields.PARENT_ID));
543 parent.put("name", (String) solrDocument.getFieldValue(SearchFields.PARENT_NAME));
544 parent.put("citation", (String) solrDocument.getFieldValue(SearchFields.PARENT_CITATION));
545 solrSearchResult.setParent(parent);
546 solrSearchResults.add(solrSearchResult);
547 }
548 Map<String, List<String>> spellingSuggestionsByToken = new HashMap<>();
549 SpellCheckResponse spellCheckResponse = queryResponse.getSpellCheckResponse();
550 if (spellCheckResponse != null) {
551 List<SpellCheckResponse.Suggestion> suggestions = spellCheckResponse.getSuggestions();
552 for (SpellCheckResponse.Suggestion suggestion : suggestions) {
553 spellingSuggestionsByToken.put(suggestion.getToken(), suggestion.getAlternatives());
554 }
555 }
556
557 List<FacetCategory> facetCategoryList = new ArrayList<FacetCategory>();
558 List<FacetCategory> typeFacetCategories = new ArrayList<>();
559 boolean hidePublicationStatusFacet = true;
560 boolean draftsAvailable = false;
561 boolean unpublishedAvailable = false;
562 for (FacetField facetField : queryResponse.getFacetFields()) {
563 FacetCategory facetCategory = new FacetCategory();
564 List<FacetLabel> facetLabelList = new ArrayList<>();
565 for (FacetField.Count facetFieldCount : facetField.getValues()) {
566 /**
567 * @todo we do want to show the count for each facet
568 */
569 // logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")");
570 if (facetFieldCount.getCount() > 0) {
571 FacetLabel facetLabel = new FacetLabel(facetFieldCount.getName(), facetFieldCount.getCount());
572 // quote field facets
573 facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\"");
574 facetLabelList.add(facetLabel);
575 if (facetField.getName().equals(SearchFields.PUBLICATION_STATUS)) {
576 if (facetLabel.getName().equals(IndexServiceBean.getUNPUBLISHED_STRING())) {
577 unpublishedAvailable = true;
578 } else if (facetLabel.getName().equals(IndexServiceBean.getDRAFT_STRING())) {
579 draftsAvailable = true;
580 }
581 }
582 }
583 }
584 facetCategory.setName(facetField.getName());
585 // hopefully people will never see the raw facetField.getName() because it may well have an _s at the end
586 facetCategory.setFriendlyName(facetField.getName());
587 // try to find a friendlier name to display as a facet
588 /**
589 * @todo hmm, we thought we wanted the datasetFields array to go
590 * away once we have more granularity than findAll() available per
591 * the todo above but we need a way to lookup by Solr field, so
592 * we'll build a hashmap
593 */
594 for (DatasetFieldType datasetField : datasetFields) {
595 String solrFieldNameForDataset = datasetField.getSolrField().getNameFacetable();
596 String friendlyName = datasetField.getDisplayName();
597 if (solrFieldNameForDataset != null && facetField.getName().endsWith(datasetField.getTmpNullFieldTypeIdentifier())) {
598 // give it the non-friendly name so we remember to update the reference data script for datasets
599 facetCategory.setName(facetField.getName());
600 } else if (solrFieldNameForDataset != null && facetField.getName().equals(solrFieldNameForDataset)) {
601 if (friendlyName != null && !friendlyName.isEmpty()) {
602 facetCategory.setFriendlyName(friendlyName);
603 // stop examining available dataset fields. we found a match
604 break;
605 }
606 }
607 datasetfieldFriendlyNamesBySolrField.put(datasetField.getSolrField().getNameFacetable(), friendlyName);
608 }
609 /**
610 * @todo get rid of this crazy reflection, per todo above... or
611 * should we... let's put into a hash the friendly names of facet
612 * categories, indexed by Solr field
613 */
614 for (Field fieldObject : staticSearchFields) {
615 String name = fieldObject.getName();
616 String staticSearchField = null;
617 try {
618 staticSearchField = (String) fieldObject.get(searchFieldsObject);
619 } catch (IllegalArgumentException ex) {
620 Logger.getLogger(SearchServiceBean.class.getName()).log(Level.SEVERE, null, ex);
621 } catch (IllegalAccessException ex) {
622 Logger.getLogger(SearchServiceBean.class.getName()).log(Level.SEVERE, null, ex);
623 }
624 if (staticSearchField != null && facetField.getName().equals(staticSearchField)) {
625 String[] parts = name.split("_");
626 StringBuilder stringBuilder = new StringBuilder();
627 for (String part : parts) {
628 stringBuilder.append(getCapitalizedName(part.toLowerCase()) + " ");
629 }
630 String friendlyNameWithTrailingSpace = stringBuilder.toString();
631 String friendlyName = friendlyNameWithTrailingSpace.replaceAll(" $", "");
632 facetCategory.setFriendlyName(friendlyName);
633 // logger.info("adding <<<" + staticSearchField + ":" + friendlyName + ">>>");
634 staticSolrFieldFriendlyNamesBySolrField.put(staticSearchField, friendlyName);
635 // stop examining the declared/static fields in the SearchFields object. we found a match
636 break;
637 }
638 }
639
640 facetCategory.setFacetLabel(facetLabelList);
641 if (!facetLabelList.isEmpty()) {
642 if (facetCategory.getName().equals(SearchFields.TYPE)) {
643 // the "type" facet is special, these are not
644 typeFacetCategories.add(facetCategory);
645 } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) {
646 if (unpublishedAvailable || draftsAvailable) {
647 hidePublicationStatusFacet = false;
648 }
649 if (!hidePublicationStatusFacet) {
650 facetCategoryList.add(facetCategory);
651 }
652 } else {
653 facetCategoryList.add(facetCategory);
654 }
655 }
656 }
657
658 // for now the only range facet is citation year
659 for (RangeFacet rangeFacet : queryResponse.getFacetRanges()) {
660 FacetCategory facetCategory = new FacetCategory();
661 List<FacetLabel> facetLabelList = new ArrayList<>();
662 for (Object rfObj : rangeFacet.getCounts()) {
663 RangeFacet.Count rangeFacetCount = (RangeFacet.Count) rfObj;
664 String valueString = rangeFacetCount.getValue();
665 Integer start = Integer.parseInt(valueString);
666 Integer end = start + Integer.parseInt(rangeFacet.getGap().toString());
667 // to avoid overlapping dates
668 end = end - 1;
669 if (rangeFacetCount.getCount() > 0) {
670 FacetLabel facetLabel = new FacetLabel(start + "-" + end, new Long(rangeFacetCount.getCount()));
671 // special [12 TO 34] syntax for range facets
672 facetLabel.setFilterQuery(rangeFacet.getName() + ":" + "[" + start + " TO " + end + "]");
673 facetLabelList.add(facetLabel);
674 }
675 }
676 facetCategory.setName(rangeFacet.getName());
677 facetCategory.setFacetLabel(facetLabelList);
678 // reverse to show the newest citation year range at the top
679 List<FacetLabel> facetLabelListReversed = new ArrayList<>();
680 ListIterator li = facetLabelList.listIterator(facetLabelList.size());
681 while (li.hasPrevious()) {
682 facetLabelListReversed.add((FacetLabel) li.previous());
683 }
684 facetCategory.setFacetLabel(facetLabelListReversed);
685 if (!facetLabelList.isEmpty()) {
686 facetCategoryList.add(facetCategory);
687 }
688 }
689
690 SolrQueryResponse solrQueryResponse = new SolrQueryResponse();
691 solrQueryResponse.setSolrSearchResults(solrSearchResults);
692 solrQueryResponse.setSpellingSuggestionsByToken(spellingSuggestionsByToken);
693 solrQueryResponse.setFacetCategoryList(facetCategoryList);
694 solrQueryResponse.setTypeFacetCategories(typeFacetCategories);
695 solrQueryResponse.setNumResultsFound(queryResponse.getResults().getNumFound());
696 solrQueryResponse.setResultsStart(queryResponse.getResults().getStart());
697 solrQueryResponse.setDatasetfieldFriendlyNamesBySolrField(datasetfieldFriendlyNamesBySolrField);
698 solrQueryResponse.setStaticSolrFieldFriendlyNamesBySolrField(staticSolrFieldFriendlyNamesBySolrField);
699 solrQueryResponse.setFilterQueriesActual(Arrays.asList(solrQuery.getFilterQueries()));
700 return solrQueryResponse;
701 }
702
703 private static String localize(String bundleKey) {
704 try {
705 String value = JH.localize(bundleKey);
706 return value;
707 } catch (Exception e) {
708 // can throw MissingResourceException
709 return "Match";
710 }
711 }
712
713 public String getCapitalizedName(String name) {
714 return Character.toUpperCase(name.charAt(0)) + name.substring(1);
715 }
716 }