Mercurial > hg > LGDataverses
comparison src/main/java/edu/harvard/iq/dataverse/SearchServiceBean.java @ 10:a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 08 Sep 2015 17:00:21 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 9:5926d6419569 | 10:a50cf11e5178 |
|---|---|
| 1 package edu.harvard.iq.dataverse; | |
| 2 | |
| 3 import edu.harvard.iq.dataverse.authorization.groups.Group; | |
| 4 import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; | |
| 5 import edu.harvard.iq.dataverse.search.SearchFields; | |
| 6 import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; | |
| 7 import edu.harvard.iq.dataverse.authorization.users.GuestUser; | |
| 8 import edu.harvard.iq.dataverse.authorization.users.User; | |
| 9 import edu.harvard.iq.dataverse.search.Highlight; | |
| 10 import edu.harvard.iq.dataverse.search.SearchException; | |
| 11 import edu.harvard.iq.dataverse.search.SearchUtil; | |
| 12 import edu.harvard.iq.dataverse.util.JsfHelper; | |
| 13 import edu.harvard.iq.dataverse.util.SystemConfig; | |
| 14 import java.lang.reflect.Field; | |
| 15 import java.util.ArrayList; | |
| 16 import java.util.Arrays; | |
| 17 import java.util.Calendar; | |
| 18 import java.util.Date; | |
| 19 import java.util.HashMap; | |
| 20 import java.util.Iterator; | |
| 21 import java.util.List; | |
| 22 import java.util.ListIterator; | |
| 23 import java.util.Map; | |
| 24 import java.util.Set; | |
| 25 import java.util.logging.Level; | |
| 26 import java.util.logging.Logger; | |
| 27 import javax.ejb.EJB; | |
| 28 import javax.ejb.EJBTransactionRolledbackException; | |
| 29 import javax.ejb.Stateless; | |
| 30 import javax.ejb.TransactionRolledbackLocalException; | |
| 31 import javax.inject.Named; | |
| 32 import javax.persistence.NoResultException; | |
| 33 import org.apache.solr.client.solrj.SolrQuery; | |
| 34 import org.apache.solr.client.solrj.SolrQuery.SortClause; | |
| 35 import org.apache.solr.client.solrj.SolrServer; | |
| 36 import org.apache.solr.client.solrj.SolrServerException; | |
| 37 import org.apache.solr.client.solrj.impl.HttpSolrServer; | |
| 38 import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException; | |
| 39 import org.apache.solr.client.solrj.response.FacetField; | |
| 40 import org.apache.solr.client.solrj.response.QueryResponse; | |
| 41 import org.apache.solr.client.solrj.response.RangeFacet; | |
| 42 import org.apache.solr.client.solrj.response.SpellCheckResponse; | |
| 43 import org.apache.solr.common.SolrDocument; | |
| 44 import org.apache.solr.common.SolrDocumentList; | |
| 45 | |
| 46 @Stateless | |
| 47 @Named | |
| 48 public class SearchServiceBean { | |
| 49 | |
| 50 private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName()); | |
| 51 | |
| 52 /** | |
| 53 * We're trying to make the SearchServiceBean lean, mean, and fast, with as | |
| 54 * few injections of EJBs as possible. | |
| 55 */ | |
| 56 /** | |
| 57 * @todo Can we do without the DatasetFieldServiceBean? | |
| 58 */ | |
| 59 @EJB | |
| 60 DvObjectServiceBean dvObjectService; | |
| 61 @EJB | |
| 62 DataverseServiceBean dataverseService; | |
| 63 @EJB | |
| 64 DatasetServiceBean datasetService; | |
| 65 @EJB | |
| 66 DatasetVersionServiceBean datasetVersionService; | |
| 67 @EJB | |
| 68 DataFileServiceBean dataFileService; | |
| 69 @EJB | |
| 70 DatasetFieldServiceBean datasetFieldService; | |
| 71 @EJB | |
| 72 GroupServiceBean groupService; | |
| 73 @EJB | |
| 74 SystemConfig systemConfig; | |
| 75 | |
| 76 public static final JsfHelper JH = new JsfHelper(); | |
| 77 | |
| 78 public SolrQueryResponse search(User user, Dataverse dataverse, String query, List<String> filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { | |
| 79 SolrServer solrServer = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr"); | |
| 80 SolrQuery solrQuery = new SolrQuery(); | |
| 81 query = SearchUtil.sanitizeQuery(query); | |
| 82 solrQuery.setQuery(query); | |
| 83 // SortClause foo = new SortClause("name", SolrQuery.ORDER.desc); | |
| 84 // if (query.equals("*") || query.equals("*:*")) { | |
| 85 // solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc)); | |
| 86 solrQuery.setSort(new SortClause(sortField, sortOrder)); | |
| 87 // } else { | |
| 88 // solrQuery.setSort(sortClause); | |
| 89 // } | |
| 90 // solrQuery.setSort(sortClause); | |
| 91 solrQuery.setHighlight(true).setHighlightSnippets(1); | |
| 92 solrQuery.setHighlightSimplePre("<span class=\"search-term-match\">"); | |
| 93 solrQuery.setHighlightSimplePost("</span>"); | |
| 94 Map<String, String> solrFieldsToHightlightOnMap = new HashMap<>(); | |
| 95 solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); | |
| 96 solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); | |
| 97 solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); | |
| 98 solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); | |
| 99 solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); | |
| 100 solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); | |
| 101 solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); | |
| 102 solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Date"); | |
| 103 solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, localize("advanced.search.datasets.persistentId")); | |
| 104 /** | |
| 105 * @todo Dataverse subject and affiliation should be highlighted but | |
| 106 * this is commented out right now because the "friendly" names are not | |
| 107 * being shown on the dataverse cards. See also | |
| 108 * https://github.com/IQSS/dataverse/issues/1431 | |
| 109 */ | |
| 110 // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); | |
| 111 // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); | |
| 112 /** | |
| 113 * @todo: show highlight on file card? | |
| 114 * https://redmine.hmdc.harvard.edu/issues/3848 | |
| 115 */ | |
| 116 solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); | |
| 117 List<DatasetFieldType> datasetFields = datasetFieldService.findAllOrderedById(); | |
| 118 for (DatasetFieldType datasetFieldType : datasetFields) { | |
| 119 String solrField = datasetFieldType.getSolrField().getNameSearchable(); | |
| 120 String displayName = datasetFieldType.getDisplayName(); | |
| 121 solrFieldsToHightlightOnMap.put(solrField, displayName); | |
| 122 } | |
| 123 for (Map.Entry<String, String> entry : solrFieldsToHightlightOnMap.entrySet()) { | |
| 124 String solrField = entry.getKey(); | |
| 125 // String displayName = entry.getValue(); | |
| 126 solrQuery.addHighlightField(solrField); | |
| 127 } | |
| 128 solrQuery.setParam("fl", "*,score"); | |
| 129 solrQuery.setParam("qt", "/spell"); | |
| 130 solrQuery.setParam("facet", "true"); | |
| 131 /** | |
| 132 * @todo: do we need facet.query? | |
| 133 */ | |
| 134 solrQuery.setParam("facet.query", "*"); | |
| 135 for (String filterQuery : filterQueries) { | |
| 136 solrQuery.addFilterQuery(filterQuery); | |
| 137 } | |
| 138 | |
| 139 /** | |
| 140 * @todo For people who are not logged in, should we show stuff indexed | |
| 141 * with "AllUsers" group or not? If so, uncomment the allUsersString | |
| 142 * stuff below. | |
| 143 */ | |
| 144 // String allUsersString = IndexServiceBean.getGroupPrefix() + AllUsers.get().getAlias(); | |
| 145 // String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + allUsersString + ")"; | |
| 146 String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + ")"; | |
| 147 // String publicOnly = "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getPublicGroupString(); | |
| 148 // initialize to public only to be safe | |
| 149 String permissionFilterQuery = publicOnly; | |
| 150 if (user instanceof GuestUser) { | |
| 151 permissionFilterQuery = publicOnly; | |
| 152 } else if (user instanceof AuthenticatedUser) { | |
| 153 // Non-guests might get more than public stuff with an OR or two | |
| 154 AuthenticatedUser au = (AuthenticatedUser) user; | |
| 155 solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); | |
| 156 | |
| 157 /** | |
| 158 * @todo all this code needs cleanup and clarification. | |
| 159 */ | |
| 160 /** | |
| 161 * Every AuthenticatedUser is part of a "User Private Group" (UPG), | |
| 162 * a concept we borrow from RHEL: | |
| 163 * https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups | |
| 164 */ | |
| 165 /** | |
| 166 * @todo rename this from publicPlusUserPrivateGroup. Confusing | |
| 167 */ | |
| 168 // safe default: public only | |
| 169 String publicPlusUserPrivateGroup = publicOnly; | |
| 170 // + (onlyDatatRelatedToMe ? "" : (publicOnly + " OR ")) | |
| 171 // + "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + ")"; | |
| 172 | |
| 173 // /** | |
| 174 // * @todo add onlyDatatRelatedToMe option into the experimental JOIN | |
| 175 // * before enabling it. | |
| 176 // */ | |
| 177 String groupsFromProviders = ""; | |
| 178 /** | |
| 179 * @todo What should the value be? Is null ok? From a search | |
| 180 * perspective, we don't care whether the group was created within | |
| 181 * one dataverse or another. We just want a list of all the groups | |
| 182 * the user is part of. A JOIN on "permission documents" will | |
| 183 * determine if the user can find a given "content document" | |
| 184 * (dataset version, etc) in Solr. | |
| 185 */ | |
| 186 // DvObject groupsForDvObjectParamNull = null; | |
| 187 // Set<Group> groups = groupService.groupsFor(au, groupsForDvObjectParamNull); | |
| 188 /** | |
| 189 * @todo What is the expected behavior when you pass in a dataverse? | |
| 190 * It seems like no matter what you pass in you always get the | |
| 191 * following types of groups: | |
| 192 * | |
| 193 * - BuiltIn Groups | |
| 194 * | |
| 195 * - IP Groups | |
| 196 * | |
| 197 * - Shibboleth Groups | |
| 198 * | |
| 199 * If you pass in the root dataverse it seems like you get all | |
| 200 * groups that you're part of. | |
| 201 * | |
| 202 * If you pass in a non-root dataverse, it seems like you get groups | |
| 203 * that you're part of for that dataverse. It's unclear if there is | |
| 204 * any inheritance of groups. | |
| 205 */ | |
| 206 DvObject groupsForDvObjectParamCurrentDataverse = dataverse; | |
| 207 Set<Group> groups = groupService.groupsFor(au, groupsForDvObjectParamCurrentDataverse); | |
| 208 StringBuilder sb = new StringBuilder(); | |
| 209 for (Group group : groups) { | |
| 210 logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); | |
| 211 String groupAlias = group.getAlias(); | |
| 212 if (groupAlias != null && !groupAlias.isEmpty()) { | |
| 213 sb.append(" OR "); | |
| 214 // i.e. group_shib/2 | |
| 215 sb.append(IndexServiceBean.getGroupPrefix() + groupAlias); | |
| 216 } | |
| 217 groupsFromProviders = sb.toString(); | |
| 218 } | |
| 219 | |
| 220 logger.fine(groupsFromProviders); | |
| 221 if (true) { | |
| 222 /** | |
| 223 * @todo get rid of "experimental" in name | |
| 224 */ | |
| 225 String experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")"; | |
| 226 if (onlyDatatRelatedToMe) { | |
| 227 /** | |
| 228 * @todo make this a variable called "String | |
| 229 * dataRelatedToMeFilterQuery" or something | |
| 230 */ | |
| 231 experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")"; | |
| 232 } | |
| 233 publicPlusUserPrivateGroup = experimentalJoin; | |
| 234 } | |
| 235 | |
| 236 permissionFilterQuery = publicPlusUserPrivateGroup; | |
| 237 logger.fine(permissionFilterQuery); | |
| 238 | |
| 239 if (au.isSuperuser()) { | |
| 240 // dangerous because this user will be able to see | |
| 241 // EVERYTHING in Solr with no regard to permissions! | |
| 242 String dangerZoneNoSolrJoin = null; | |
| 243 permissionFilterQuery = dangerZoneNoSolrJoin; | |
| 244 } | |
| 245 | |
| 246 } else { | |
| 247 logger.info("Should never reach here. A User must be an AuthenticatedUser or a Guest"); | |
| 248 } | |
| 249 | |
| 250 solrQuery.addFilterQuery(permissionFilterQuery); | |
| 251 | |
| 252 // solrQuery.addFacetField(SearchFields.HOST_DATAVERSE); | |
| 253 // solrQuery.addFacetField(SearchFields.AUTHOR_STRING); | |
| 254 solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); | |
| 255 solrQuery.addFacetField(SearchFields.AFFILIATION); | |
| 256 solrQuery.addFacetField(SearchFields.PUBLICATION_DATE); | |
| 257 // solrQuery.addFacetField(SearchFields.CATEGORY); | |
| 258 // solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME); | |
| 259 // solrQuery.addFacetField(SearchFields.DISTRIBUTOR); | |
| 260 // solrQuery.addFacetField(SearchFields.KEYWORD); | |
| 261 /** | |
| 262 * @todo when a new method on datasetFieldService is available | |
| 263 * (retrieveFacetsByDataverse?) only show the facets that the dataverse | |
| 264 * in question wants to show (and in the right order): | |
| 265 * https://redmine.hmdc.harvard.edu/issues/3490 | |
| 266 * | |
| 267 * also, findAll only returns advancedSearchField = true... we should | |
| 268 * probably introduce the "isFacetable" boolean rather than caring about | |
| 269 * whether advancedSearchField is true or false | |
| 270 * | |
| 271 */ | |
| 272 for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { | |
| 273 DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); | |
| 274 solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); | |
| 275 } | |
| 276 solrQuery.addFacetField(SearchFields.FILE_TYPE); | |
| 277 /** | |
| 278 * @todo: hide the extra line this shows in the GUI... at least it's | |
| 279 * last... | |
| 280 */ | |
| 281 solrQuery.addFacetField(SearchFields.TYPE); | |
| 282 /** | |
| 283 * @todo: do sanity checking... throw error if negative | |
| 284 */ | |
| 285 solrQuery.setStart(paginationStart); | |
| 286 /** | |
| 287 * @todo: decide if year CITATION_YEAR is good enough or if we should | |
| 288 * support CITATION_DATE | |
| 289 */ | |
| 290 // Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.UK); | |
| 291 // calendar.set(2010, 1, 1); | |
| 292 // Date start = calendar.getTime(); | |
| 293 // calendar.set(2013, 1, 1); | |
| 294 // Date end = calendar.getTime(); | |
| 295 // solrQuery.addDateRangeFacet(SearchFields.CITATION_DATE, start, end, "+1MONTH"); | |
| 296 /** | |
| 297 * @todo make this configurable | |
| 298 */ | |
| 299 int thisYear = Calendar.getInstance().get(Calendar.YEAR); | |
| 300 /** | |
| 301 * @todo: odd or even makes a difference. Couldn't find value of 2014 | |
| 302 * when this was set to 2000 | |
| 303 */ | |
| 304 final int citationYearRangeStart = 1901; | |
| 305 final int citationYearRangeEnd = thisYear; | |
| 306 final int citationYearRangeSpan = 2; | |
| 307 /** | |
| 308 * @todo: these are dates and should be "range facets" not "field | |
| 309 * facets" | |
| 310 * | |
| 311 * right now they are lumped in with the datasetFieldService.findAll() | |
| 312 * above | |
| 313 */ | |
| 314 // solrQuery.addNumericRangeFacet(SearchFields.PRODUCTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); | |
| 315 // solrQuery.addNumericRangeFacet(SearchFields.DISTRIBUTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); | |
| 316 solrQuery.setRows(numResultsPerPage); | |
| 317 logger.fine("Solr query:" + solrQuery); | |
| 318 | |
| 319 QueryResponse queryResponse; | |
| 320 try { | |
| 321 queryResponse = solrServer.query(solrQuery); | |
| 322 } catch (RemoteSolrException ex) { | |
| 323 String messageFromSolr = ex.getLocalizedMessage(); | |
| 324 String error = "Search Syntax Error: "; | |
| 325 String stringToHide = "org.apache.solr.search.SyntaxError: "; | |
| 326 if (messageFromSolr.startsWith(stringToHide)) { | |
| 327 // hide "org.apache.solr..." | |
| 328 error += messageFromSolr.substring(stringToHide.length()); | |
| 329 } else { | |
| 330 error += messageFromSolr; | |
| 331 } | |
| 332 logger.fine(error); | |
| 333 SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(); | |
| 334 exceptionSolrQueryResponse.setError(error); | |
| 335 | |
| 336 // we can't show anything because of the search syntax error | |
| 337 long zeroNumResultsFound = 0; | |
| 338 long zeroGetResultsStart = 0; | |
| 339 List<SolrSearchResult> emptySolrSearchResults = new ArrayList<>(); | |
| 340 List<FacetCategory> exceptionFacetCategoryList = new ArrayList<>(); | |
| 341 Map<String, List<String>> emptySpellingSuggestion = new HashMap<>(); | |
| 342 exceptionSolrQueryResponse.setNumResultsFound(zeroNumResultsFound); | |
| 343 exceptionSolrQueryResponse.setResultsStart(zeroGetResultsStart); | |
| 344 exceptionSolrQueryResponse.setSolrSearchResults(emptySolrSearchResults); | |
| 345 exceptionSolrQueryResponse.setFacetCategoryList(exceptionFacetCategoryList); | |
| 346 exceptionSolrQueryResponse.setTypeFacetCategories(exceptionFacetCategoryList); | |
| 347 exceptionSolrQueryResponse.setSpellingSuggestionsByToken(emptySpellingSuggestion); | |
| 348 return exceptionSolrQueryResponse; | |
| 349 } catch (SolrServerException ex) { | |
| 350 throw new SearchException("Internal Dataverse Search Engine Error", ex); | |
| 351 } | |
| 352 SolrDocumentList docs = queryResponse.getResults(); | |
| 353 Iterator<SolrDocument> iter = docs.iterator(); | |
| 354 List<SolrSearchResult> solrSearchResults = new ArrayList<>(); | |
| 355 | |
| 356 /** | |
| 357 * @todo refactor SearchFields to a hashmap (or something? put in | |
| 358 * database? internationalize?) to avoid the crazy reflection and string | |
| 359 * manipulation below | |
| 360 */ | |
| 361 Object searchFieldsObject = new SearchFields(); | |
| 362 Field[] staticSearchFields = searchFieldsObject.getClass().getDeclaredFields(); | |
| 363 String titleSolrField = null; | |
| 364 try { | |
| 365 DatasetFieldType titleDatasetField = datasetFieldService.findByName(DatasetFieldConstant.title); | |
| 366 titleSolrField = titleDatasetField.getSolrField().getNameSearchable(); | |
| 367 } catch (EJBTransactionRolledbackException ex) { | |
| 368 logger.info("Couldn't find " + DatasetFieldConstant.title); | |
| 369 if (ex.getCause() instanceof TransactionRolledbackLocalException) { | |
| 370 if (ex.getCause().getCause() instanceof NoResultException) { | |
| 371 logger.info("Caught NoResultException"); | |
| 372 } | |
| 373 } | |
| 374 } | |
| 375 Map<String, String> datasetfieldFriendlyNamesBySolrField = new HashMap<>(); | |
| 376 Map<String, String> staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); | |
| 377 String baseUrl = systemConfig.getDataverseSiteUrl(); | |
| 378 while (iter.hasNext()) { | |
| 379 SolrDocument solrDocument = iter.next(); | |
| 380 String id = (String) solrDocument.getFieldValue(SearchFields.ID); | |
| 381 Long entityid = (Long) solrDocument.getFieldValue(SearchFields.ENTITY_ID); | |
| 382 String type = (String) solrDocument.getFieldValue(SearchFields.TYPE); | |
| 383 float score = (Float) solrDocument.getFieldValue(SearchFields.RELEVANCE); | |
| 384 logger.fine("score for " + id + ": " + score); | |
| 385 String identifier = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER); | |
| 386 String citation = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION); | |
| 387 String persistentUrl = (String) solrDocument.getFieldValue(SearchFields.PERSISTENT_URL); | |
| 388 String name = (String) solrDocument.getFieldValue(SearchFields.NAME); | |
| 389 String nameSort = (String) solrDocument.getFieldValue(SearchFields.NAME_SORT); | |
| 390 // ArrayList titles = (ArrayList) solrDocument.getFieldValues(SearchFields.TITLE); | |
| 391 String title = (String) solrDocument.getFieldValue(titleSolrField); | |
| 392 Long datasetVersionId = (Long) solrDocument.getFieldValue(SearchFields.DATASET_VERSION_ID); | |
| 393 String deaccessionReason = (String) solrDocument.getFieldValue(SearchFields.DATASET_DEACCESSION_REASON); | |
| 394 // logger.info("titleSolrField: " + titleSolrField); | |
| 395 // logger.info("title: " + title); | |
| 396 String filetype = (String) solrDocument.getFieldValue(SearchFields.FILE_TYPE_FRIENDLY); | |
| 397 String fileContentType = (String) solrDocument.getFieldValue(SearchFields.FILE_CONTENT_TYPE); | |
| 398 Date release_or_create_date = (Date) solrDocument.getFieldValue(SearchFields.RELEASE_OR_CREATE_DATE); | |
| 399 String dateToDisplayOnCard = (String) solrDocument.getFirstValue(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT); | |
| 400 List<String> matchedFields = new ArrayList<>(); | |
| 401 List<Highlight> highlights = new ArrayList<>(); | |
| 402 Map<SolrField, Highlight> highlightsMap = new HashMap<>(); | |
| 403 Map<SolrField, List<String>> highlightsMap2 = new HashMap<>(); | |
| 404 Map<String, Highlight> highlightsMap3 = new HashMap<>(); | |
| 405 if (queryResponse.getHighlighting().get(id) != null) { | |
| 406 for (Map.Entry<String, String> entry : solrFieldsToHightlightOnMap.entrySet()) { | |
| 407 String field = entry.getKey(); | |
| 408 String displayName = entry.getValue(); | |
| 409 | |
| 410 List<String> highlightSnippets = queryResponse.getHighlighting().get(id).get(field); | |
| 411 if (highlightSnippets != null) { | |
| 412 matchedFields.add(field); | |
| 413 /** | |
| 414 * @todo only SolrField.SolrType.STRING? that's not | |
| 415 * right... knit the SolrField object more into the | |
| 416 * highlighting stuff | |
| 417 */ | |
| 418 SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); | |
| 419 Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); | |
| 420 highlights.add(highlight); | |
| 421 highlightsMap.put(solrField, highlight); | |
| 422 highlightsMap2.put(solrField, highlightSnippets); | |
| 423 highlightsMap3.put(field, highlight); | |
| 424 } | |
| 425 } | |
| 426 | |
| 427 } | |
| 428 SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); | |
| 429 /** | |
| 430 * @todo put all this in the constructor? | |
| 431 */ | |
| 432 List<String> states = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.PUBLICATION_STATUS); | |
| 433 if (states != null) { | |
| 434 for (String state : states) { | |
| 435 if (state.equals(IndexServiceBean.getUNPUBLISHED_STRING())) { | |
| 436 solrSearchResult.setUnpublishedState(true); | |
| 437 } else if (state.equals(IndexServiceBean.getDRAFT_STRING())) { | |
| 438 solrSearchResult.setDraftState(true); | |
| 439 // } else if (state.equals(IndexServiceBean.getDEACCESSIONED_STRING())) { | |
| 440 // solrSearchResult.setDeaccessionedState(true); | |
| 441 } | |
| 442 } | |
| 443 } | |
| 444 // logger.info(id + ": " + description); | |
| 445 solrSearchResult.setId(id); | |
| 446 solrSearchResult.setEntityId(entityid); | |
| 447 solrSearchResult.setEntity(dvObjectService.findDvObject(entityid)); | |
| 448 solrSearchResult.setIdentifier(identifier); | |
| 449 solrSearchResult.setPersistentUrl(persistentUrl); | |
| 450 solrSearchResult.setType(type); | |
| 451 solrSearchResult.setScore(score); | |
| 452 solrSearchResult.setNameSort(nameSort); | |
| 453 solrSearchResult.setReleaseOrCreateDate(release_or_create_date); | |
| 454 solrSearchResult.setDateToDisplayOnCard(dateToDisplayOnCard); | |
| 455 solrSearchResult.setMatchedFields(matchedFields); | |
| 456 solrSearchResult.setHighlightsAsList(highlights); | |
| 457 solrSearchResult.setHighlightsMap(highlightsMap); | |
| 458 solrSearchResult.setHighlightsAsMap(highlightsMap3); | |
| 459 Map<String, String> parent = new HashMap<>(); | |
| 460 String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION); | |
| 461 solrSearchResult.setDescriptionNoSnippet(description); | |
| 462 solrSearchResult.setDeaccessionReason(deaccessionReason); | |
| 463 /** | |
| 464 * @todo start using SearchConstants class here | |
| 465 */ | |
| 466 if (type.equals("dataverses")) { | |
| 467 solrSearchResult.setName(name); | |
| 468 solrSearchResult.setHtmlUrl(baseUrl + "/dataverse/" + identifier); | |
| 469 solrSearchResult.setImageUrl(baseUrl + "/api/access/dvCardImage/" + entityid); | |
| 470 /** | |
| 471 * @todo Expose this API URL after "dvs" is changed to | |
| 472 * "dataverses". Also, is an API token required for published | |
| 473 * dataverses? | |
| 474 * Michael: url changed. | |
| 475 */ | |
| 476 // solrSearchResult.setApiUrl(baseUrl + "/api/dataverses/" + entityid); | |
| 477 } else if (type.equals("datasets")) { | |
| 478 solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?globalId=" + identifier); | |
| 479 solrSearchResult.setApiUrl(baseUrl + "/api/datasets/" + entityid); | |
| 480 solrSearchResult.setImageUrl(baseUrl + "/api/access/dsCardImage/" + datasetVersionId); | |
| 481 /** | |
| 482 * @todo Could use getFieldValues (plural) here. | |
| 483 */ | |
| 484 ArrayList<String> datasetDescriptions = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.DATASET_DESCRIPTION); | |
| 485 if (datasetDescriptions != null) { | |
| 486 String firstDatasetDescription = datasetDescriptions.get(0); | |
| 487 if (firstDatasetDescription != null) { | |
| 488 solrSearchResult.setDescriptionNoSnippet(firstDatasetDescription); | |
| 489 } | |
| 490 } | |
| 491 solrSearchResult.setDatasetVersionId(datasetVersionId); | |
| 492 | |
| 493 solrSearchResult.setCitation(citation); | |
| 494 if (title != null) { | |
| 495 // solrSearchResult.setTitle((String) titles.get(0)); | |
| 496 solrSearchResult.setTitle((String) title); | |
| 497 } else { | |
| 498 logger.info("No title indexed. Setting to empty string to prevent NPE. Dataset id " + entityid + " and version id " + datasetVersionId); | |
| 499 solrSearchResult.setTitle(""); | |
| 500 } | |
| 501 List<String> authors = (ArrayList) solrDocument.getFieldValues(DatasetFieldConstant.authorName); | |
| 502 if (authors != null) { | |
| 503 solrSearchResult.setDatasetAuthors(authors); | |
| 504 } | |
| 505 } else if (type.equals("files")) { | |
| 506 String parentGlobalId = null; | |
| 507 Object parentGlobalIdObject = solrDocument.getFieldValue(SearchFields.PARENT_IDENTIFIER); | |
| 508 if (parentGlobalIdObject != null) { | |
| 509 parentGlobalId = (String) parentGlobalIdObject; | |
| 510 parent.put(SolrSearchResult.PARENT_IDENTIFIER, parentGlobalId); | |
| 511 } | |
| 512 solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?persistentId=" + parentGlobalId); | |
| 513 solrSearchResult.setDownloadUrl(baseUrl + "/api/access/datafile/" + entityid); | |
| 514 /** | |
| 515 * @todo We are not yet setting the API URL for files because | |
| 516 * not all files have metadata. Only subsettable files (those | |
| 517 * with a datatable) seem to have metadata. Furthermore, the | |
| 518 * response is in XML whereas the rest of the Search API returns | |
| 519 * JSON. | |
| 520 */ | |
| 521 // solrSearchResult.setApiUrl(baseUrl + "/api/meta/datafile/" + entityid); | |
| 522 solrSearchResult.setImageUrl(baseUrl + "/api/access/fileCardImage/" + entityid); | |
| 523 solrSearchResult.setName(name); | |
| 524 solrSearchResult.setFiletype(filetype); | |
| 525 solrSearchResult.setFileContentType(fileContentType); | |
| 526 Object fileSizeInBytesObject = solrDocument.getFieldValue(SearchFields.FILE_SIZE_IN_BYTES); | |
| 527 if (fileSizeInBytesObject != null) { | |
| 528 try { | |
| 529 long fileSizeInBytesLong = (long) fileSizeInBytesObject; | |
| 530 solrSearchResult.setFileSizeInBytes(fileSizeInBytesLong); | |
| 531 } catch (ClassCastException ex) { | |
| 532 logger.info("Could not cast file " + entityid + " to long for " + SearchFields.FILE_SIZE_IN_BYTES + ": " + ex.getLocalizedMessage()); | |
| 533 } | |
| 534 } | |
| 535 solrSearchResult.setFileMd5((String) solrDocument.getFieldValue(SearchFields.FILE_MD5)); | |
| 536 solrSearchResult.setUnf((String) solrDocument.getFieldValue(SearchFields.UNF)); | |
| 537 solrSearchResult.setDatasetVersionId(datasetVersionId); | |
| 538 } | |
| 539 /** | |
| 540 * @todo store PARENT_ID as a long instead and cast as such | |
| 541 */ | |
| 542 parent.put("id", (String) solrDocument.getFieldValue(SearchFields.PARENT_ID)); | |
| 543 parent.put("name", (String) solrDocument.getFieldValue(SearchFields.PARENT_NAME)); | |
| 544 parent.put("citation", (String) solrDocument.getFieldValue(SearchFields.PARENT_CITATION)); | |
| 545 solrSearchResult.setParent(parent); | |
| 546 solrSearchResults.add(solrSearchResult); | |
| 547 } | |
| 548 Map<String, List<String>> spellingSuggestionsByToken = new HashMap<>(); | |
| 549 SpellCheckResponse spellCheckResponse = queryResponse.getSpellCheckResponse(); | |
| 550 if (spellCheckResponse != null) { | |
| 551 List<SpellCheckResponse.Suggestion> suggestions = spellCheckResponse.getSuggestions(); | |
| 552 for (SpellCheckResponse.Suggestion suggestion : suggestions) { | |
| 553 spellingSuggestionsByToken.put(suggestion.getToken(), suggestion.getAlternatives()); | |
| 554 } | |
| 555 } | |
| 556 | |
| 557 List<FacetCategory> facetCategoryList = new ArrayList<FacetCategory>(); | |
| 558 List<FacetCategory> typeFacetCategories = new ArrayList<>(); | |
| 559 boolean hidePublicationStatusFacet = true; | |
| 560 boolean draftsAvailable = false; | |
| 561 boolean unpublishedAvailable = false; | |
| 562 for (FacetField facetField : queryResponse.getFacetFields()) { | |
| 563 FacetCategory facetCategory = new FacetCategory(); | |
| 564 List<FacetLabel> facetLabelList = new ArrayList<>(); | |
| 565 for (FacetField.Count facetFieldCount : facetField.getValues()) { | |
| 566 /** | |
| 567 * @todo we do want to show the count for each facet | |
| 568 */ | |
| 569 // logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")"); | |
| 570 if (facetFieldCount.getCount() > 0) { | |
| 571 FacetLabel facetLabel = new FacetLabel(facetFieldCount.getName(), facetFieldCount.getCount()); | |
| 572 // quote field facets | |
| 573 facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\""); | |
| 574 facetLabelList.add(facetLabel); | |
| 575 if (facetField.getName().equals(SearchFields.PUBLICATION_STATUS)) { | |
| 576 if (facetLabel.getName().equals(IndexServiceBean.getUNPUBLISHED_STRING())) { | |
| 577 unpublishedAvailable = true; | |
| 578 } else if (facetLabel.getName().equals(IndexServiceBean.getDRAFT_STRING())) { | |
| 579 draftsAvailable = true; | |
| 580 } | |
| 581 } | |
| 582 } | |
| 583 } | |
| 584 facetCategory.setName(facetField.getName()); | |
| 585 // hopefully people will never see the raw facetField.getName() because it may well have an _s at the end | |
| 586 facetCategory.setFriendlyName(facetField.getName()); | |
| 587 // try to find a friendlier name to display as a facet | |
| 588 /** | |
| 589 * @todo hmm, we thought we wanted the datasetFields array to go | |
| 590 * away once we have more granularity than findAll() available per | |
| 591 * the todo above but we need a way to lookup by Solr field, so | |
| 592 * we'll build a hashmap | |
| 593 */ | |
| 594 for (DatasetFieldType datasetField : datasetFields) { | |
| 595 String solrFieldNameForDataset = datasetField.getSolrField().getNameFacetable(); | |
| 596 String friendlyName = datasetField.getDisplayName(); | |
| 597 if (solrFieldNameForDataset != null && facetField.getName().endsWith(datasetField.getTmpNullFieldTypeIdentifier())) { | |
| 598 // give it the non-friendly name so we remember to update the reference data script for datasets | |
| 599 facetCategory.setName(facetField.getName()); | |
| 600 } else if (solrFieldNameForDataset != null && facetField.getName().equals(solrFieldNameForDataset)) { | |
| 601 if (friendlyName != null && !friendlyName.isEmpty()) { | |
| 602 facetCategory.setFriendlyName(friendlyName); | |
| 603 // stop examining available dataset fields. we found a match | |
| 604 break; | |
| 605 } | |
| 606 } | |
| 607 datasetfieldFriendlyNamesBySolrField.put(datasetField.getSolrField().getNameFacetable(), friendlyName); | |
| 608 } | |
| 609 /** | |
| 610 * @todo get rid of this crazy reflection, per todo above... or | |
| 611 * should we... let's put into a hash the friendly names of facet | |
| 612 * categories, indexed by Solr field | |
| 613 */ | |
| 614 for (Field fieldObject : staticSearchFields) { | |
| 615 String name = fieldObject.getName(); | |
| 616 String staticSearchField = null; | |
| 617 try { | |
| 618 staticSearchField = (String) fieldObject.get(searchFieldsObject); | |
| 619 } catch (IllegalArgumentException ex) { | |
| 620 Logger.getLogger(SearchServiceBean.class.getName()).log(Level.SEVERE, null, ex); | |
| 621 } catch (IllegalAccessException ex) { | |
| 622 Logger.getLogger(SearchServiceBean.class.getName()).log(Level.SEVERE, null, ex); | |
| 623 } | |
| 624 if (staticSearchField != null && facetField.getName().equals(staticSearchField)) { | |
| 625 String[] parts = name.split("_"); | |
| 626 StringBuilder stringBuilder = new StringBuilder(); | |
| 627 for (String part : parts) { | |
| 628 stringBuilder.append(getCapitalizedName(part.toLowerCase()) + " "); | |
| 629 } | |
| 630 String friendlyNameWithTrailingSpace = stringBuilder.toString(); | |
| 631 String friendlyName = friendlyNameWithTrailingSpace.replaceAll(" $", ""); | |
| 632 facetCategory.setFriendlyName(friendlyName); | |
| 633 // logger.info("adding <<<" + staticSearchField + ":" + friendlyName + ">>>"); | |
| 634 staticSolrFieldFriendlyNamesBySolrField.put(staticSearchField, friendlyName); | |
| 635 // stop examining the declared/static fields in the SearchFields object. we found a match | |
| 636 break; | |
| 637 } | |
| 638 } | |
| 639 | |
| 640 facetCategory.setFacetLabel(facetLabelList); | |
| 641 if (!facetLabelList.isEmpty()) { | |
| 642 if (facetCategory.getName().equals(SearchFields.TYPE)) { | |
| 643 // the "type" facet is special, these are not | |
| 644 typeFacetCategories.add(facetCategory); | |
| 645 } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { | |
| 646 if (unpublishedAvailable || draftsAvailable) { | |
| 647 hidePublicationStatusFacet = false; | |
| 648 } | |
| 649 if (!hidePublicationStatusFacet) { | |
| 650 facetCategoryList.add(facetCategory); | |
| 651 } | |
| 652 } else { | |
| 653 facetCategoryList.add(facetCategory); | |
| 654 } | |
| 655 } | |
| 656 } | |
| 657 | |
| 658 // for now the only range facet is citation year | |
| 659 for (RangeFacet rangeFacet : queryResponse.getFacetRanges()) { | |
| 660 FacetCategory facetCategory = new FacetCategory(); | |
| 661 List<FacetLabel> facetLabelList = new ArrayList<>(); | |
| 662 for (Object rfObj : rangeFacet.getCounts()) { | |
| 663 RangeFacet.Count rangeFacetCount = (RangeFacet.Count) rfObj; | |
| 664 String valueString = rangeFacetCount.getValue(); | |
| 665 Integer start = Integer.parseInt(valueString); | |
| 666 Integer end = start + Integer.parseInt(rangeFacet.getGap().toString()); | |
| 667 // to avoid overlapping dates | |
| 668 end = end - 1; | |
| 669 if (rangeFacetCount.getCount() > 0) { | |
| 670 FacetLabel facetLabel = new FacetLabel(start + "-" + end, new Long(rangeFacetCount.getCount())); | |
| 671 // special [12 TO 34] syntax for range facets | |
| 672 facetLabel.setFilterQuery(rangeFacet.getName() + ":" + "[" + start + " TO " + end + "]"); | |
| 673 facetLabelList.add(facetLabel); | |
| 674 } | |
| 675 } | |
| 676 facetCategory.setName(rangeFacet.getName()); | |
| 677 facetCategory.setFacetLabel(facetLabelList); | |
| 678 // reverse to show the newest citation year range at the top | |
| 679 List<FacetLabel> facetLabelListReversed = new ArrayList<>(); | |
| 680 ListIterator li = facetLabelList.listIterator(facetLabelList.size()); | |
| 681 while (li.hasPrevious()) { | |
| 682 facetLabelListReversed.add((FacetLabel) li.previous()); | |
| 683 } | |
| 684 facetCategory.setFacetLabel(facetLabelListReversed); | |
| 685 if (!facetLabelList.isEmpty()) { | |
| 686 facetCategoryList.add(facetCategory); | |
| 687 } | |
| 688 } | |
| 689 | |
| 690 SolrQueryResponse solrQueryResponse = new SolrQueryResponse(); | |
| 691 solrQueryResponse.setSolrSearchResults(solrSearchResults); | |
| 692 solrQueryResponse.setSpellingSuggestionsByToken(spellingSuggestionsByToken); | |
| 693 solrQueryResponse.setFacetCategoryList(facetCategoryList); | |
| 694 solrQueryResponse.setTypeFacetCategories(typeFacetCategories); | |
| 695 solrQueryResponse.setNumResultsFound(queryResponse.getResults().getNumFound()); | |
| 696 solrQueryResponse.setResultsStart(queryResponse.getResults().getStart()); | |
| 697 solrQueryResponse.setDatasetfieldFriendlyNamesBySolrField(datasetfieldFriendlyNamesBySolrField); | |
| 698 solrQueryResponse.setStaticSolrFieldFriendlyNamesBySolrField(staticSolrFieldFriendlyNamesBySolrField); | |
| 699 solrQueryResponse.setFilterQueriesActual(Arrays.asList(solrQuery.getFilterQueries())); | |
| 700 return solrQueryResponse; | |
| 701 } | |
| 702 | |
| 703 private static String localize(String bundleKey) { | |
| 704 try { | |
| 705 String value = JH.localize(bundleKey); | |
| 706 return value; | |
| 707 } catch (Exception e) { | |
| 708 // can throw MissingResourceException | |
| 709 return "Match"; | |
| 710 } | |
| 711 } | |
| 712 | |
| 713 public String getCapitalizedName(String name) { | |
| 714 return Character.toUpperCase(name.charAt(0)) + name.substring(1); | |
| 715 } | |
| 716 } |
