Mercurial > hg > LGDataverses
diff src/main/java/edu/harvard/iq/dataverse/SearchServiceBean.java @ 10:a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 08 Sep 2015 17:00:21 +0200 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/main/java/edu/harvard/iq/dataverse/SearchServiceBean.java Tue Sep 08 17:00:21 2015 +0200 @@ -0,0 +1,716 @@ +package edu.harvard.iq.dataverse; + +import edu.harvard.iq.dataverse.authorization.groups.Group; +import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean; +import edu.harvard.iq.dataverse.search.SearchFields; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.authorization.users.GuestUser; +import edu.harvard.iq.dataverse.authorization.users.User; +import edu.harvard.iq.dataverse.search.Highlight; +import edu.harvard.iq.dataverse.search.SearchException; +import edu.harvard.iq.dataverse.search.SearchUtil; +import edu.harvard.iq.dataverse.util.JsfHelper; +import edu.harvard.iq.dataverse.util.SystemConfig; +import java.lang.reflect.Field; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Calendar; +import java.util.Date; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.ListIterator; +import java.util.Map; +import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.ejb.EJB; +import javax.ejb.EJBTransactionRolledbackException; +import javax.ejb.Stateless; +import javax.ejb.TransactionRolledbackLocalException; +import javax.inject.Named; +import javax.persistence.NoResultException; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrQuery.SortClause; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.HttpSolrServer; +import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException; +import org.apache.solr.client.solrj.response.FacetField; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.client.solrj.response.RangeFacet; +import org.apache.solr.client.solrj.response.SpellCheckResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; + +@Stateless +@Named +public class SearchServiceBean { + + private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName()); + + /** + * We're trying to make the SearchServiceBean lean, mean, and fast, with as + * few injections of EJBs as possible. + */ + /** + * @todo Can we do without the DatasetFieldServiceBean? + */ + @EJB + DvObjectServiceBean dvObjectService; + @EJB + DataverseServiceBean dataverseService; + @EJB + DatasetServiceBean datasetService; + @EJB + DatasetVersionServiceBean datasetVersionService; + @EJB + DataFileServiceBean dataFileService; + @EJB + DatasetFieldServiceBean datasetFieldService; + @EJB + GroupServiceBean groupService; + @EJB + SystemConfig systemConfig; + + public static final JsfHelper JH = new JsfHelper(); + + public SolrQueryResponse search(User user, Dataverse dataverse, String query, List<String> filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException { + SolrServer solrServer = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr"); + SolrQuery solrQuery = new SolrQuery(); + query = SearchUtil.sanitizeQuery(query); + solrQuery.setQuery(query); +// SortClause foo = new SortClause("name", SolrQuery.ORDER.desc); +// if (query.equals("*") || query.equals("*:*")) { +// solrQuery.setSort(new SortClause(SearchFields.NAME_SORT, SolrQuery.ORDER.asc)); + solrQuery.setSort(new SortClause(sortField, sortOrder)); +// } else { +// solrQuery.setSort(sortClause); +// } +// solrQuery.setSort(sortClause); + solrQuery.setHighlight(true).setHighlightSnippets(1); + solrQuery.setHighlightSimplePre("<span class=\"search-term-match\">"); + solrQuery.setHighlightSimplePost("</span>"); + Map<String, String> solrFieldsToHightlightOnMap = new HashMap<>(); + solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name"); + solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name"); + solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label"); + solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Date"); + solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, localize("advanced.search.datasets.persistentId")); + /** + * @todo Dataverse subject and affiliation should be highlighted but + * this is commented out right now because the "friendly" names are not + * being shown on the dataverse cards. See also + * https://github.com/IQSS/dataverse/issues/1431 + */ +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject"); +// solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation"); + /** + * @todo: show highlight on file card? + * https://redmine.hmdc.harvard.edu/issues/3848 + */ + solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension"); + List<DatasetFieldType> datasetFields = datasetFieldService.findAllOrderedById(); + for (DatasetFieldType datasetFieldType : datasetFields) { + String solrField = datasetFieldType.getSolrField().getNameSearchable(); + String displayName = datasetFieldType.getDisplayName(); + solrFieldsToHightlightOnMap.put(solrField, displayName); + } + for (Map.Entry<String, String> entry : solrFieldsToHightlightOnMap.entrySet()) { + String solrField = entry.getKey(); + // String displayName = entry.getValue(); + solrQuery.addHighlightField(solrField); + } + solrQuery.setParam("fl", "*,score"); + solrQuery.setParam("qt", "/spell"); + solrQuery.setParam("facet", "true"); + /** + * @todo: do we need facet.query? + */ + solrQuery.setParam("facet.query", "*"); + for (String filterQuery : filterQueries) { + solrQuery.addFilterQuery(filterQuery); + } + + /** + * @todo For people who are not logged in, should we show stuff indexed + * with "AllUsers" group or not? If so, uncomment the allUsersString + * stuff below. + */ +// String allUsersString = IndexServiceBean.getGroupPrefix() + AllUsers.get().getAlias(); +// String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + allUsersString + ")"; + String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + ")"; +// String publicOnly = "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getPublicGroupString(); + // initialize to public only to be safe + String permissionFilterQuery = publicOnly; + if (user instanceof GuestUser) { + permissionFilterQuery = publicOnly; + } else if (user instanceof AuthenticatedUser) { + // Non-guests might get more than public stuff with an OR or two + AuthenticatedUser au = (AuthenticatedUser) user; + solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS); + + /** + * @todo all this code needs cleanup and clarification. + */ + /** + * Every AuthenticatedUser is part of a "User Private Group" (UGP), + * a concept we borrow from RHEL: + * https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups + */ + /** + * @todo rename this from publicPlusUserPrivateGroup. Confusing + */ + // safe default: public only + String publicPlusUserPrivateGroup = publicOnly; +// + (onlyDatatRelatedToMe ? "" : (publicOnly + " OR ")) +// + "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + ")"; + +// /** +// * @todo add onlyDatatRelatedToMe option into the experimental JOIN +// * before enabling it. +// */ + String groupsFromProviders = ""; + /** + * @todo What should the value be? Is null ok? From a search + * perspective, we don't care about if the group was created within + * one dataverse or another. We just want a list of all the groups + * the user is part of. A JOIN on "permission documents" will + * determine if the user can find a given "content document" + * (dataset version, etc) in Solr. + */ +// DvObject groupsForDvObjectParamNull = null; +// Set<Group> groups = groupService.groupsFor(au, groupsForDvObjectParamNull); + /** + * @todo What is the expected behavior when you pass in a dataverse? + * It seems like no matter what you pass in you always get the + * following types of groups: + * + * - BuiltIn Groups + * + * - IP Groups + * + * - Shibboleth Groups + * + * If you pass in the root dataverse it seems like you get all + * groups that you're part of. + * + * If you pass in a non-root dataverse, it seems like you get groups + * that you're part of for that dataverse. It's unclear if there is + * any inheritance of groups. + */ + DvObject groupsForDvObjectParamCurrentDataverse = dataverse; + Set<Group> groups = groupService.groupsFor(au, groupsForDvObjectParamCurrentDataverse); + StringBuilder sb = new StringBuilder(); + for (Group group : groups) { + logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); + String groupAlias = group.getAlias(); + if (groupAlias != null && !groupAlias.isEmpty()) { + sb.append(" OR "); + // i.e. group_shib/2 + sb.append(IndexServiceBean.getGroupPrefix() + groupAlias); + } + groupsFromProviders = sb.toString(); + } + + logger.fine(groupsFromProviders); + if (true) { + /** + * @todo get rid of "experimental" in name + */ + String experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")"; + if (onlyDatatRelatedToMe) { + /** + * @todo make this a variable called "String + * dataRelatedToMeFilterQuery" or something + */ + experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")"; + } + publicPlusUserPrivateGroup = experimentalJoin; + } + + permissionFilterQuery = publicPlusUserPrivateGroup; + logger.fine(permissionFilterQuery); + + if (au.isSuperuser()) { + // dangerous because this user will be able to see + // EVERYTHING in Solr with no regard to permissions! + String dangerZoneNoSolrJoin = null; + permissionFilterQuery = dangerZoneNoSolrJoin; + } + + } else { + logger.info("Should never reach here. A User must be an AuthenticatedUser or a Guest"); + } + + solrQuery.addFilterQuery(permissionFilterQuery); + +// solrQuery.addFacetField(SearchFields.HOST_DATAVERSE); +// solrQuery.addFacetField(SearchFields.AUTHOR_STRING); + solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY); + solrQuery.addFacetField(SearchFields.AFFILIATION); + solrQuery.addFacetField(SearchFields.PUBLICATION_DATE); +// solrQuery.addFacetField(SearchFields.CATEGORY); +// solrQuery.addFacetField(SearchFields.FILE_TYPE_MIME); +// solrQuery.addFacetField(SearchFields.DISTRIBUTOR); +// solrQuery.addFacetField(SearchFields.KEYWORD); + /** + * @todo when a new method on datasetFieldService is available + * (retrieveFacetsByDataverse?) only show the facets that the dataverse + * in question wants to show (and in the right order): + * https://redmine.hmdc.harvard.edu/issues/3490 + * + * also, findAll only returns advancedSearchField = true... we should + * probably introduce the "isFacetable" boolean rather than caring about + * if advancedSearchField is true or false + * + */ + for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) { + DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType(); + solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable()); + } + solrQuery.addFacetField(SearchFields.FILE_TYPE); + /** + * @todo: hide the extra line this shows in the GUI... at least it's + * last... + */ + solrQuery.addFacetField(SearchFields.TYPE); + /** + * @todo: do sanity checking... throw error if negative + */ + solrQuery.setStart(paginationStart); + /** + * @todo: decide if year CITATION_YEAR is good enough or if we should + * support CITATION_DATE + */ +// Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.UK); +// calendar.set(2010, 1, 1); +// Date start = calendar.getTime(); +// calendar.set(2013, 1, 1); +// Date end = calendar.getTime(); +// solrQuery.addDateRangeFacet(SearchFields.CITATION_DATE, start, end, "+1MONTH"); + /** + * @todo make this configurable + */ + int thisYear = Calendar.getInstance().get(Calendar.YEAR); + /** + * @todo: odd or even makes a difference. Couldn't find value of 2014 + * when this was set to 2000 + */ + final int citationYearRangeStart = 1901; + final int citationYearRangeEnd = thisYear; + final int citationYearRangeSpan = 2; + /** + * @todo: these are dates and should be "range facets" not "field + * facets" + * + * right now they are lumped in with the datasetFieldService.findAll() + * above + */ +// solrQuery.addNumericRangeFacet(SearchFields.PRODUCTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); +// solrQuery.addNumericRangeFacet(SearchFields.DISTRIBUTION_DATE_YEAR_ONLY, citationYearRangeStart, citationYearRangeEnd, citationYearRangeSpan); + solrQuery.setRows(numResultsPerPage); + logger.fine("Solr query:" + solrQuery); + + QueryResponse queryResponse; + try { + queryResponse = solrServer.query(solrQuery); + } catch (RemoteSolrException ex) { + String messageFromSolr = ex.getLocalizedMessage(); + String error = "Search Syntax Error: "; + String stringToHide = "org.apache.solr.search.SyntaxError: "; + if (messageFromSolr.startsWith(stringToHide)) { + // hide "org.apache.solr..." + error += messageFromSolr.substring(stringToHide.length()); + } else { + error += messageFromSolr; + } + logger.fine(error); + SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(); + exceptionSolrQueryResponse.setError(error); + + // we can't show anything because of the search syntax error + long zeroNumResultsFound = 0; + long zeroGetResultsStart = 0; + List<SolrSearchResult> emptySolrSearchResults = new ArrayList<>(); + List<FacetCategory> exceptionFacetCategoryList = new ArrayList<>(); + Map<String, List<String>> emptySpellingSuggestion = new HashMap<>(); + exceptionSolrQueryResponse.setNumResultsFound(zeroNumResultsFound); + exceptionSolrQueryResponse.setResultsStart(zeroGetResultsStart); + exceptionSolrQueryResponse.setSolrSearchResults(emptySolrSearchResults); + exceptionSolrQueryResponse.setFacetCategoryList(exceptionFacetCategoryList); + exceptionSolrQueryResponse.setTypeFacetCategories(exceptionFacetCategoryList); + exceptionSolrQueryResponse.setSpellingSuggestionsByToken(emptySpellingSuggestion); + return exceptionSolrQueryResponse; + } catch (SolrServerException ex) { + throw new SearchException("Internal Dataverse Search Engine Error", ex); + } + SolrDocumentList docs = queryResponse.getResults(); + Iterator<SolrDocument> iter = docs.iterator(); + List<SolrSearchResult> solrSearchResults = new ArrayList<>(); + + /** + * @todo refactor SearchFields to a hashmap (or something? put in + * database? internationalize?) to avoid the crazy reflection and string + * manipulation below + */ + Object searchFieldsObject = new SearchFields(); + Field[] staticSearchFields = searchFieldsObject.getClass().getDeclaredFields(); + String titleSolrField = null; + try { + DatasetFieldType titleDatasetField = datasetFieldService.findByName(DatasetFieldConstant.title); + titleSolrField = titleDatasetField.getSolrField().getNameSearchable(); + } catch (EJBTransactionRolledbackException ex) { + logger.info("Couldn't find " + DatasetFieldConstant.title); + if (ex.getCause() instanceof TransactionRolledbackLocalException) { + if (ex.getCause().getCause() instanceof NoResultException) { + logger.info("Caught NoResultException"); + } + } + } + Map<String, String> datasetfieldFriendlyNamesBySolrField = new HashMap<>(); + Map<String, String> staticSolrFieldFriendlyNamesBySolrField = new HashMap<>(); + String baseUrl = systemConfig.getDataverseSiteUrl(); + while (iter.hasNext()) { + SolrDocument solrDocument = iter.next(); + String id = (String) solrDocument.getFieldValue(SearchFields.ID); + Long entityid = (Long) solrDocument.getFieldValue(SearchFields.ENTITY_ID); + String type = (String) solrDocument.getFieldValue(SearchFields.TYPE); + float score = (Float) solrDocument.getFieldValue(SearchFields.RELEVANCE); + logger.fine("score for " + id + ": " + score); + String identifier = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER); + String citation = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION); + String persistentUrl = (String) solrDocument.getFieldValue(SearchFields.PERSISTENT_URL); + String name = (String) solrDocument.getFieldValue(SearchFields.NAME); + String nameSort = (String) solrDocument.getFieldValue(SearchFields.NAME_SORT); +// ArrayList titles = (ArrayList) solrDocument.getFieldValues(SearchFields.TITLE); + String title = (String) solrDocument.getFieldValue(titleSolrField); + Long datasetVersionId = (Long) solrDocument.getFieldValue(SearchFields.DATASET_VERSION_ID); + String deaccessionReason = (String) solrDocument.getFieldValue(SearchFields.DATASET_DEACCESSION_REASON); +// logger.info("titleSolrField: " + titleSolrField); +// logger.info("title: " + title); + String filetype = (String) solrDocument.getFieldValue(SearchFields.FILE_TYPE_FRIENDLY); + String fileContentType = (String) solrDocument.getFieldValue(SearchFields.FILE_CONTENT_TYPE); + Date release_or_create_date = (Date) solrDocument.getFieldValue(SearchFields.RELEASE_OR_CREATE_DATE); + String dateToDisplayOnCard = (String) solrDocument.getFirstValue(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT); + List<String> matchedFields = new ArrayList<>(); + List<Highlight> highlights = new ArrayList<>(); + Map<SolrField, Highlight> highlightsMap = new HashMap<>(); + Map<SolrField, List<String>> highlightsMap2 = new HashMap<>(); + Map<String, Highlight> highlightsMap3 = new HashMap<>(); + if (queryResponse.getHighlighting().get(id) != null) { + for (Map.Entry<String, String> entry : solrFieldsToHightlightOnMap.entrySet()) { + String field = entry.getKey(); + String displayName = entry.getValue(); + + List<String> highlightSnippets = queryResponse.getHighlighting().get(id).get(field); + if (highlightSnippets != null) { + matchedFields.add(field); + /** + * @todo only SolrField.SolrType.STRING? that's not + * right... knit the SolrField object more into the + * highlighting stuff + */ + SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true); + Highlight highlight = new Highlight(solrField, highlightSnippets, displayName); + highlights.add(highlight); + highlightsMap.put(solrField, highlight); + highlightsMap2.put(solrField, highlightSnippets); + highlightsMap3.put(field, highlight); + } + } + + } + SolrSearchResult solrSearchResult = new SolrSearchResult(query, name); + /** + * @todo put all this in the constructor? + */ + List<String> states = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.PUBLICATION_STATUS); + if (states != null) { + for (String state : states) { + if (state.equals(IndexServiceBean.getUNPUBLISHED_STRING())) { + solrSearchResult.setUnpublishedState(true); + } else if (state.equals(IndexServiceBean.getDRAFT_STRING())) { + solrSearchResult.setDraftState(true); +// } else if (state.equals(IndexServiceBean.getDEACCESSIONED_STRING())) { +// solrSearchResult.setDeaccessionedState(true); + } + } + } +// logger.info(id + ": " + description); + solrSearchResult.setId(id); + solrSearchResult.setEntityId(entityid); + solrSearchResult.setEntity(dvObjectService.findDvObject(entityid)); + solrSearchResult.setIdentifier(identifier); + solrSearchResult.setPersistentUrl(persistentUrl); + solrSearchResult.setType(type); + solrSearchResult.setScore(score); + solrSearchResult.setNameSort(nameSort); + solrSearchResult.setReleaseOrCreateDate(release_or_create_date); + solrSearchResult.setDateToDisplayOnCard(dateToDisplayOnCard); + solrSearchResult.setMatchedFields(matchedFields); + solrSearchResult.setHighlightsAsList(highlights); + solrSearchResult.setHighlightsMap(highlightsMap); + solrSearchResult.setHighlightsAsMap(highlightsMap3); + Map<String, String> parent = new HashMap<>(); + String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION); + solrSearchResult.setDescriptionNoSnippet(description); + solrSearchResult.setDeaccessionReason(deaccessionReason); + /** + * @todo start using SearchConstants class here + */ + if (type.equals("dataverses")) { + solrSearchResult.setName(name); + solrSearchResult.setHtmlUrl(baseUrl + "/dataverse/" + identifier); + solrSearchResult.setImageUrl(baseUrl + "/api/access/dvCardImage/" + entityid); + /** + * @todo Expose this API URL after "dvs" is changed to + * "dataverses". Also, is an API token required for published + * dataverses? + * Michael: url changed. + */ +// solrSearchResult.setApiUrl(baseUrl + "/api/dataverses/" + entityid); + } else if (type.equals("datasets")) { + solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?globalId=" + identifier); + solrSearchResult.setApiUrl(baseUrl + "/api/datasets/" + entityid); + solrSearchResult.setImageUrl(baseUrl + "/api/access/dsCardImage/" + datasetVersionId); + /** + * @todo Could use getFieldValues (plural) here. + */ + ArrayList<String> datasetDescriptions = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.DATASET_DESCRIPTION); + if (datasetDescriptions != null) { + String firstDatasetDescription = datasetDescriptions.get(0); + if (firstDatasetDescription != null) { + solrSearchResult.setDescriptionNoSnippet(firstDatasetDescription); + } + } + solrSearchResult.setDatasetVersionId(datasetVersionId); + + solrSearchResult.setCitation(citation); + if (title != null) { +// solrSearchResult.setTitle((String) titles.get(0)); + solrSearchResult.setTitle((String) title); + } else { + logger.info("No title indexed. Setting to empty string to prevent NPE. Dataset id " + entityid + " and version id " + datasetVersionId); + solrSearchResult.setTitle(""); + } + List<String> authors = (ArrayList) solrDocument.getFieldValues(DatasetFieldConstant.authorName); + if (authors != null) { + solrSearchResult.setDatasetAuthors(authors); + } + } else if (type.equals("files")) { + String parentGlobalId = null; + Object parentGlobalIdObject = solrDocument.getFieldValue(SearchFields.PARENT_IDENTIFIER); + if (parentGlobalIdObject != null) { + parentGlobalId = (String) parentGlobalIdObject; + parent.put(SolrSearchResult.PARENT_IDENTIFIER, parentGlobalId); + } + solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?persistentId=" + parentGlobalId); + solrSearchResult.setDownloadUrl(baseUrl + "/api/access/datafile/" + entityid); + /** + * @todo We are not yet setting the API URL for files because + * not all files have metadata. Only subsettable files (those + * with a datatable) seem to have metadata. Furthermore, the + * response is in XML whereas the rest of the Search API returns + * JSON. + */ +// solrSearchResult.setApiUrl(baseUrl + "/api/meta/datafile/" + entityid); + solrSearchResult.setImageUrl(baseUrl + "/api/access/fileCardImage/" + entityid); + solrSearchResult.setName(name); + solrSearchResult.setFiletype(filetype); + solrSearchResult.setFileContentType(fileContentType); + Object fileSizeInBytesObject = solrDocument.getFieldValue(SearchFields.FILE_SIZE_IN_BYTES); + if (fileSizeInBytesObject != null) { + try { + long fileSizeInBytesLong = (long) fileSizeInBytesObject; + solrSearchResult.setFileSizeInBytes(fileSizeInBytesLong); + } catch (ClassCastException ex) { + logger.info("Could not cast file " + entityid + " to long for " + SearchFields.FILE_SIZE_IN_BYTES + ": " + ex.getLocalizedMessage()); + } + } + solrSearchResult.setFileMd5((String) solrDocument.getFieldValue(SearchFields.FILE_MD5)); + solrSearchResult.setUnf((String) solrDocument.getFieldValue(SearchFields.UNF)); + solrSearchResult.setDatasetVersionId(datasetVersionId); + } + /** + * @todo store PARENT_ID as a long instead and cast as such + */ + parent.put("id", (String) solrDocument.getFieldValue(SearchFields.PARENT_ID)); + parent.put("name", (String) solrDocument.getFieldValue(SearchFields.PARENT_NAME)); + parent.put("citation", (String) solrDocument.getFieldValue(SearchFields.PARENT_CITATION)); + solrSearchResult.setParent(parent); + solrSearchResults.add(solrSearchResult); + } + Map<String, List<String>> spellingSuggestionsByToken = new HashMap<>(); + SpellCheckResponse spellCheckResponse = queryResponse.getSpellCheckResponse(); + if (spellCheckResponse != null) { + List<SpellCheckResponse.Suggestion> suggestions = spellCheckResponse.getSuggestions(); + for (SpellCheckResponse.Suggestion suggestion : suggestions) { + spellingSuggestionsByToken.put(suggestion.getToken(), suggestion.getAlternatives()); + } + } + + List<FacetCategory> facetCategoryList = new ArrayList<FacetCategory>(); + List<FacetCategory> typeFacetCategories = new ArrayList<>(); + boolean hidePublicationStatusFacet = true; + boolean draftsAvailable = false; + boolean unpublishedAvailable = false; + for (FacetField facetField : queryResponse.getFacetFields()) { + FacetCategory facetCategory = new FacetCategory(); + List<FacetLabel> facetLabelList = new ArrayList<>(); + for (FacetField.Count facetFieldCount : facetField.getValues()) { + /** + * @todo we do want to show the count for each facet + */ +// logger.info("field: " + facetField.getName() + " " + facetFieldCount.getName() + " (" + facetFieldCount.getCount() + ")"); + if (facetFieldCount.getCount() > 0) { + FacetLabel facetLabel = new FacetLabel(facetFieldCount.getName(), facetFieldCount.getCount()); + // quote field facets + facetLabel.setFilterQuery(facetField.getName() + ":\"" + facetFieldCount.getName() + "\""); + facetLabelList.add(facetLabel); + if (facetField.getName().equals(SearchFields.PUBLICATION_STATUS)) { + if (facetLabel.getName().equals(IndexServiceBean.getUNPUBLISHED_STRING())) { + unpublishedAvailable = true; + } else if (facetLabel.getName().equals(IndexServiceBean.getDRAFT_STRING())) { + draftsAvailable = true; + } + } + } + } + facetCategory.setName(facetField.getName()); + // hopefully people will never see the raw facetField.getName() because it may well have an _s at the end + facetCategory.setFriendlyName(facetField.getName()); + // try to find a friendlier name to display as a facet + /** + * @todo hmm, we thought we wanted the datasetFields array to go + * away once we have more granularity than findAll() available per + * the todo above but we need a way to lookup by Solr field, so + * we'll build a hashmap + */ + for (DatasetFieldType datasetField : datasetFields) { + String solrFieldNameForDataset = datasetField.getSolrField().getNameFacetable(); + String friendlyName = datasetField.getDisplayName(); + if (solrFieldNameForDataset != null && facetField.getName().endsWith(datasetField.getTmpNullFieldTypeIdentifier())) { + // give it the non-friendly name so we remember to update the reference data script for datasets + facetCategory.setName(facetField.getName()); + } else if (solrFieldNameForDataset != null && facetField.getName().equals(solrFieldNameForDataset)) { + if (friendlyName != null && !friendlyName.isEmpty()) { + facetCategory.setFriendlyName(friendlyName); + // stop examining available dataset fields. we found a match + break; + } + } + datasetfieldFriendlyNamesBySolrField.put(datasetField.getSolrField().getNameFacetable(), friendlyName); + } + /** + * @todo get rid of this crazy reflection, per todo above... or + * should we... let's put into a hash the friendly names of facet + * categories, indexed by Solr field + */ + for (Field fieldObject : staticSearchFields) { + String name = fieldObject.getName(); + String staticSearchField = null; + try { + staticSearchField = (String) fieldObject.get(searchFieldsObject); + } catch (IllegalArgumentException ex) { + Logger.getLogger(SearchServiceBean.class.getName()).log(Level.SEVERE, null, ex); + } catch (IllegalAccessException ex) { + Logger.getLogger(SearchServiceBean.class.getName()).log(Level.SEVERE, null, ex); + } + if (staticSearchField != null && facetField.getName().equals(staticSearchField)) { + String[] parts = name.split("_"); + StringBuilder stringBuilder = new StringBuilder(); + for (String part : parts) { + stringBuilder.append(getCapitalizedName(part.toLowerCase()) + " "); + } + String friendlyNameWithTrailingSpace = stringBuilder.toString(); + String friendlyName = friendlyNameWithTrailingSpace.replaceAll(" $", ""); + facetCategory.setFriendlyName(friendlyName); +// logger.info("adding <<<" + staticSearchField + ":" + friendlyName + ">>>"); + staticSolrFieldFriendlyNamesBySolrField.put(staticSearchField, friendlyName); + // stop examining the declared/static fields in the SearchFields object. we found a match + break; + } + } + + facetCategory.setFacetLabel(facetLabelList); + if (!facetLabelList.isEmpty()) { + if (facetCategory.getName().equals(SearchFields.TYPE)) { + // the "type" facet is special, these are not + typeFacetCategories.add(facetCategory); + } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) { + if (unpublishedAvailable || draftsAvailable) { + hidePublicationStatusFacet = false; + } + if (!hidePublicationStatusFacet) { + facetCategoryList.add(facetCategory); + } + } else { + facetCategoryList.add(facetCategory); + } + } + } + + // for now the only range facet is citation year + for (RangeFacet rangeFacet : queryResponse.getFacetRanges()) { + FacetCategory facetCategory = new FacetCategory(); + List<FacetLabel> facetLabelList = new ArrayList<>(); + for (Object rfObj : rangeFacet.getCounts()) { + RangeFacet.Count rangeFacetCount = (RangeFacet.Count) rfObj; + String valueString = rangeFacetCount.getValue(); + Integer start = Integer.parseInt(valueString); + Integer end = start + Integer.parseInt(rangeFacet.getGap().toString()); + // to avoid overlapping dates + end = end - 1; + if (rangeFacetCount.getCount() > 0) { + FacetLabel facetLabel = new FacetLabel(start + "-" + end, new Long(rangeFacetCount.getCount())); + // special [12 TO 34] syntax for range facets + facetLabel.setFilterQuery(rangeFacet.getName() + ":" + "[" + start + " TO " + end + "]"); + facetLabelList.add(facetLabel); + } + } + facetCategory.setName(rangeFacet.getName()); + facetCategory.setFacetLabel(facetLabelList); + // reverse to show the newest citation year range at the top + List<FacetLabel> facetLabelListReversed = new ArrayList<>(); + ListIterator li = facetLabelList.listIterator(facetLabelList.size()); + while (li.hasPrevious()) { + facetLabelListReversed.add((FacetLabel) li.previous()); + } + facetCategory.setFacetLabel(facetLabelListReversed); + if (!facetLabelList.isEmpty()) { + facetCategoryList.add(facetCategory); + } + } + + SolrQueryResponse solrQueryResponse = new SolrQueryResponse(); + solrQueryResponse.setSolrSearchResults(solrSearchResults); + solrQueryResponse.setSpellingSuggestionsByToken(spellingSuggestionsByToken); + solrQueryResponse.setFacetCategoryList(facetCategoryList); + solrQueryResponse.setTypeFacetCategories(typeFacetCategories); + solrQueryResponse.setNumResultsFound(queryResponse.getResults().getNumFound()); + solrQueryResponse.setResultsStart(queryResponse.getResults().getStart()); + solrQueryResponse.setDatasetfieldFriendlyNamesBySolrField(datasetfieldFriendlyNamesBySolrField); + solrQueryResponse.setStaticSolrFieldFriendlyNamesBySolrField(staticSolrFieldFriendlyNamesBySolrField); + solrQueryResponse.setFilterQueriesActual(Arrays.asList(solrQuery.getFilterQueries())); + return solrQueryResponse; + } + + private static String localize(String bundleKey) { + try { + String value = JH.localize(bundleKey); + return value; + } catch (Exception e) { + // can throw MissingResourceException + return "Match"; + } + } + + public String getCapitalizedName(String name) { + return Character.toUpperCase(name.charAt(0)) + name.substring(1); + } +}
