Mercurial > hg > LGDataverses
comparison src/main/java/edu/harvard/iq/dataverse/IndexServiceBean.java @ 10:a50cf11e5178
Rewrite LGDataverse completely, upgrading to Dataverse 4.0
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 08 Sep 2015 17:00:21 +0200 |
| parents | |
| children |
Comparison legend: equal · deleted · inserted · replaced
| 9:5926d6419569 | 10:a50cf11e5178 |
|---|---|
| 1 package edu.harvard.iq.dataverse; | |
| 2 | |
| 3 import edu.harvard.iq.dataverse.util.StringUtil; | |
| 4 import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; | |
| 5 import edu.harvard.iq.dataverse.search.SearchFields; | |
| 6 import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; | |
| 7 import edu.harvard.iq.dataverse.datavariable.DataVariable; | |
| 8 import edu.harvard.iq.dataverse.search.IndexResponse; | |
| 9 import edu.harvard.iq.dataverse.search.IndexableDataset; | |
| 10 import edu.harvard.iq.dataverse.search.IndexableObject; | |
| 11 import edu.harvard.iq.dataverse.search.SearchException; | |
| 12 import edu.harvard.iq.dataverse.search.SearchPermissionsServiceBean; | |
| 13 import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; | |
| 14 import edu.harvard.iq.dataverse.util.FileUtil; | |
| 15 import edu.harvard.iq.dataverse.util.SystemConfig; | |
| 16 import java.io.IOException; | |
| 17 import java.sql.Timestamp; | |
| 18 import java.text.DateFormat; | |
| 19 import java.text.SimpleDateFormat; | |
| 20 import java.util.ArrayList; | |
| 21 import java.util.Calendar; | |
| 22 import java.util.Collection; | |
| 23 import java.util.Date; | |
| 24 import java.util.HashSet; | |
| 25 import java.util.LinkedHashMap; | |
| 26 import java.util.List; | |
| 27 import java.util.Locale; | |
| 28 import java.util.Map; | |
| 29 import java.util.Set; | |
| 30 import java.util.concurrent.Future; | |
| 31 import java.util.logging.Logger; | |
| 32 import javax.ejb.AsyncResult; | |
| 33 import javax.ejb.EJB; | |
| 34 import javax.ejb.EJBException; | |
| 35 import javax.ejb.Stateless; | |
| 36 import javax.ejb.TransactionAttribute; | |
| 37 import static javax.ejb.TransactionAttributeType.REQUIRES_NEW; | |
| 38 import javax.inject.Named; | |
| 39 import org.apache.solr.client.solrj.SolrQuery; | |
| 40 import org.apache.solr.client.solrj.SolrServer; | |
| 41 import org.apache.solr.client.solrj.SolrServerException; | |
| 42 import org.apache.solr.client.solrj.impl.HttpSolrServer; | |
| 43 import org.apache.solr.client.solrj.response.QueryResponse; | |
| 44 import org.apache.solr.client.solrj.response.UpdateResponse; | |
| 45 import org.apache.solr.common.SolrDocument; | |
| 46 import org.apache.solr.common.SolrDocumentList; | |
| 47 import org.apache.solr.common.SolrInputDocument; | |
| 48 | |
| 49 @Stateless | |
| 50 @Named | |
| 51 public class IndexServiceBean { | |
| 52 | |
| 53 private static final Logger logger = Logger.getLogger(IndexServiceBean.class.getCanonicalName()); | |
| 54 | |
| 55 @EJB | |
| 56 DvObjectServiceBean dvObjectService; | |
| 57 @EJB | |
| 58 DataverseServiceBean dataverseService; | |
| 59 @EJB | |
| 60 DatasetServiceBean datasetService; | |
| 61 @EJB | |
| 62 BuiltinUserServiceBean dataverseUserServiceBean; | |
| 63 @EJB | |
| 64 PermissionServiceBean permissionService; | |
| 65 @EJB | |
| 66 AuthenticationServiceBean userServiceBean; | |
| 67 @EJB | |
| 68 SystemConfig systemConfig; | |
| 69 @EJB | |
| 70 SearchPermissionsServiceBean searchPermissionsService; | |
| 71 @EJB | |
| 72 SolrIndexServiceBean solrIndexService; | |
| 73 @EJB | |
| 74 DatasetLinkingServiceBean dsLinkingService; | |
| 75 @EJB | |
| 76 DataverseLinkingServiceBean dvLinkingService; | |
| 77 | |
| 78 public static final String solrDocIdentifierDataverse = "dataverse_"; | |
| 79 public static final String solrDocIdentifierFile = "datafile_"; | |
| 80 public static final String solrDocIdentifierDataset = "dataset_"; | |
| 81 public static final String draftSuffix = "_draft"; | |
| 82 public static final String deaccessionedSuffix = "_deaccessioned"; | |
| 83 public static final String discoverabilityPermissionSuffix = "_permission"; | |
| 84 private static final String groupPrefix = "group_"; | |
| 85 private static final String groupPerUserPrefix = "group_user"; | |
| 86 private static final String publicGroupIdString = "public"; | |
| 87 private static final String publicGroupString = groupPrefix + "public"; | |
| 88 private static final String PUBLISHED_STRING = "Published"; | |
| 89 private static final String UNPUBLISHED_STRING = "Unpublished"; | |
| 90 private static final String DRAFT_STRING = "Draft"; | |
| 91 private static final String DEACCESSIONED_STRING = "Deaccessioned"; | |
| 92 private Dataverse rootDataverseCached; | |
| 93 | |
| 94 @TransactionAttribute(REQUIRES_NEW) | |
| 95 public Future<String> indexDataverseInNewTransaction(Dataverse dataverse) { | |
| 96 return indexDataverse(dataverse); | |
| 97 } | |
| 98 | |
| 99 public Future<String> indexDataverse(Dataverse dataverse) { | |
| 100 logger.fine("indexDataverse called on dataverse id " + dataverse.getId() + "(" + dataverse.getAlias() + ")"); | |
| 101 if (dataverse.getId() == null) { | |
| 102 String msg = "unable to index dataverse. id was null (alias: " + dataverse.getAlias() + ")"; | |
| 103 logger.info(msg); | |
| 104 return new AsyncResult<>(msg); | |
| 105 } | |
| 106 Dataverse rootDataverse = findRootDataverseCached(); | |
| 107 if (rootDataverse == null) { | |
| 108 String msg = "Could not find root dataverse and the root dataverse should not be indexed. Returning."; | |
| 109 return new AsyncResult<>(msg); | |
| 110 } else { | |
| 111 if (dataverse.getId() == rootDataverse.getId()) { | |
| 112 String msg = "The root dataverse should not be indexed. Returning."; | |
| 113 return new AsyncResult<>(msg); | |
| 114 } | |
| 115 } | |
| 116 Collection<SolrInputDocument> docs = new ArrayList<>(); | |
| 117 SolrInputDocument solrInputDocument = new SolrInputDocument(); | |
| 118 solrInputDocument.addField(SearchFields.ID, solrDocIdentifierDataverse + dataverse.getId()); | |
| 119 solrInputDocument.addField(SearchFields.ENTITY_ID, dataverse.getId()); | |
| 120 solrInputDocument.addField(SearchFields.IDENTIFIER, dataverse.getAlias()); | |
| 121 solrInputDocument.addField(SearchFields.TYPE, "dataverses"); | |
| 122 solrInputDocument.addField(SearchFields.NAME, dataverse.getName()); | |
| 123 solrInputDocument.addField(SearchFields.NAME_SORT, dataverse.getName()); | |
| 124 solrInputDocument.addField(SearchFields.DATAVERSE_NAME, dataverse.getName()); | |
| 125 solrInputDocument.addField(SearchFields.DATAVERSE_CATEGORY, dataverse.getIndexableCategoryName()); | |
| 126 if (dataverse.isReleased()) { | |
| 127 solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING); | |
| 128 solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataverse.getPublicationDate()); | |
| 129 solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(dataverse.getPublicationDate())); | |
| 130 } else { | |
| 131 solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING); | |
| 132 solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataverse.getCreateDate()); | |
| 133 solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(dataverse.getCreateDate())); | |
| 134 } | |
| 135 | |
| 136 addDataverseReleaseDateToSolrDoc(solrInputDocument, dataverse); | |
| 137 // if (dataverse.getOwner() != null) { | |
| 138 // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataverse.getOwner().getName()); | |
| 139 // } | |
| 140 solrInputDocument.addField(SearchFields.DESCRIPTION, StringUtil.html2text(dataverse.getDescription())); | |
| 141 solrInputDocument.addField(SearchFields.DATAVERSE_DESCRIPTION, StringUtil.html2text(dataverse.getDescription())); | |
| 142 // logger.info("dataverse affiliation: " + dataverse.getAffiliation()); | |
| 143 if (dataverse.getAffiliation() != null && !dataverse.getAffiliation().isEmpty()) { | |
| 144 /** | |
| 145 * @todo: stop using affiliation as category | |
| 146 */ | |
| 147 // solrInputDocument.addField(SearchFields.CATEGORY, dataverse.getAffiliation()); | |
| 148 solrInputDocument.addField(SearchFields.AFFILIATION, dataverse.getAffiliation()); | |
| 149 solrInputDocument.addField(SearchFields.DATAVERSE_AFFILIATION, dataverse.getAffiliation()); | |
| 150 } | |
| 151 for (ControlledVocabularyValue dataverseSubject : dataverse.getDataverseSubjects()) { | |
| 152 String subject = dataverseSubject.getStrValue(); | |
| 153 if (!subject.equals(DatasetField.NA_VALUE)) { | |
| 154 solrInputDocument.addField(SearchFields.DATAVERSE_SUBJECT, subject); | |
| 155 // collapse into shared "subject" field used as a facet | |
| 156 solrInputDocument.addField(SearchFields.SUBJECT, subject); | |
| 157 } | |
| 158 } | |
| 159 // checking for NPE is important so we can create the root dataverse | |
| 160 if (rootDataverse != null && !dataverse.equals(rootDataverse)) { | |
| 161 // important when creating root dataverse | |
| 162 if (dataverse.getOwner() != null) { | |
| 163 solrInputDocument.addField(SearchFields.PARENT_ID, dataverse.getOwner().getId()); | |
| 164 solrInputDocument.addField(SearchFields.PARENT_NAME, dataverse.getOwner().getName()); | |
| 165 } | |
| 166 } | |
| 167 List<String> dataversePathSegmentsAccumulator = new ArrayList<>(); | |
| 168 List<String> dataverseSegments = findPathSegments(dataverse, dataversePathSegmentsAccumulator); | |
| 169 List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments); | |
| 170 if (dataversePaths.size() > 0) { | |
| 171 // don't show yourself while indexing or in search results: https://redmine.hmdc.harvard.edu/issues/3613 | |
| 172 // logger.info(dataverse.getName() + " size " + dataversePaths.size()); | |
| 173 dataversePaths.remove(dataversePaths.size() - 1); | |
| 174 } | |
| 175 //Add paths for linking dataverses | |
| 176 for (Dataverse linkingDataverse : dvLinkingService.findLinkingDataverses(dataverse.getId())) { | |
| 177 List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>(); | |
| 178 List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator); | |
| 179 List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments); | |
| 180 for (String dvPath : linkingDataversePaths) { | |
| 181 dataversePaths.add(dvPath); | |
| 182 } | |
| 183 } | |
| 184 solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); | |
| 185 docs.add(solrInputDocument); | |
| 186 | |
| 187 SolrServer server = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr"); | |
| 188 | |
| 189 String status; | |
| 190 try { | |
| 191 if (dataverse.getId() != null) { | |
| 192 server.add(docs); | |
| 193 } else { | |
| 194 logger.info("WARNING: indexing of a dataverse with no id attempted"); | |
| 195 } | |
| 196 } catch (SolrServerException | IOException ex) { | |
| 197 status = ex.toString(); | |
| 198 logger.info(status); | |
| 199 return new AsyncResult<>(status); | |
| 200 } | |
| 201 try { | |
| 202 server.commit(); | |
| 203 } catch (SolrServerException | IOException ex) { | |
| 204 status = ex.toString(); | |
| 205 logger.info(status); | |
| 206 return new AsyncResult<>(status); | |
| 207 } | |
| 208 | |
| 209 dvObjectService.updateContentIndexTime(dataverse); | |
| 210 IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse.getId()); | |
| 211 String msg = "indexed dataverse " + dataverse.getId() + ":" + dataverse.getAlias() + ". Response from permission indexing: " + indexResponse.getMessage(); | |
| 212 return new AsyncResult<>(msg); | |
| 213 | |
| 214 } | |
| 215 | |
| 216 @TransactionAttribute(REQUIRES_NEW) | |
| 217 public Future<String> indexDatasetInNewTransaction(Dataset dataset) { | |
| 218 boolean doNormalSolrDocCleanUp = false; | |
| 219 return indexDataset(dataset, doNormalSolrDocCleanUp); | |
| 220 } | |
| 221 | |
| 222 public Future<String> indexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { | |
| 223 logger.fine("indexing dataset " + dataset.getId()); | |
| 224 /** | |
| 225 * @todo should we use solrDocIdentifierDataset or | |
| 226 * IndexableObject.IndexableTypes.DATASET.getName() + "_" ? | |
| 227 */ | |
| 228 // String solrIdPublished = solrDocIdentifierDataset + dataset.getId(); | |
| 229 String solrIdPublished = determinePublishedDatasetSolrDocId(dataset); | |
| 230 String solrIdDraftDataset = IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.WORKING_COPY.getSuffix(); | |
| 231 // String solrIdDeaccessioned = IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.DEACCESSIONED.getSuffix(); | |
| 232 String solrIdDeaccessioned = determineDeaccessionedDatasetId(dataset); | |
| 233 StringBuilder debug = new StringBuilder(); | |
| 234 debug.append("\ndebug:\n"); | |
| 235 int numPublishedVersions = 0; | |
| 236 List<DatasetVersion> versions = dataset.getVersions(); | |
| 237 List<String> solrIdsOfFilesToDelete = new ArrayList<>(); | |
| 238 for (DatasetVersion datasetVersion : versions) { | |
| 239 Long versionDatabaseId = datasetVersion.getId(); | |
| 240 String versionTitle = datasetVersion.getTitle(); | |
| 241 String semanticVersion = datasetVersion.getSemanticVersion(); | |
| 242 DatasetVersion.VersionState versionState = datasetVersion.getVersionState(); | |
| 243 if (versionState.equals(DatasetVersion.VersionState.RELEASED)) { | |
| 244 numPublishedVersions += 1; | |
| 245 } | |
| 246 debug.append("version found with database id " + versionDatabaseId + "\n"); | |
| 247 debug.append("- title: " + versionTitle + "\n"); | |
| 248 debug.append("- semanticVersion-VersionState: " + semanticVersion + "-" + versionState + "\n"); | |
| 249 List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas(); | |
| 250 List<String> fileInfo = new ArrayList<>(); | |
| 251 for (FileMetadata fileMetadata : fileMetadatas) { | |
| 252 String solrIdOfPublishedFile = solrDocIdentifierFile + fileMetadata.getDataFile().getId(); | |
| 253 /** | |
| 254 * It sounds weird but the first thing we'll do is preemptively | |
| 255 * delete the Solr documents of all published files. Don't | |
| 256 * worry, published files will be re-indexed later along with | |
| 257 * the dataset. We do this so users can delete files from | |
| 258 * published versions of datasets and then re-publish a new | |
| 259 * version without fear that their old published files (now | |
| 260 * deleted from the latest published version) will be | |
| 261 * searchable. See also | |
| 262 * https://github.com/IQSS/dataverse/issues/762 | |
| 263 */ | |
| 264 solrIdsOfFilesToDelete.add(solrIdOfPublishedFile); | |
| 265 fileInfo.add(fileMetadata.getDataFile().getId() + ":" + fileMetadata.getLabel()); | |
| 266 } | |
| 267 int numFiles = 0; | |
| 268 if (fileMetadatas != null) { | |
| 269 numFiles = fileMetadatas.size(); | |
| 270 } | |
| 271 debug.append("- files: " + numFiles + " " + fileInfo.toString() + "\n"); | |
| 272 } | |
| 273 debug.append("numPublishedVersions: " + numPublishedVersions + "\n"); | |
| 274 if (doNormalSolrDocCleanUp) { | |
| 275 IndexResponse resultOfAttemptToPremptivelyDeletePublishedFiles = solrIndexService.deleteMultipleSolrIds(solrIdsOfFilesToDelete); | |
| 276 debug.append("result of attempt to premptively deleted published files before reindexing: " + resultOfAttemptToPremptivelyDeletePublishedFiles + "\n"); | |
| 277 } | |
| 278 DatasetVersion latestVersion = dataset.getLatestVersion(); | |
| 279 String latestVersionStateString = latestVersion.getVersionState().name(); | |
| 280 DatasetVersion.VersionState latestVersionState = latestVersion.getVersionState(); | |
| 281 DatasetVersion releasedVersion = dataset.getReleasedVersion(); | |
| 282 boolean atLeastOnePublishedVersion = false; | |
| 283 if (releasedVersion != null) { | |
| 284 atLeastOnePublishedVersion = true; | |
| 285 } else { | |
| 286 atLeastOnePublishedVersion = false; | |
| 287 } | |
| 288 Map<DatasetVersion.VersionState, Boolean> desiredCards = new LinkedHashMap<>(); | |
| 289 /** | |
| 290 * @todo refactor all of this below and have a single method that takes | |
| 291 * the map of desired cards (which correspond to Solr documents) as one | |
| 292 * of the arguments and does all the operations necessary to achieve the | |
| 293 * desired state. | |
| 294 */ | |
| 295 StringBuilder results = new StringBuilder(); | |
| 296 if (atLeastOnePublishedVersion == false) { | |
| 297 results.append("No published version, nothing will be indexed as ") | |
| 298 .append(solrIdPublished).append("\n"); | |
| 299 if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) { | |
| 300 | |
| 301 desiredCards.put(DatasetVersion.VersionState.DRAFT, true); | |
| 302 IndexableDataset indexableDraftVersion = new IndexableDataset(latestVersion); | |
| 303 String indexDraftResult = addOrUpdateDataset(indexableDraftVersion); | |
| 304 results.append("The latest version is a working copy (latestVersionState: ") | |
| 305 .append(latestVersionStateString).append(") and indexing was attempted for ") | |
| 306 .append(solrIdDraftDataset).append(" (limited discoverability). Result: ") | |
| 307 .append(indexDraftResult).append("\n"); | |
| 308 | |
| 309 desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); | |
| 310 if (doNormalSolrDocCleanUp) { | |
| 311 String deleteDeaccessionedResult = removeDeaccessioned(dataset); | |
| 312 results.append("Draft exists, no need for deaccessioned version. Deletion attempted for ") | |
| 313 .append(solrIdDeaccessioned).append(" (and files). Result: ") | |
| 314 .append(deleteDeaccessionedResult).append("\n"); | |
| 315 } | |
| 316 | |
| 317 desiredCards.put(DatasetVersion.VersionState.RELEASED, false); | |
| 318 if (doNormalSolrDocCleanUp) { | |
| 319 String deletePublishedResults = removePublished(dataset); | |
| 320 results.append("No published version. Attempting to delete traces of published version from index. Result: "). | |
| 321 append(deletePublishedResults).append("\n"); | |
| 322 } | |
| 323 | |
| 324 /** | |
| 325 * Desired state for existence of cards: {DRAFT=true, | |
| 326 * DEACCESSIONED=false, RELEASED=false} | |
| 327 * | |
| 328 * No published version, nothing will be indexed as dataset_17 | |
| 329 * | |
| 330 * The latest version is a working copy (latestVersionState: | |
| 331 * DRAFT) and indexing was attempted for dataset_17_draft | |
| 332 * (limited discoverability). Result: indexed dataset 17 as | |
| 333 * dataset_17_draft. filesIndexed: [datafile_18_draft] | |
| 334 * | |
| 335 * Draft exists, no need for deaccessioned version. Deletion | |
| 336 * attempted for dataset_17_deaccessioned (and files). Result: | |
| 337 * Attempted to delete dataset_17_deaccessioned from Solr index. | |
| 338 * updateReponse was: | |
| 339 * {responseHeader={status=0,QTime=1}}Attempted to delete | |
| 340 * datafile_18_deaccessioned from Solr index. updateReponse was: | |
| 341 * {responseHeader={status=0,QTime=1}} | |
| 342 * | |
| 343 * No published version. Attempting to delete traces of | |
| 344 * published version from index. Result: Attempted to delete | |
| 345 * dataset_17 from Solr index. updateReponse was: | |
| 346 * {responseHeader={status=0,QTime=1}}Attempted to delete | |
| 347 * datafile_18 from Solr index. updateReponse was: | |
| 348 * {responseHeader={status=0,QTime=0}} | |
| 349 */ | |
| 350 String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); | |
| 351 logger.fine(result); | |
| 352 indexDatasetPermissions(dataset); | |
| 353 return new AsyncResult<>(result); | |
| 354 } else if (latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) { | |
| 355 | |
| 356 desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, true); | |
| 357 IndexableDataset indexableDeaccessionedVersion = new IndexableDataset(latestVersion); | |
| 358 String indexDeaccessionedVersionResult = addOrUpdateDataset(indexableDeaccessionedVersion); | |
| 359 results.append("No draft version. Attempting to index as deaccessioned. Result: ").append(indexDeaccessionedVersionResult).append("\n"); | |
| 360 | |
| 361 desiredCards.put(DatasetVersion.VersionState.RELEASED, false); | |
| 362 if (doNormalSolrDocCleanUp) { | |
| 363 String deletePublishedResults = removePublished(dataset); | |
| 364 results.append("No published version. Attempting to delete traces of published version from index. Result: "). | |
| 365 append(deletePublishedResults).append("\n"); | |
| 366 } | |
| 367 | |
| 368 desiredCards.put(DatasetVersion.VersionState.DRAFT, false); | |
| 369 if (doNormalSolrDocCleanUp) { | |
| 370 List<String> solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); | |
| 371 String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); | |
| 372 String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); | |
| 373 results.append("Attempting to delete traces of drafts. Result: ") | |
| 374 .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); | |
| 375 } | |
| 376 | |
| 377 /** | |
| 378 * Desired state for existence of cards: {DEACCESSIONED=true, | |
| 379 * RELEASED=false, DRAFT=false} | |
| 380 * | |
| 381 * No published version, nothing will be indexed as dataset_17 | |
| 382 * | |
| 383 * No draft version. Attempting to index as deaccessioned. | |
| 384 * Result: indexed dataset 17 as dataset_17_deaccessioned. | |
| 385 * filesIndexed: [] | |
| 386 * | |
| 387 * No published version. Attempting to delete traces of | |
| 388 * published version from index. Result: Attempted to delete | |
| 389 * dataset_17 from Solr index. updateReponse was: | |
| 390 * {responseHeader={status=0,QTime=0}}Attempted to delete | |
| 391 * datafile_18 from Solr index. updateReponse was: | |
| 392 * {responseHeader={status=0,QTime=3}} | |
| 393 * | |
| 394 * Attempting to delete traces of drafts. Result: Attempted to | |
| 395 * delete dataset_17_draft from Solr index. updateReponse was: | |
| 396 * {responseHeader={status=0,QTime=1}} | |
| 397 */ | |
| 398 String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); | |
| 399 logger.fine(result); | |
| 400 indexDatasetPermissions(dataset); | |
| 401 return new AsyncResult<>(result); | |
| 402 } else { | |
| 403 String result = "No-op. Unexpected condition reached: No released version and latest version is neither draft nor deaccessioned"; | |
| 404 logger.fine(result); | |
| 405 return new AsyncResult<>(result); | |
| 406 } | |
| 407 } else if (atLeastOnePublishedVersion == true) { | |
| 408 results.append("Published versions found. ") | |
| 409 .append("Will attempt to index as ").append(solrIdPublished).append(" (discoverable by anonymous)\n"); | |
| 410 if (latestVersionState.equals(DatasetVersion.VersionState.RELEASED) | |
| 411 || latestVersionState.equals(DatasetVersion.VersionState.DEACCESSIONED)) { | |
| 412 | |
| 413 desiredCards.put(DatasetVersion.VersionState.RELEASED, true); | |
| 414 IndexableDataset indexableReleasedVersion = new IndexableDataset(releasedVersion); | |
| 415 String indexReleasedVersionResult = addOrUpdateDataset(indexableReleasedVersion); | |
| 416 results.append("Attempted to index " + solrIdPublished).append(". Result: ").append(indexReleasedVersionResult).append("\n"); | |
| 417 | |
| 418 desiredCards.put(DatasetVersion.VersionState.DRAFT, false); | |
| 419 if (doNormalSolrDocCleanUp) { | |
| 420 List<String> solrDocIdsForDraftFilesToDelete = findSolrDocIdsForDraftFilesToDelete(dataset); | |
| 421 String deleteDraftDatasetVersionResult = removeSolrDocFromIndex(solrIdDraftDataset); | |
| 422 String deleteDraftFilesResults = deleteDraftFiles(solrDocIdsForDraftFilesToDelete); | |
| 423 results.append("The latest version is published. Attempting to delete drafts. Result: ") | |
| 424 .append(deleteDraftDatasetVersionResult).append(deleteDraftFilesResults).append("\n"); | |
| 425 } | |
| 426 | |
| 427 desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); | |
| 428 if (doNormalSolrDocCleanUp) { | |
| 429 String deleteDeaccessionedResult = removeDeaccessioned(dataset); | |
| 430 results.append("No need for deaccessioned version. Deletion attempted for ") | |
| 431 .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); | |
| 432 } | |
| 433 | |
| 434 /** | |
| 435 * Desired state for existence of cards: {RELEASED=true, | |
| 436 * DRAFT=false, DEACCESSIONED=false} | |
| 437 * | |
| 438 * Released versions found: 1. Will attempt to index as | |
| 439 * dataset_17 (discoverable by anonymous) | |
| 440 * | |
| 441 * Attempted to index dataset_17. Result: indexed dataset 17 as | |
| 442 * dataset_17. filesIndexed: [datafile_18] | |
| 443 * | |
| 444 * The latest version is published. Attempting to delete drafts. | |
| 445 * Result: Attempted to delete dataset_17_draft from Solr index. | |
| 446 * updateReponse was: {responseHeader={status=0,QTime=1}} | |
| 447 * | |
| 448 * No need for deaccessioned version. Deletion attempted for | |
| 449 * dataset_17_deaccessioned. Result: Attempted to delete | |
| 450 * dataset_17_deaccessioned from Solr index. updateReponse was: | |
| 451 * {responseHeader={status=0,QTime=1}}Attempted to delete | |
| 452 * datafile_18_deaccessioned from Solr index. updateReponse was: | |
| 453 * {responseHeader={status=0,QTime=0}} | |
| 454 */ | |
| 455 String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); | |
| 456 logger.fine(result); | |
| 457 indexDatasetPermissions(dataset); | |
| 458 return new AsyncResult<>(result); | |
| 459 } else if (latestVersionState.equals(DatasetVersion.VersionState.DRAFT)) { | |
| 460 | |
| 461 IndexableDataset indexableDraftVersion = new IndexableDataset(latestVersion); | |
| 462 desiredCards.put(DatasetVersion.VersionState.DRAFT, true); | |
| 463 String indexDraftResult = addOrUpdateDataset(indexableDraftVersion); | |
| 464 results.append("The latest version is a working copy (latestVersionState: ") | |
| 465 .append(latestVersionStateString).append(") and will be indexed as ") | |
| 466 .append(solrIdDraftDataset).append(" (limited visibility). Result: ").append(indexDraftResult).append("\n"); | |
| 467 | |
| 468 desiredCards.put(DatasetVersion.VersionState.RELEASED, true); | |
| 469 IndexableDataset indexableReleasedVersion = new IndexableDataset(releasedVersion); | |
| 470 String indexReleasedVersionResult = addOrUpdateDataset(indexableReleasedVersion); | |
| 471 results.append("There is a published version we will attempt to index. Result: ").append(indexReleasedVersionResult).append("\n"); | |
| 472 | |
| 473 desiredCards.put(DatasetVersion.VersionState.DEACCESSIONED, false); | |
| 474 if (doNormalSolrDocCleanUp) { | |
| 475 String deleteDeaccessionedResult = removeDeaccessioned(dataset); | |
| 476 results.append("No need for deaccessioned version. Deletion attempted for ") | |
| 477 .append(solrIdDeaccessioned).append(". Result: ").append(deleteDeaccessionedResult); | |
| 478 } | |
| 479 | |
| 480 /** | |
| 481 * Desired state for existence of cards: {DRAFT=true, | |
| 482 * RELEASED=true, DEACCESSIONED=false} | |
| 483 * | |
| 484 * Released versions found: 1. Will attempt to index as | |
| 485 * dataset_17 (discoverable by anonymous) | |
| 486 * | |
| 487 * The latest version is a working copy (latestVersionState: | |
| 488 * DRAFT) and will be indexed as dataset_17_draft (limited | |
| 489 * visibility). Result: indexed dataset 17 as dataset_17_draft. | |
| 490 * filesIndexed: [datafile_18_draft] | |
| 491 * | |
| 492 * There is a published version we will attempt to index. | |
| 493 * Result: indexed dataset 17 as dataset_17. filesIndexed: | |
| 494 * [datafile_18] | |
| 495 * | |
| 496 * No need for deaccessioned version. Deletion attempted for | |
| 497 * dataset_17_deaccessioned. Result: Attempted to delete | |
| 498 * dataset_17_deaccessioned from Solr index. updateReponse was: | |
| 499 * {responseHeader={status=0,QTime=1}}Attempted to delete | |
| 500 * datafile_18_deaccessioned from Solr index. updateReponse was: | |
| 501 * {responseHeader={status=0,QTime=0}} | |
| 502 */ | |
| 503 String result = getDesiredCardState(desiredCards) + results.toString() + debug.toString(); | |
| 504 logger.fine(result); | |
| 505 indexDatasetPermissions(dataset); | |
| 506 return new AsyncResult<>(result); | |
| 507 } else { | |
| 508 String result = "No-op. Unexpected condition reached: There is at least one published version but the latest version is neither published nor draft"; | |
| 509 logger.fine(result); | |
| 510 return new AsyncResult<>(result); | |
| 511 } | |
| 512 } else { | |
| 513 String result = "No-op. Unexpected condition reached: Has a version been published or not?"; | |
| 514 logger.fine(result); | |
| 515 return new AsyncResult<>(result); | |
| 516 } | |
| 517 } | |
| 518 | |
| 519 private String deleteDraftFiles(List<String> solrDocIdsForDraftFilesToDelete) { | |
| 520 String deleteDraftFilesResults = ""; | |
| 521 IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(solrDocIdsForDraftFilesToDelete); | |
| 522 deleteDraftFilesResults = indexResponse.toString(); | |
| 523 return deleteDraftFilesResults; | |
| 524 } | |
| 525 | |
| 526 private IndexResponse indexDatasetPermissions(Dataset dataset) { | |
| 527 IndexResponse indexResponse = solrIndexService.indexPermissionsOnSelfAndChildren(dataset); | |
| 528 return indexResponse; | |
| 529 } | |
| 530 | |
| 531 private String addOrUpdateDataset(IndexableDataset indexableDataset) { | |
| 532 IndexableDataset.DatasetState state = indexableDataset.getDatasetState(); | |
| 533 Dataset dataset = indexableDataset.getDatasetVersion().getDataset(); | |
| 534 logger.fine("adding or updating Solr document for dataset id " + dataset.getId()); | |
| 535 Collection<SolrInputDocument> docs = new ArrayList<>(); | |
| 536 List<String> dataversePathSegmentsAccumulator = new ArrayList<>(); | |
| 537 List<String> dataverseSegments = new ArrayList<>(); | |
| 538 try { | |
| 539 dataverseSegments = findPathSegments(dataset.getOwner(), dataversePathSegmentsAccumulator); | |
| 540 } catch (Exception ex) { | |
| 541 logger.info("failed to find dataverseSegments for dataversePaths for " + SearchFields.SUBTREE + ": " + ex); | |
| 542 } | |
| 543 List<String> dataversePaths = getDataversePathsFromSegments(dataverseSegments); | |
| 544 //Add Paths for linking dataverses | |
| 545 for (Dataverse linkingDataverse : dsLinkingService.findLinkingDataverses(dataset.getId())) { | |
| 546 List<String> linkingDataversePathSegmentsAccumulator = new ArrayList<>(); | |
| 547 List<String> linkingdataverseSegments = findPathSegments(linkingDataverse, linkingDataversePathSegmentsAccumulator); | |
| 548 List<String> linkingDataversePaths = getDataversePathsFromSegments(linkingdataverseSegments); | |
| 549 for (String dvPath : linkingDataversePaths) { | |
| 550 dataversePaths.add(dvPath); | |
| 551 } | |
| 552 } | |
| 553 SolrInputDocument solrInputDocument = new SolrInputDocument(); | |
| 554 String datasetSolrDocId = indexableDataset.getSolrDocId(); | |
| 555 solrInputDocument.addField(SearchFields.ID, datasetSolrDocId); | |
| 556 solrInputDocument.addField(SearchFields.ENTITY_ID, dataset.getId()); | |
| 557 solrInputDocument.addField(SearchFields.IDENTIFIER, dataset.getGlobalId()); | |
| 558 solrInputDocument.addField(SearchFields.DATASET_PERSISTENT_ID, dataset.getGlobalId()); | |
| 559 solrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL()); | |
| 560 solrInputDocument.addField(SearchFields.TYPE, "datasets"); | |
| 561 | |
| 562 Date datasetSortByDate = new Date(); | |
| 563 Date majorVersionReleaseDate = dataset.getMostRecentMajorVersionReleaseDate(); | |
| 564 if (majorVersionReleaseDate != null) { | |
| 565 if (true) { | |
| 566 String msg = "major release date found: " + majorVersionReleaseDate.toString(); | |
| 567 logger.fine(msg); | |
| 568 } | |
| 569 datasetSortByDate = majorVersionReleaseDate; | |
| 570 } else { | |
| 571 if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.WORKING_COPY)) { | |
| 572 solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING); | |
| 573 } else if (indexableDataset.getDatasetState().equals(IndexableDataset.DatasetState.DEACCESSIONED)) { | |
| 574 // uncomment this if we change our mind and want a deaccessioned facet after all | |
| 575 // solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DEACCESSIONED_STRING); | |
| 576 } | |
| 577 Date createDate = dataset.getCreateDate(); | |
| 578 if (createDate != null) { | |
| 579 if (true) { | |
| 580 String msg = "can't find major release date, using create date: " + createDate; | |
| 581 logger.fine(msg); | |
| 582 } | |
| 583 datasetSortByDate = createDate; | |
| 584 } else { | |
| 585 String msg = "can't find major release date or create date, using \"now\""; | |
| 586 logger.info(msg); | |
| 587 datasetSortByDate = new Date(); | |
| 588 } | |
| 589 } | |
| 590 solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, datasetSortByDate); | |
| 591 solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(datasetSortByDate)); | |
| 592 | |
| 593 if (state.equals(indexableDataset.getDatasetState().PUBLISHED)) { | |
| 594 solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING); | |
| 595 // solrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, dataset.getPublicationDate()); | |
| 596 } else if (state.equals(indexableDataset.getDatasetState().WORKING_COPY)) { | |
| 597 solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING); | |
| 598 } | |
| 599 | |
| 600 addDatasetReleaseDateToSolrDoc(solrInputDocument, dataset); | |
| 601 | |
| 602 DatasetVersion datasetVersion = indexableDataset.getDatasetVersion(); | |
| 603 String parentDatasetTitle = "TBD"; | |
| 604 if (datasetVersion != null) { | |
| 605 | |
| 606 solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId()); | |
| 607 solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(true)); | |
| 608 | |
| 609 for (DatasetField dsf : datasetVersion.getFlatDatasetFields()) { | |
| 610 | |
| 611 DatasetFieldType dsfType = dsf.getDatasetFieldType(); | |
| 612 String solrFieldSearchable = dsfType.getSolrField().getNameSearchable(); | |
| 613 String solrFieldFacetable = dsfType.getSolrField().getNameFacetable(); | |
| 614 | |
| 615 if (dsf.getValues() != null && !dsf.getValues().isEmpty() && dsf.getValues().get(0) != null && solrFieldSearchable != null) { | |
| 616 logger.fine("indexing " + dsf.getDatasetFieldType().getName() + ":" + dsf.getValues() + " into " + solrFieldSearchable + " and maybe " + solrFieldFacetable); | |
| 617 // if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.INTEGER)) { | |
| 618 if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.EMAIL)) { | |
| 619 //no-op. we want to keep email address out of Solr per https://github.com/IQSS/dataverse/issues/759 | |
| 620 } else if (dsfType.getSolrField().getSolrType().equals(SolrField.SolrType.DATE)) { | |
| 621 String dateAsString = dsf.getValues().get(0); | |
| 622 logger.fine("date as string: " + dateAsString); | |
| 623 if (dateAsString != null && !dateAsString.isEmpty()) { | |
| 624 SimpleDateFormat inputDateyyyy = new SimpleDateFormat("yyyy", Locale.ENGLISH); | |
| 625 try { | |
| 626 /** | |
| 627 * @todo when bean validation is working we | |
| 628 * won't have to convert strings into dates | |
| 629 */ | |
| 630 logger.fine("Trying to convert " + dateAsString + " to a YYYY date from dataset " + dataset.getId()); | |
| 631 Date dateAsDate = inputDateyyyy.parse(dateAsString); | |
| 632 SimpleDateFormat yearOnly = new SimpleDateFormat("yyyy"); | |
| 633 String datasetFieldFlaggedAsDate = yearOnly.format(dateAsDate); | |
| 634 logger.fine("YYYY only: " + datasetFieldFlaggedAsDate); | |
| 635 // solrInputDocument.addField(solrFieldSearchable, Integer.parseInt(datasetFieldFlaggedAsDate)); | |
| 636 solrInputDocument.addField(solrFieldSearchable, datasetFieldFlaggedAsDate); | |
| 637 if (dsfType.getSolrField().isFacetable()) { | |
| 638 // solrInputDocument.addField(solrFieldFacetable, Integer.parseInt(datasetFieldFlaggedAsDate)); | |
| 639 solrInputDocument.addField(solrFieldFacetable, datasetFieldFlaggedAsDate); | |
| 640 } | |
| 641 } catch (Exception ex) { | |
| 642 logger.info("unable to convert " + dateAsString + " into YYYY format and couldn't index it (" + dsfType.getName() + ")"); | |
| 643 } | |
| 644 } | |
| 645 } else { | |
| 646 // _s (dynamic string) and all other Solr fields | |
| 647 | |
| 648 if (dsf.getDatasetFieldType().getName().equals("authorAffiliation")) { | |
| 649 /** | |
| 650 * @todo think about how to tie the fact that this | |
| 651 * needs to be multivalued (_ss) because a | |
| 652 * multivalued facet (authorAffilition_ss) is being | |
| 653 * collapsed into here at index time. The business | |
| 654 * logic to determine if a data-driven metadata | |
| 655 * field should be indexed into Solr as a single or | |
| 656 * multiple value lives in the getSolrField() method | |
| 657 * of DatasetField.java | |
| 658 */ | |
| 659 solrInputDocument.addField(SearchFields.AFFILIATION, dsf.getValuesWithoutNaValues()); | |
| 660 } else if (dsf.getDatasetFieldType().getName().equals("title")) { | |
| 661 // datasets have titles not names but index title under name as well so we can sort datasets by name along dataverses and files | |
| 662 List<String> possibleTitles = dsf.getValues(); | |
| 663 String firstTitle = possibleTitles.get(0); | |
| 664 if (firstTitle != null) { | |
| 665 parentDatasetTitle = firstTitle; | |
| 666 } | |
| 667 solrInputDocument.addField(SearchFields.NAME_SORT, dsf.getValues()); | |
| 668 } | |
| 669 if (dsfType.isControlledVocabulary()) { | |
| 670 for (ControlledVocabularyValue controlledVocabularyValue : dsf.getControlledVocabularyValues()) { | |
| 671 if (controlledVocabularyValue.getStrValue().equals(DatasetField.NA_VALUE)) { | |
| 672 continue; | |
| 673 } | |
| 674 solrInputDocument.addField(solrFieldSearchable, controlledVocabularyValue.getStrValue()); | |
| 675 if (dsfType.getSolrField().isFacetable()) { | |
| 676 solrInputDocument.addField(solrFieldFacetable, controlledVocabularyValue.getStrValue()); | |
| 677 } | |
| 678 } | |
| 679 } else { | |
| 680 if (dsfType.getFieldType().equals(DatasetFieldType.FieldType.TEXTBOX)) { | |
| 681 // strip HTML | |
| 682 List<String> htmlFreeText = StringUtil.htmlArray2textArray(dsf.getValuesWithoutNaValues()); | |
| 683 solrInputDocument.addField(solrFieldSearchable, htmlFreeText); | |
| 684 if (dsfType.getSolrField().isFacetable()) { | |
| 685 solrInputDocument.addField(solrFieldFacetable, htmlFreeText); | |
| 686 } | |
| 687 } else { | |
| 688 // do not strip HTML | |
| 689 solrInputDocument.addField(solrFieldSearchable, dsf.getValuesWithoutNaValues()); | |
| 690 if (dsfType.getSolrField().isFacetable()) { | |
| 691 solrInputDocument.addField(solrFieldFacetable, dsf.getValuesWithoutNaValues()); | |
| 692 } | |
| 693 } | |
| 694 } | |
| 695 } | |
| 696 } | |
| 697 } | |
| 698 } | |
| 699 | |
| 700 solrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); | |
| 701 // solrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataset.getOwner().getName()); | |
| 702 solrInputDocument.addField(SearchFields.PARENT_ID, dataset.getOwner().getId()); | |
| 703 solrInputDocument.addField(SearchFields.PARENT_NAME, dataset.getOwner().getName()); | |
| 704 | |
| 705 if (state.equals(indexableDataset.getDatasetState().DEACCESSIONED)) { | |
| 706 String deaccessionNote = datasetVersion.getVersionNote(); | |
| 707 if (deaccessionNote != null) { | |
| 708 solrInputDocument.addField(SearchFields.DATASET_DEACCESSION_REASON, deaccessionNote); | |
| 709 } | |
| 710 } | |
| 711 | |
| 712 docs.add(solrInputDocument); | |
| 713 | |
| 714 List<String> filesIndexed = new ArrayList<>(); | |
| 715 if (datasetVersion != null) { | |
| 716 List<FileMetadata> fileMetadatas = datasetVersion.getFileMetadatas(); | |
| 717 boolean checkForDuplicateMetadata = false; | |
| 718 if (datasetVersion.isDraft() && dataset.isReleased() && dataset.getReleasedVersion() != null) { | |
| 719 checkForDuplicateMetadata = true; | |
| 720 logger.fine("We are indexing a draft version of a dataset that has a released version. We'll be checking file metadatas if they are exact clones of the released versions."); | |
| 721 } | |
| 722 | |
| 723 for (FileMetadata fileMetadata : fileMetadatas) { | |
| 724 boolean indexThisMetadata = true; | |
| 725 if (checkForDuplicateMetadata) { | |
| 726 logger.fine("Checking if this file metadata is a duplicate."); | |
| 727 for (FileMetadata releasedFileMetadata : dataset.getReleasedVersion().getFileMetadatas()) { | |
| 728 if (fileMetadata.getDataFile() != null && fileMetadata.getDataFile().equals(releasedFileMetadata.getDataFile())) { | |
| 729 if (fileMetadata.contentEquals(releasedFileMetadata)) { | |
| 730 indexThisMetadata = false; | |
| 731 logger.fine("This file metadata hasn't changed since the released version; skipping indexing."); | |
| 732 } else { | |
| 733 logger.fine("This file metadata has changed since the released version; we want to index it!"); | |
| 734 } | |
| 735 break; | |
| 736 } | |
| 737 } | |
| 738 } | |
| 739 if (indexThisMetadata) { | |
| 740 SolrInputDocument datafileSolrInputDocument = new SolrInputDocument(); | |
| 741 Long fileEntityId = fileMetadata.getDataFile().getId(); | |
| 742 datafileSolrInputDocument.addField(SearchFields.ENTITY_ID, fileEntityId); | |
| 743 datafileSolrInputDocument.addField(SearchFields.IDENTIFIER, fileEntityId); | |
| 744 datafileSolrInputDocument.addField(SearchFields.PERSISTENT_URL, dataset.getPersistentURL()); | |
| 745 datafileSolrInputDocument.addField(SearchFields.TYPE, "files"); | |
| 746 | |
| 747 String filenameCompleteFinal = ""; | |
| 748 if (fileMetadata != null) { | |
| 749 String filenameComplete = fileMetadata.getLabel(); | |
| 750 if (filenameComplete != null) { | |
| 751 String filenameWithoutExtension = ""; | |
| 752 // String extension = ""; | |
| 753 int i = filenameComplete.lastIndexOf('.'); | |
| 754 if (i > 0) { | |
| 755 // extension = filenameComplete.substring(i + 1); | |
| 756 try { | |
| 757 filenameWithoutExtension = filenameComplete.substring(0, i); | |
| 758 datafileSolrInputDocument.addField(SearchFields.FILENAME_WITHOUT_EXTENSION, filenameWithoutExtension); | |
| 759 datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameWithoutExtension); | |
| 760 } catch (IndexOutOfBoundsException ex) { | |
| 761 filenameWithoutExtension = ""; | |
| 762 } | |
| 763 } else { | |
| 764 logger.info("problem with filename '" + filenameComplete + "': no extension? empty string as filename?"); | |
| 765 filenameWithoutExtension = filenameComplete; | |
| 766 } | |
| 767 filenameCompleteFinal = filenameComplete; | |
| 768 } | |
| 769 } | |
| 770 datafileSolrInputDocument.addField(SearchFields.NAME, filenameCompleteFinal); | |
| 771 datafileSolrInputDocument.addField(SearchFields.NAME_SORT, filenameCompleteFinal); | |
| 772 datafileSolrInputDocument.addField(SearchFields.FILE_NAME, filenameCompleteFinal); | |
| 773 | |
| 774 datafileSolrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId()); | |
| 775 | |
| 776 /** | |
| 777 * for rules on sorting files see | |
| 778 * https://docs.google.com/a/harvard.edu/document/d/1DWsEqT8KfheKZmMB3n_VhJpl9nIxiUjai_AIQPAjiyA/edit?usp=sharing | |
| 779 * via https://redmine.hmdc.harvard.edu/issues/3701 | |
| 780 */ | |
| 781 Date fileSortByDate = new Date(); | |
| 782 DataFile datafile = fileMetadata.getDataFile(); | |
| 783 if (datafile != null) { | |
| 784 boolean fileHasBeenReleased = datafile.isReleased(); | |
| 785 if (fileHasBeenReleased) { | |
| 786 logger.fine("indexing file with filePublicationTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")"); | |
| 787 Timestamp filePublicationTimestamp = datafile.getPublicationDate(); | |
| 788 if (filePublicationTimestamp != null) { | |
| 789 fileSortByDate = filePublicationTimestamp; | |
| 790 } else { | |
| 791 String msg = "filePublicationTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")"; | |
| 792 logger.info(msg); | |
| 793 } | |
| 794 } else { | |
| 795 logger.fine("indexing file with fileCreateTimestamp. " + fileMetadata.getId() + " (file id " + datafile.getId() + ")"); | |
| 796 Timestamp fileCreateTimestamp = datafile.getCreateDate(); | |
| 797 if (fileCreateTimestamp != null) { | |
| 798 fileSortByDate = fileCreateTimestamp; | |
| 799 } else { | |
| 800 String msg = "fileCreateTimestamp was null for fileMetadata id " + fileMetadata.getId() + " (file id " + datafile.getId() + ")"; | |
| 801 logger.info(msg); | |
| 802 } | |
| 803 } | |
| 804 } | |
| 805 if (fileSortByDate == null) { | |
| 806 if (datasetSortByDate != null) { | |
| 807 logger.info("fileSortByDate was null, assigning datasetSortByDate"); | |
| 808 fileSortByDate = datasetSortByDate; | |
| 809 } else { | |
| 810 logger.info("fileSortByDate and datasetSortByDate were null, assigning 'now'"); | |
| 811 fileSortByDate = new Date(); | |
| 812 } | |
| 813 } | |
| 814 datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE, fileSortByDate); | |
| 815 datafileSolrInputDocument.addField(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT, convertToFriendlyDate(fileSortByDate)); | |
| 816 | |
| 817 if (majorVersionReleaseDate == null) { | |
| 818 datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, UNPUBLISHED_STRING); | |
| 819 } | |
| 820 | |
| 821 String fileSolrDocId = solrDocIdentifierFile + fileEntityId; | |
| 822 if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().PUBLISHED)) { | |
| 823 fileSolrDocId = solrDocIdentifierFile + fileEntityId; | |
| 824 datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, PUBLISHED_STRING); | |
| 825 // datafileSolrInputDocument.addField(SearchFields.PERMS, publicGroupString); | |
| 826 addDatasetReleaseDateToSolrDoc(datafileSolrInputDocument, dataset); | |
| 827 } else if (indexableDataset.getDatasetState().equals(indexableDataset.getDatasetState().WORKING_COPY)) { | |
| 828 fileSolrDocId = solrDocIdentifierFile + fileEntityId + indexableDataset.getDatasetState().getSuffix(); | |
| 829 datafileSolrInputDocument.addField(SearchFields.PUBLICATION_STATUS, DRAFT_STRING); | |
| 830 } | |
| 831 datafileSolrInputDocument.addField(SearchFields.ID, fileSolrDocId); | |
| 832 | |
| 833 datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_FRIENDLY, fileMetadata.getDataFile().getFriendlyType()); | |
| 834 datafileSolrInputDocument.addField(SearchFields.FILE_CONTENT_TYPE, fileMetadata.getDataFile().getContentType()); | |
| 835 datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, fileMetadata.getDataFile().getFriendlyType()); | |
| 836 // For the file type facets, we have a property file that maps mime types | |
| 837 // to facet-friendly names; "application/fits" should become "FITS", etc.: | |
| 838 datafileSolrInputDocument.addField(SearchFields.FILE_TYPE, FileUtil.getFacetFileType(fileMetadata.getDataFile())); | |
| 839 datafileSolrInputDocument.addField(SearchFields.FILE_TYPE_SEARCHABLE, FileUtil.getFacetFileType(fileMetadata.getDataFile())); | |
| 840 datafileSolrInputDocument.addField(SearchFields.FILE_SIZE_IN_BYTES, fileMetadata.getDataFile().getFilesize()); | |
| 841 datafileSolrInputDocument.addField(SearchFields.FILE_MD5, fileMetadata.getDataFile().getmd5()); | |
| 842 datafileSolrInputDocument.addField(SearchFields.DESCRIPTION, fileMetadata.getDescription()); | |
| 843 datafileSolrInputDocument.addField(SearchFields.FILE_DESCRIPTION, fileMetadata.getDescription()); | |
| 844 datafileSolrInputDocument.addField(SearchFields.UNF, fileMetadata.getDataFile().getUnf()); | |
| 845 datafileSolrInputDocument.addField(SearchFields.SUBTREE, dataversePaths); | |
| 846 // datafileSolrInputDocument.addField(SearchFields.HOST_DATAVERSE, dataFile.getOwner().getOwner().getName()); | |
| 847 // datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, dataFile.getDataset().getTitle()); | |
| 848 datafileSolrInputDocument.addField(SearchFields.PARENT_ID, fileMetadata.getDataFile().getOwner().getId()); | |
| 849 datafileSolrInputDocument.addField(SearchFields.PARENT_IDENTIFIER, fileMetadata.getDataFile().getOwner().getGlobalId()); | |
| 850 datafileSolrInputDocument.addField(SearchFields.PARENT_CITATION, fileMetadata.getDataFile().getOwner().getCitation()); | |
| 851 | |
| 852 datafileSolrInputDocument.addField(SearchFields.PARENT_NAME, parentDatasetTitle); | |
| 853 | |
| 854 // If this is a tabular data file -- i.e., if there are data | |
| 855 // variables associated with this file, we index the variable | |
| 856 // names and labels: | |
| 857 if (fileMetadata.getDataFile().isTabularData()) { | |
| 858 List<DataVariable> variables = fileMetadata.getDataFile().getDataTable().getDataVariables(); | |
| 859 for (DataVariable var : variables) { | |
| 860 // Hard-coded search fields, for now: | |
| 861 // TODO: eventually: review, decide how datavariables should | |
| 862 // be handled for indexing purposes. (should it be a fixed | |
| 863 // setup, defined in the code? should it be flexible? unlikely | |
| 864 // that this needs to be domain-specific... since these data | |
| 865 // variables are quite specific to tabular data, which in turn | |
| 866 // is something social science-specific... | |
| 867 // anyway -- needs to be reviewed. -- L.A. 4.0alpha1 | |
| 868 | |
| 869 if (var.getName() != null && !var.getName().equals("")) { | |
| 870 datafileSolrInputDocument.addField(SearchFields.VARIABLE_NAME, var.getName()); | |
| 871 } | |
| 872 if (var.getLabel() != null && !var.getLabel().equals("")) { | |
| 873 datafileSolrInputDocument.addField(SearchFields.VARIABLE_LABEL, var.getLabel()); | |
| 874 } | |
| 875 } | |
| 876 } | |
| 877 | |
| 878 if (indexableDataset.isFilesShouldBeIndexed()) { | |
| 879 filesIndexed.add(fileSolrDocId); | |
| 880 docs.add(datafileSolrInputDocument); | |
| 881 } | |
| 882 } | |
| 883 } | |
| 884 } | |
| 885 | |
| 886 SolrServer server = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr"); | |
| 887 | |
| 888 try { | |
| 889 server.add(docs); | |
| 890 } catch (SolrServerException | IOException ex) { | |
| 891 return ex.toString(); | |
| 892 } | |
| 893 try { | |
| 894 server.commit(); | |
| 895 } catch (SolrServerException | IOException ex) { | |
| 896 return ex.toString(); | |
| 897 } | |
| 898 | |
| 899 dvObjectService.updateContentIndexTime(dataset); | |
| 900 | |
| 901 // return "indexed dataset " + dataset.getId() + " as " + solrDocId + "\nindexFilesResults for " + solrDocId + ":" + fileInfo.toString(); | |
| 902 return "indexed dataset " + dataset.getId() + " as " + datasetSolrDocId + ". filesIndexed: " + filesIndexed; | |
| 903 } | |
| 904 | |
| 905 public List<String> findPathSegments(Dataverse dataverse, List<String> segments) { | |
| 906 Dataverse rootDataverse = findRootDataverseCached(); | |
| 907 if (!dataverse.equals(rootDataverse)) { | |
| 908 // important when creating root dataverse | |
| 909 if (dataverse.getOwner() != null) { | |
| 910 findPathSegments(dataverse.getOwner(), segments); | |
| 911 } | |
| 912 segments.add(dataverse.getId().toString()); | |
| 913 return segments; | |
| 914 } else { | |
| 915 // base case | |
| 916 return segments; | |
| 917 } | |
| 918 } | |
| 919 | |
| 920 List<String> getDataversePathsFromSegments(List<String> dataversePathSegments) { | |
| 921 List<String> subtrees = new ArrayList<>(); | |
| 922 for (int i = 0; i < dataversePathSegments.size(); i++) { | |
| 923 StringBuilder pathBuilder = new StringBuilder(); | |
| 924 int numSegments = dataversePathSegments.size(); | |
| 925 for (int j = 0; j < numSegments; j++) { | |
| 926 if (j <= i) { | |
| 927 pathBuilder.append("/" + dataversePathSegments.get(j)); | |
| 928 } | |
| 929 } | |
| 930 subtrees.add(pathBuilder.toString()); | |
| 931 } | |
| 932 return subtrees; | |
| 933 } | |
| 934 | |
| 935 private void addDataverseReleaseDateToSolrDoc(SolrInputDocument solrInputDocument, Dataverse dataverse) { | |
| 936 if (dataverse.getPublicationDate() != null) { | |
| 937 Calendar calendar = Calendar.getInstance(); | |
| 938 calendar.setTimeInMillis(dataverse.getPublicationDate().getTime()); | |
| 939 int YYYY = calendar.get(Calendar.YEAR); | |
| 940 solrInputDocument.addField(SearchFields.PUBLICATION_DATE, YYYY); | |
| 941 } | |
| 942 } | |
| 943 | |
| 944 private void addDatasetReleaseDateToSolrDoc(SolrInputDocument solrInputDocument, Dataset dataset) { | |
| 945 if (dataset.getPublicationDate() != null) { | |
| 946 Calendar calendar = Calendar.getInstance(); | |
| 947 calendar.setTimeInMillis(dataset.getPublicationDate().getTime()); | |
| 948 int YYYY = calendar.get(Calendar.YEAR); | |
| 949 solrInputDocument.addField(SearchFields.PUBLICATION_DATE, YYYY); | |
| 950 solrInputDocument.addField(SearchFields.DATASET_PUBLICATION_DATE, YYYY); | |
| 951 } | |
| 952 } | |
| 953 | |
/** Prefix used for group identifiers (constant declared elsewhere in this class). */
public static String getGroupPrefix() {
    return groupPrefix;
}

/** Prefix used for per-user group identifiers (constant declared elsewhere in this class). */
public static String getGroupPerUserPrefix() {
    return groupPerUserPrefix;
}

/** The string representing the public group (constant declared elsewhere in this class). */
public static String getPublicGroupString() {
    return publicGroupString;
}

/** Publication-status value indexed for published objects. */
public static String getPUBLISHED_STRING() {
    return PUBLISHED_STRING;
}

/** Publication-status value indexed for unpublished objects. */
public static String getUNPUBLISHED_STRING() {
    return UNPUBLISHED_STRING;
}

/** Publication-status value indexed for draft versions. */
public static String getDRAFT_STRING() {
    return DRAFT_STRING;
}

/** Publication-status value for deaccessioned datasets. */
public static String getDEACCESSIONED_STRING() {
    return DEACCESSIONED_STRING;
}
| 981 | |
| 982 public String delete(Dataverse doomed) { | |
| 983 SolrServer server = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr"); | |
| 984 | |
| 985 logger.fine("deleting Solr document for dataverse " + doomed.getId()); | |
| 986 UpdateResponse updateResponse; | |
| 987 try { | |
| 988 updateResponse = server.deleteById(solrDocIdentifierDataverse + doomed.getId()); | |
| 989 } catch (SolrServerException | IOException ex) { | |
| 990 return ex.toString(); | |
| 991 } | |
| 992 try { | |
| 993 server.commit(); | |
| 994 } catch (SolrServerException | IOException ex) { | |
| 995 return ex.toString(); | |
| 996 } | |
| 997 String response = "Successfully deleted dataverse " + doomed.getId() + " from Solr index. updateReponse was: " + updateResponse.toString(); | |
| 998 logger.fine(response); | |
| 999 return response; | |
| 1000 } | |
| 1001 | |
| 1002 /** | |
| 1003 * @todo call this in fewer places, favoring | |
| 1004 * SolrIndexServiceBeans.deleteMultipleSolrIds instead to operate in batches | |
| 1005 * | |
| 1006 * https://github.com/IQSS/dataverse/issues/142 | |
| 1007 */ | |
| 1008 public String removeSolrDocFromIndex(String doomed) { | |
| 1009 SolrServer server = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr"); | |
| 1010 | |
| 1011 logger.fine("deleting Solr document: " + doomed); | |
| 1012 UpdateResponse updateResponse; | |
| 1013 try { | |
| 1014 updateResponse = server.deleteById(doomed); | |
| 1015 } catch (SolrServerException | IOException ex) { | |
| 1016 return ex.toString(); | |
| 1017 } | |
| 1018 try { | |
| 1019 server.commit(); | |
| 1020 } catch (SolrServerException | IOException ex) { | |
| 1021 return ex.toString(); | |
| 1022 } | |
| 1023 String response = "Attempted to delete " + doomed + " from Solr index. updateReponse was: " + updateResponse.toString(); | |
| 1024 logger.fine(response); | |
| 1025 return response; | |
| 1026 } | |
| 1027 | |
| 1028 public String convertToFriendlyDate(Date dateAsDate) { | |
| 1029 if (dateAsDate == null) { | |
| 1030 dateAsDate = new Date(); | |
| 1031 } | |
| 1032 // using DateFormat.MEDIUM for May 5, 2014 to match what's in DVN 3.x | |
| 1033 DateFormat format = DateFormat.getDateInstance(DateFormat.MEDIUM); | |
| 1034 String friendlyDate = format.format(dateAsDate); | |
| 1035 return friendlyDate; | |
| 1036 } | |
| 1037 | |
| 1038 private List<String> findSolrDocIdsForDraftFilesToDelete(Dataset datasetWithDraftFilesToDelete) { | |
| 1039 List<String> solrIdsOfFilesToDelete = new ArrayList<>(); | |
| 1040 for (DatasetVersion datasetVersion : datasetWithDraftFilesToDelete.getVersions()) { | |
| 1041 for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { | |
| 1042 DataFile datafile = fileMetadata.getDataFile(); | |
| 1043 if (datafile != null) { | |
| 1044 solrIdsOfFilesToDelete.add(solrDocIdentifierFile + datafile.getId() + draftSuffix); | |
| 1045 } | |
| 1046 } | |
| 1047 | |
| 1048 } | |
| 1049 return solrIdsOfFilesToDelete; | |
| 1050 } | |
| 1051 | |
/**
 * Builds the Solr doc ids for all files of the dataset, suffixed for the
 * given dataset state (published/draft/deaccessioned), so they can be
 * removed from the index.
 */
private List<String> findSolrDocIdsForFilesToDelete(Dataset dataset, IndexableDataset.DatasetState state) {
    List<String> solrIdsOfFilesToDelete = new ArrayList<>();
    for (DataFile file : dataset.getFiles()) {
        solrIdsOfFilesToDelete.add(solrDocIdentifierFile + file.getId() + state.getSuffix());
    }
    return solrIdsOfFilesToDelete;
}
| 1059 | |
/**
 * Deletes the given Solr doc ids in one batch via the SolrIndexServiceBean.
 *
 * @return the IndexResponse rendered as a string
 */
private String removeMultipleSolrDocs(List<String> docIds) {
    IndexResponse indexResponse = solrIndexService.deleteMultipleSolrIds(docIds);
    return indexResponse.toString();
}
| 1064 | |
/** Builds the Solr doc id for the published card of the dataset, e.g. "dataset_123". */
private String determinePublishedDatasetSolrDocId(Dataset dataset) {
    return IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.PUBLISHED.getSuffix();
}
| 1068 | |
/** Builds the Solr doc id for the deaccessioned card of the dataset. */
private String determineDeaccessionedDatasetId(Dataset dataset) {
    return IndexableObject.IndexableTypes.DATASET.getName() + "_" + dataset.getId() + IndexableDataset.DatasetState.DEACCESSIONED.getSuffix();
}
| 1072 | |
| 1073 private String removeDeaccessioned(Dataset dataset) { | |
| 1074 StringBuilder result = new StringBuilder(); | |
| 1075 String deleteDeaccessionedResult = removeSolrDocFromIndex(determineDeaccessionedDatasetId(dataset)); | |
| 1076 result.append(deleteDeaccessionedResult); | |
| 1077 List<String> docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.DEACCESSIONED); | |
| 1078 String deleteFilesResult = removeMultipleSolrDocs(docIds); | |
| 1079 result.append(deleteFilesResult); | |
| 1080 return result.toString(); | |
| 1081 } | |
| 1082 | |
| 1083 private String removePublished(Dataset dataset) { | |
| 1084 StringBuilder result = new StringBuilder(); | |
| 1085 String deletePublishedResult = removeSolrDocFromIndex(determinePublishedDatasetSolrDocId(dataset)); | |
| 1086 result.append(deletePublishedResult); | |
| 1087 List<String> docIds = findSolrDocIdsForFilesToDelete(dataset, IndexableDataset.DatasetState.PUBLISHED); | |
| 1088 String deleteFilesResult = removeMultipleSolrDocs(docIds); | |
| 1089 result.append(deleteFilesResult); | |
| 1090 return result.toString(); | |
| 1091 } | |
| 1092 | |
| 1093 private Dataverse findRootDataverseCached() { | |
| 1094 if (true) { | |
| 1095 /** | |
| 1096 * @todo Is the code below working at all? We don't want the root | |
| 1097 * dataverse to be indexed into Solr. Specifically, we don't want a | |
| 1098 * dataverse "card" to show up while browsing. | |
| 1099 * | |
| 1100 * Let's just find the root dataverse and be done with it. We'll | |
| 1101 * figure out the caching later. | |
| 1102 */ | |
| 1103 try { | |
| 1104 Dataverse rootDataverse = dataverseService.findRootDataverse(); | |
| 1105 return rootDataverse; | |
| 1106 } catch (EJBException ex) { | |
| 1107 logger.info("caught " + ex); | |
| 1108 Throwable cause = ex.getCause(); | |
| 1109 while (cause.getCause() != null) { | |
| 1110 logger.info("caused by... " + cause); | |
| 1111 cause = cause.getCause(); | |
| 1112 } | |
| 1113 return null; | |
| 1114 } | |
| 1115 } | |
| 1116 | |
| 1117 /** | |
| 1118 * @todo Why isn't this code working? | |
| 1119 */ | |
| 1120 if (rootDataverseCached != null) { | |
| 1121 return rootDataverseCached; | |
| 1122 } else { | |
| 1123 rootDataverseCached = dataverseService.findRootDataverse(); | |
| 1124 if (rootDataverseCached != null) { | |
| 1125 return rootDataverseCached; | |
| 1126 } else { | |
| 1127 throw new RuntimeException("unable to determine root dataverse"); | |
| 1128 } | |
| 1129 } | |
| 1130 } | |
| 1131 | |
/**
 * Renders a human-readable summary of which "cards" (per-version-state
 * Solr documents) the caller wants to exist, after sanity-checking that a
 * decision was supplied for exactly DRAFT, RELEASED and DEACCESSIONED.
 *
 * @param desiredCards map from version state to whether its card should exist
 * @return a one-line description of the desired card states
 * @throws RuntimeException if the map's key set is not exactly the three
 *         expected version states
 */
private String getDesiredCardState(Map<DatasetVersion.VersionState, Boolean> desiredCards) {
    /**
     * @todo make a JVM option to enforce sanity checks? Call it dev=true?
     */
    boolean sanityCheck = true;
    if (sanityCheck) {
        // Callers must decide for every version state; anything else is a
        // programming error worth failing loudly on.
        Set<DatasetVersion.VersionState> expected = new HashSet<>();
        expected.add(DatasetVersion.VersionState.DRAFT);
        expected.add(DatasetVersion.VersionState.RELEASED);
        expected.add(DatasetVersion.VersionState.DEACCESSIONED);
        if (!desiredCards.keySet().equals(expected)) {
            throw new RuntimeException("Mismatch between expected version states (" + expected + ") and version states passed in (" + desiredCards.keySet() + ")");
        }
    }
    return "Desired state for existence of cards: " + desiredCards + "\n";
}
| 1148 | |
| 1149 /** | |
| 1150 * @return Dataverses that should be reindexed either because they have | |
| 1151 * never been indexed or their index time is before their modification time. | |
| 1152 */ | |
| 1153 public List findStaleOrMissingDataverses() { | |
| 1154 List<Dataverse> staleDataverses = new ArrayList<>(); | |
| 1155 for (Dataverse dataverse : dataverseService.findAll()) { | |
| 1156 if (dataverse.equals(dataverseService.findRootDataverse())) { | |
| 1157 continue; | |
| 1158 } | |
| 1159 if (stale(dataverse)) { | |
| 1160 staleDataverses.add(dataverse); | |
| 1161 } | |
| 1162 } | |
| 1163 return staleDataverses; | |
| 1164 } | |
| 1165 | |
| 1166 /** | |
| 1167 * @return Datasets that should be reindexed either because they have never | |
| 1168 * been indexed or their index time is before their modification time. | |
| 1169 */ | |
| 1170 public List<Dataset> findStaleOrMissingDatasets() { | |
| 1171 List<Dataset> staleDatasets = new ArrayList<>(); | |
| 1172 for (Dataset dataset : datasetService.findAll()) { | |
| 1173 if (stale(dataset)) { | |
| 1174 staleDatasets.add(dataset); | |
| 1175 } | |
| 1176 } | |
| 1177 return staleDatasets; | |
| 1178 } | |
| 1179 | |
| 1180 private boolean stale(DvObject dvObject) { | |
| 1181 Timestamp indexTime = dvObject.getIndexTime(); | |
| 1182 Timestamp modificationTime = dvObject.getModificationTime(); | |
| 1183 if (indexTime == null) { | |
| 1184 return true; | |
| 1185 } else { | |
| 1186 if (indexTime.before(modificationTime)) { | |
| 1187 return true; | |
| 1188 } | |
| 1189 } | |
| 1190 return false; | |
| 1191 } | |
| 1192 | |
| 1193 public List<Long> findDataversesInSolrOnly() throws SearchException { | |
| 1194 try { | |
| 1195 /** | |
| 1196 * @todo define this centrally and statically | |
| 1197 */ | |
| 1198 return findDvObjectInSolrOnly("dataverses"); | |
| 1199 } catch (SearchException ex) { | |
| 1200 throw ex; | |
| 1201 } | |
| 1202 } | |
| 1203 | |
| 1204 public List<Long> findDatasetsInSolrOnly() throws SearchException { | |
| 1205 try { | |
| 1206 /** | |
| 1207 * @todo define this centrally and statically | |
| 1208 */ | |
| 1209 return findDvObjectInSolrOnly("datasets"); | |
| 1210 } catch (SearchException ex) { | |
| 1211 throw ex; | |
| 1212 } | |
| 1213 } | |
| 1214 | |
| 1215 private List<Long> findDvObjectInSolrOnly(String type) throws SearchException { | |
| 1216 SolrServer solrServer = new HttpSolrServer("http://" + systemConfig.getSolrHostColonPort() + "/solr"); | |
| 1217 SolrQuery solrQuery = new SolrQuery(); | |
| 1218 solrQuery.setQuery("*"); | |
| 1219 solrQuery.setRows(Integer.SIZE); | |
| 1220 solrQuery.addFilterQuery(SearchFields.TYPE + ":" + type); | |
| 1221 List<Long> dvObjectInSolrOnly = new ArrayList<>(); | |
| 1222 QueryResponse queryResponse = null; | |
| 1223 try { | |
| 1224 queryResponse = solrServer.query(solrQuery); | |
| 1225 } catch (SolrServerException ex) { | |
| 1226 throw new SearchException("Error searching Solr for " + type, ex); | |
| 1227 } | |
| 1228 SolrDocumentList results = queryResponse.getResults(); | |
| 1229 for (SolrDocument solrDocument : results) { | |
| 1230 Object idObject = solrDocument.getFieldValue(SearchFields.ENTITY_ID); | |
| 1231 if (idObject != null) { | |
| 1232 try { | |
| 1233 long id = (Long) idObject; | |
| 1234 DvObject dvobject = dvObjectService.findDvObject(id); | |
| 1235 if (dvobject == null) { | |
| 1236 dvObjectInSolrOnly.add(id); | |
| 1237 } | |
| 1238 } catch (ClassCastException ex) { | |
| 1239 throw new SearchException("Found " + SearchFields.ENTITY_ID + " but error casting " + idObject + " to long", ex); | |
| 1240 } | |
| 1241 } | |
| 1242 } | |
| 1243 return dvObjectInSolrOnly; | |
| 1244 } | |
| 1245 | |
| 1246 } |
