Mercurial > hg > LGDataverses
view src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @ 10:a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 08 Sep 2015 17:00:21 +0200 |
| parents | |
| children |
line wrap: on
line source
package edu.harvard.iq.dataverse.util.json; import com.google.gson.Gson; import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetFieldValue; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseContact; import edu.harvard.iq.dataverse.MetadataBlockServiceBean; import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddressRange; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import java.io.StringReader; import java.text.ParseException; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; import javax.json.Json; import javax.json.JsonArray; import javax.json.JsonObject; import javax.json.JsonReader; import javax.json.JsonString; import javax.json.JsonValue; /** * Parses JSON objects into domain objects. * * @author michael */ public class JsonParser { DatasetFieldServiceBean datasetFieldSvc; MetadataBlockServiceBean blockService; SettingsServiceBean settingsService; boolean lenient = false; // if lenient, we will accept alternate spellings for controlled vocabulary values public JsonParser(DatasetFieldServiceBean datasetFieldSvc, MetadataBlockServiceBean blockService, SettingsServiceBean settingsService) { this.datasetFieldSvc = datasetFieldSvc; this.blockService = blockService; this.settingsService = settingsService; } public boolean isLenient() { return lenient; } public void setLenient(boolean lenient) { this.lenient = lenient; } public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException { Dataverse dv = new Dataverse(); dv.setAlias(getMandatoryString(jobj, "alias")); dv.setName(getMandatoryString(jobj, "name")); dv.setDescription(jobj.getString("description", null)); dv.setPermissionRoot(jobj.getBoolean("permissionRoot", false)); dv.setFacetRoot(jobj.getBoolean("facetRoot", false)); if (jobj.containsKey("dataverseContacts")) { JsonArray dvContacts = jobj.getJsonArray("dataverseContacts"); int i = 0; List<DataverseContact> dvContactList = new LinkedList<>(); for (JsonValue jsv : dvContacts) { DataverseContact dvc = new DataverseContact(dv); dvc.setContactEmail(getMandatoryString((JsonObject) jsv, "contactEmail")); dvc.setDisplayOrder(i++); dvContactList.add(dvc); } dv.setDataverseContacts(dvContactList); } /* We decided that subject is not user set, but gotten from the subject of the dataverse's datasets - leavig this code in for now, in case we need to go back to it at some point if (jobj.containsKey("dataverseSubjects")) { List<ControlledVocabularyValue> dvSubjectList = new LinkedList<>(); DatasetFieldType subjectType = datasetFieldSvc.findByName(DatasetFieldConstant.subject); List<JsonString> subjectList = jobj.getJsonArray("dataverseSubjects").getValuesAs(JsonString.class); if (subjectList.size() > 0) { // check first value for "all" if (subjectList.get(0).getString().trim().toLowerCase().equals("all")) { dvSubjectList.addAll(subjectType.getControlledVocabularyValues()); } else { for (JsonString subject : subjectList) { ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(subjectType, subject.getString(),lenient); if (cvv != null) { dvSubjectList.add(cvv); } else { throw new JsonParseException("Value '" + subject.getString() + "' does not exist in type '" + subjectType.getName() + "'"); } } } } dv.setDataverseSubjects(dvSubjectList); } */ return dv; } private static String getMandatoryString(JsonObject jobj, String name) throws JsonParseException { if (jobj.containsKey(name)) { return jobj.getString(name); } throw new JsonParseException("Field " + name + " is mandatory"); } public IpGroup parseIpGroup(JsonObject obj) { IpGroup retVal = new IpGroup(); if (obj.containsKey("id")) { retVal.setId(Long.valueOf(obj.getString("id"))); } retVal.setDisplayName(obj.getString("name", null)); retVal.setDescription(obj.getString("description", null)); retVal.setPersistedGroupAlias(obj.getString("alias", null)); JsonArray rangeArray = obj.getJsonArray("ranges"); for (JsonValue range : rangeArray) { if (range.getValueType() == JsonValue.ValueType.ARRAY) { JsonArray rr = (JsonArray) range; retVal.add(IpAddressRange.make(IpAddress.valueOf(rr.getString(0)), IpAddress.valueOf(rr.getString(1)))); } } return retVal; } public DatasetVersion parseDatasetVersion(JsonObject obj) throws JsonParseException { return parseDatasetVersion(obj, new DatasetVersion()); } public Dataset parseDataset(JsonObject obj) throws JsonParseException { Dataset dataset = new Dataset(); dataset.setAuthority(obj.getString("authority", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Authority) : obj.getString("authority")); dataset.setProtocol(obj.getString("protocol", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.Protocol) : obj.getString("protocol")); dataset.setDoiSeparator(obj.getString("doiSeparator", null) == null ? settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator) : obj.getString("doiSeparator")); dataset.setIdentifier(obj.getString("identifier",null)); DatasetVersion dsv = parseDatasetVersion(obj.getJsonObject("datasetVersion")); LinkedList<DatasetVersion> versions = new LinkedList<>(); versions.add(dsv); dsv.setDataset(dataset); dataset.setVersions(versions); return dataset; } public DatasetVersion parseDatasetVersion(JsonObject obj, DatasetVersion dsv) throws JsonParseException { try { String archiveNote = obj.getString("archiveNote", null); if (archiveNote != null) { dsv.setArchiveNote(archiveNote); } dsv.setDeaccessionLink(obj.getString("deaccessionLink", null)); int versionNumberInt = obj.getInt("versionNumber", -1); Long versionNumber = null; if (versionNumberInt !=-1) { versionNumber = new Long(versionNumberInt); } dsv.setVersionNumber(versionNumber); dsv.setMinorVersionNumber(parseLong(obj.getString("minorVersionNumber", null))); // if the existing datasetversion doesn not have an id // use the id from the json object. if (dsv.getId()==null) { dsv.setId(parseLong(obj.getString("id", null))); } String versionStateStr = obj.getString("versionState", null); if (versionStateStr != null) { dsv.setVersionState(DatasetVersion.VersionState.valueOf(versionStateStr)); } dsv.setInReview(obj.getBoolean("inReview", false)); dsv.setReleaseTime(parseDate(obj.getString("releaseDate", null))); dsv.setLastUpdateTime(parseTime(obj.getString("lastUpdateTime", null))); dsv.setCreateTime(parseTime(obj.getString("createTime", null))); dsv.setArchiveTime(parseTime(obj.getString("archiveTime", null))); // Terms of Use related fields dsv.setTermsOfUse(obj.getString("termsOfUse", null)); dsv.setTermsOfAccess(obj.getString("termsOfAccess", null)); dsv.setConfidentialityDeclaration(obj.getString("confidentialityDeclaration", null)); dsv.setSpecialPermissions(obj.getString("specialPermissions", null)); dsv.setRestrictions(obj.getString("restrictions", null)); dsv.setCitationRequirements(obj.getString("citationRequirements", null)); dsv.setDepositorRequirements(obj.getString("depositorRequirements", null)); dsv.setConditions(obj.getString("conditions", null)); dsv.setDisclaimer(obj.getString("disclaimer", null)); dsv.setDataAccessPlace(obj.getString("dataAccessPlace", null)); dsv.setOriginalArchive(obj.getString("originalArchive", null)); dsv.setAvailabilityStatus(obj.getString("availabilityStatus", null)); dsv.setContactForAccess(obj.getString("contactForAccess", null)); dsv.setSizeOfCollection(obj.getString("sizeOfCollection", null)); dsv.setStudyCompletion(obj.getString("studyCompletion", null)); dsv.setDatasetFields(parseMetadataBlocks(obj.getJsonObject("metadataBlocks"))); return dsv; } catch (ParseException ex) { throw new JsonParseException("Error parsing date:" + ex.getMessage(), ex); } catch (NumberFormatException ex) { throw new JsonParseException("Error parsing number:" + ex.getMessage(), ex); } } public List<DatasetField> parseMetadataBlocks(JsonObject json) throws JsonParseException { Set<String> keys = json.keySet(); List<DatasetField> fields = new LinkedList<>(); for (String blockName : keys) { JsonObject blockJson = json.getJsonObject(blockName); JsonArray fieldsJson = blockJson.getJsonArray("fields"); for (JsonObject fieldJson : fieldsJson.getValuesAs(JsonObject.class)) { try { fields.add(parseField(fieldJson)); } catch (CompoundVocabularyException ex) { DatasetFieldType fieldType = datasetFieldSvc.findByNameOpt(fieldJson.getString("typeName", "")); if (lenient && (DatasetFieldConstant.geographicCoverage).equals(fieldType.getName())) { fields.add(remapGeographicCoverage( ex)); } else { // if not lenient mode, re-throw exception throw ex; } } } } convertKeywordsToSubjects(fields); return fields; } /** * Special processing for GeographicCoverage compound field: * Handle parsing exceptions caused by invalid controlled vocabulary in the "country" field by * putting the invalid data in "otherGeographicCoverage" in a new compound value. * * @param ex - contains the invalid values to be processed * @return a compound DatasetField that contains the newly created values, in addition to * the original valid values. * @throws JsonParseException */ private DatasetField remapGeographicCoverage(CompoundVocabularyException ex) throws JsonParseException{ List<HashSet<FieldDTO>> geoCoverageList = new ArrayList<>(); // For each exception, create HashSet of otherGeographic Coverage and add to list for (ControlledVocabularyException vocabEx : ex.getExList()) { HashSet<FieldDTO> set = new HashSet<>(); set.add(FieldDTO.createPrimitiveFieldDTO(DatasetFieldConstant.otherGeographicCoverage, vocabEx.getStrValue())); geoCoverageList.add(set); } FieldDTO geoCoverageDTO = FieldDTO.createMultipleCompoundFieldDTO(DatasetFieldConstant.geographicCoverage, geoCoverageList); // convert DTO to datasetField so we can back valid values. Gson gson = new Gson(); String jsonString = gson.toJson(geoCoverageDTO); JsonReader jsonReader = Json.createReader(new StringReader(jsonString)); JsonObject obj = jsonReader.readObject(); DatasetField geoCoverageField = parseField(obj); // add back valid values for (DatasetFieldCompoundValue dsfcv : ex.getValidValues()) { if (!dsfcv.getChildDatasetFields().isEmpty()) { dsfcv.setParentDatasetField(geoCoverageField); geoCoverageField.getDatasetFieldCompoundValues().add(dsfcv); } } return geoCoverageField; } public DatasetField parseField(JsonObject json) throws JsonParseException { if (json == null) { return null; } DatasetField ret = new DatasetField(); DatasetFieldType type = datasetFieldSvc.findByNameOpt(json.getString("typeName", "")); if (type == null) { throw new JsonParseException("Can't find type '" + json.getString("typeName", "") + "'"); } if (type.isAllowMultiples() != json.getBoolean("multiple")) { throw new JsonParseException("incorrect multiple for field " + json.getString("typeName", "")); } if (type.isCompound() && !json.getString("typeClass").equals("compound")) { throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be compound."); } if (!type.isControlledVocabulary() && type.isPrimitive() && !json.getString("typeClass").equals("primitive")) { throw new JsonParseException("incorrect typeClass for field: " + json.getString("typeName", "") + ", should be primitive"); } if (type.isControlledVocabulary() && !json.getString("typeClass").equals("controlledVocabulary")) { throw new JsonParseException("incorrect typeClass for field " + json.getString("typeName", "") + ", should be controlledVocabulary"); } ret.setDatasetFieldType(type); if (type.isCompound()) { List<DatasetFieldCompoundValue> vals = parseCompoundValue(type, json); for (DatasetFieldCompoundValue dsfcv : vals) { dsfcv.setParentDatasetField(ret); } ret.setDatasetFieldCompoundValues(vals); } else if (type.isControlledVocabulary()) { List<ControlledVocabularyValue> vals = parseControlledVocabularyValue(type, json); for (ControlledVocabularyValue cvv : vals) { cvv.setDatasetFieldType(type); } ret.setControlledVocabularyValues(vals); } else { // primitive List<DatasetFieldValue> values = parsePrimitiveValue(json); for (DatasetFieldValue val : values) { val.setDatasetField(ret); } ret.setDatasetFieldValues(values); } return ret; } /** * Special processing of keywords and subjects. All keywords and subjects will be input * from foreign formats (DDI, dcterms, etc) as keywords. * As part of the parsing, we will move keywords that match subject controlled vocabulary values * into the subjects datasetField. * @param fields - the parsed datasetFields */ public void convertKeywordsToSubjects(List<DatasetField> fields) { DatasetField keywordField = null; for (DatasetField field : fields) { if (field.getDatasetFieldType().getName().equals("keyword")) { keywordField = field; break; } } if (keywordField == null) { // if we don't have a keyword in the current list of datasetFields, // nothing to do. return; } DatasetFieldType type = datasetFieldSvc.findByNameOpt(DatasetFieldConstant.subject); // new list to hold subjects that we find List<ControlledVocabularyValue> subjects = new ArrayList<>(); // Make new list to hold the non-subject keywords List<DatasetFieldCompoundValue> filteredValues = new ArrayList<>(); for (DatasetFieldCompoundValue compoundVal : keywordField.getDatasetFieldCompoundValues()) { // Loop through the child fields to find the "keywordValue" field for (DatasetField childField : compoundVal.getChildDatasetFields()) { if (childField.getDatasetFieldType().getName().equals(DatasetFieldConstant.keywordValue)) { // check if this value is a subject ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(type, childField.getValue(),lenient); if (cvv == null) { // the keyword was not found in the subject list, so retain it in filtered list filteredValues.add(compoundVal); } else { // save the value for our subject field if (!subjects.contains(cvv)) { subjects.add(cvv); } } } } } // if we have found any subjects in the keyword list, then update the keyword and subject fields appropriately. if (subjects.size() > 0) { keywordField.setDatasetFieldCompoundValues(filteredValues); DatasetField subjectField = new DatasetField(); subjectField.setDatasetFieldType(type); for (ControlledVocabularyValue val : subjects) { int order = 0; val.setDisplayOrder(order); val.setDatasetFieldType(type); order++; } subjectField.setControlledVocabularyValues(subjects); fields.add(subjectField); } } public List<DatasetFieldCompoundValue> parseCompoundValue(DatasetFieldType compoundType, JsonObject json) throws JsonParseException { List<ControlledVocabularyException> vocabExceptions = new ArrayList<>(); List<DatasetFieldCompoundValue> vals = new LinkedList<>(); if (json.getBoolean("multiple")) { int order = 0; for (JsonObject obj : json.getJsonArray("value").getValuesAs(JsonObject.class)) { DatasetFieldCompoundValue cv = new DatasetFieldCompoundValue(); List<DatasetField> fields = new LinkedList<>(); for (String fieldName : obj.keySet()) { JsonObject childFieldJson = obj.getJsonObject(fieldName); DatasetField f=null; try { f = parseField(childFieldJson); } catch(ControlledVocabularyException ex) { vocabExceptions.add(ex); } if (f!=null) { if (!compoundType.getChildDatasetFieldTypes().contains(f.getDatasetFieldType())) { throw new JsonParseException("field " + f.getDatasetFieldType().getName() + " is not a child of " + compoundType.getName()); } f.setParentDatasetFieldCompoundValue(cv); fields.add(f); } } if (!fields.isEmpty()) { cv.setChildDatasetFields(fields); cv.setDisplayOrder(order); vals.add(cv); } order++; } } else { DatasetFieldCompoundValue cv = new DatasetFieldCompoundValue(); List<DatasetField> fields = new LinkedList<>(); JsonObject value = json.getJsonObject("value"); for (String key : value.keySet()) { JsonObject childFieldJson = value.getJsonObject(key); DatasetField f = null; try { f=parseField(childFieldJson); } catch(ControlledVocabularyException ex ) { vocabExceptions.add(ex); } if (f!=null) { f.setParentDatasetFieldCompoundValue(cv); fields.add(f); } } if (!fields.isEmpty()) { cv.setChildDatasetFields(fields); vals.add(cv); } } if (!vocabExceptions.isEmpty()) { throw new CompoundVocabularyException( "Invalid controlled vocabulary in compound field ", vocabExceptions, vals); } return vals; } public List<DatasetFieldValue> parsePrimitiveValue(JsonObject json) throws JsonParseException { List<DatasetFieldValue> vals = new LinkedList<>(); if (json.getBoolean("multiple")) { for (JsonString val : json.getJsonArray("value").getValuesAs(JsonString.class)) { DatasetFieldValue datasetFieldValue = new DatasetFieldValue(); datasetFieldValue.setDisplayOrder(vals.size() - 1); datasetFieldValue.setValue(val.getString().trim()); vals.add(datasetFieldValue); } } else { DatasetFieldValue datasetFieldValue = new DatasetFieldValue(); datasetFieldValue.setValue(json.getString("value", "").trim()); vals.add(datasetFieldValue); } return vals; } public List<ControlledVocabularyValue> parseControlledVocabularyValue(DatasetFieldType cvvType, JsonObject json) throws JsonParseException { if (json.getBoolean("multiple")) { List<ControlledVocabularyValue> vals = new LinkedList<>(); for (JsonString strVal : json.getJsonArray("value").getValuesAs(JsonString.class)) { String strValue = strVal.getString(); ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue,lenient); if (cvv == null) { throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue); } // Only add value to the list if it is not a duplicate if (strValue.equals("Other")) { System.out.println("vals = "+vals+", contains: "+vals.contains(cvv)); } if (!vals.contains(cvv)) { vals.add(cvv); } } return vals; } else { String strValue = json.getString("value", ""); ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue,lenient); if (cvv == null) { throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue); } return Collections.singletonList(cvv); } } Date parseDate(String str) throws ParseException { return str == null ? null : Util.getDateFormat().parse(str); } Date parseTime(String str) throws ParseException { return str == null ? null : Util.getDateTimeFormat().parse(str); } Long parseLong(String str) throws NumberFormatException { return (str == null) ? null : Long.valueOf(str); } int parsePrimitiveInt(String str, int defaultValue) { return str == null ? defaultValue : Integer.parseInt(str); } }
