Mercurial > hg > LGDataverses
diff src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @ 10:a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 08 Sep 2015 17:00:21 +0200 |
| parents | |
| children |
line wrap: on
line diff
package edu.harvard.iq.dataverse.util.json;

import com.google.gson.Gson;
import edu.harvard.iq.dataverse.ControlledVocabularyValue;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetFieldValue;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseContact;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.harvard.iq.dataverse.api.Util;
import edu.harvard.iq.dataverse.api.dto.FieldDTO;
import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup;
import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress;
import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddressRange;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import java.io.StringReader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import javax.json.Json;
import javax.json.JsonArray;
import javax.json.JsonObject;
import javax.json.JsonReader;
import javax.json.JsonString;
import javax.json.JsonValue;

/**
 * Parses JSON objects into domain objects.
 *
 * <p>Field types and controlled vocabulary values are resolved through the
 * {@link DatasetFieldServiceBean} passed to the constructor. When
 * {@link #isLenient() lenient} mode is on, the flag is forwarded to the
 * controlled vocabulary lookup, and invalid values inside the geographic
 * coverage compound field are remapped instead of failing the parse.
 *
 * @author michael
 */
public class JsonParser {

    DatasetFieldServiceBean datasetFieldSvc;
    MetadataBlockServiceBean blockService;
    SettingsServiceBean settingsService;
    boolean lenient = false;  // if lenient, we will accept alternate spellings for controlled vocabulary values

    /**
     * Creates a parser backed by the given services.
     *
     * @param datasetFieldSvc used to look up field types and controlled vocabulary values
     * @param blockService stored on the parser; not used by the methods of this class
     * @param settingsService supplies defaults for dataset authority, protocol and DOI separator
     */
    public JsonParser(DatasetFieldServiceBean datasetFieldSvc, MetadataBlockServiceBean blockService, SettingsServiceBean settingsService) {
        this.datasetFieldSvc = datasetFieldSvc;
        this.blockService = blockService;
        this.settingsService = settingsService;
    }

    /**
     * @return {@code true} if the parser is in lenient mode
     */
    public boolean isLenient() {
        return lenient;
    }

    /**
     * @param lenient {@code true} to switch the parser to lenient mode
     */
    public void setLenient(boolean lenient) {
        this.lenient = lenient;
    }

    /**
     * Builds a {@link Dataverse} from its JSON representation.
     *
     * <p>{@code alias} and {@code name} are mandatory; {@code description},
     * {@code permissionRoot}, {@code facetRoot} and {@code dataverseContacts}
     * are optional. Contacts receive their display order from their position
     * in the JSON array.
     *
     * @param jobj the JSON object describing the dataverse
     * @return the populated dataverse
     * @throws JsonParseException if a mandatory field is missing
     */
    public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException {
        Dataverse dv = new Dataverse();

        dv.setAlias(getMandatoryString(jobj, "alias"));
        dv.setName(getMandatoryString(jobj, "name"));
        dv.setDescription(jobj.getString("description", null));
        dv.setPermissionRoot(jobj.getBoolean("permissionRoot", false));
        dv.setFacetRoot(jobj.getBoolean("facetRoot", false));
        if (jobj.containsKey("dataverseContacts")) {
            JsonArray dvContacts = jobj.getJsonArray("dataverseContacts");
            int i = 0;
            List<DataverseContact> dvContactList = new LinkedList<>();
            for (JsonValue jsv : dvContacts) {
                DataverseContact dvc = new DataverseContact(dv);
                dvc.setContactEmail(getMandatoryString((JsonObject) jsv, "contactEmail"));
                dvc.setDisplayOrder(i++);
                dvContactList.add(dvc);
            }
            dv.setDataverseContacts(dvContactList);
        }

        /*  We decided that subject is not user set, but gotten from the subject of the dataverse's
            datasets - leaving this code in for now, in case we need to go back to it at some point

        if (jobj.containsKey("dataverseSubjects")) {
            List<ControlledVocabularyValue> dvSubjectList = new LinkedList<>();
            DatasetFieldType subjectType = datasetFieldSvc.findByName(DatasetFieldConstant.subject);
            List<JsonString> subjectList = jobj.getJsonArray("dataverseSubjects").getValuesAs(JsonString.class);
            if (subjectList.size() > 0) {
                // check first value for "all"
                if (subjectList.get(0).getString().trim().toLowerCase().equals("all")) {
                    dvSubjectList.addAll(subjectType.getControlledVocabularyValues());
                } else {
                    for (JsonString subject : subjectList) {
                        ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(subjectType, subject.getString(),lenient);
                        if (cvv != null) {
                            dvSubjectList.add(cvv);
                        } else {
                            throw new JsonParseException("Value '" + subject.getString() + "' does not exist in type '" + subjectType.getName() + "'");
                        }
                    }
                }
            }
            dv.setDataverseSubjects(dvSubjectList);
        }
        */

        return dv;
    }

    /**
     * Reads a string field that must be present.
     *
     * @param jobj the object to read from
     * @param name the key to read
     * @return the string value stored under {@code name}
     * @throws JsonParseException if {@code jobj} has no such key
     */
    private static String getMandatoryString(JsonObject jobj, String name) throws JsonParseException {
        if (jobj.containsKey(name)) {
            return jobj.getString(name);
        }
        throw new JsonParseException("Field " + name + " is mandatory");
    }

    /**
     * Builds an {@link IpGroup} from its JSON representation.
     *
     * <p>Each entry of the optional {@code ranges} array that is itself an
     * array is read as a {@code [bottom, top]} pair of IP address strings;
     * entries of any other JSON type are skipped.
     *
     * @param obj the JSON object describing the group
     * @return the populated group
     */
    public IpGroup parseIpGroup(JsonObject obj) {
        IpGroup retVal = new IpGroup();

        if (obj.containsKey("id")) {
            retVal.setId(Long.valueOf(obj.getString("id")));
        }
        retVal.setDisplayName(obj.getString("name", null));
        retVal.setDescription(obj.getString("description", null));
        retVal.setPersistedGroupAlias(obj.getString("alias", null));

        // getJsonArray returns null when the key is absent; a group without
        // ranges is parsed as a group with no ranges rather than failing.
        JsonArray rangeArray = obj.getJsonArray("ranges");
        if (rangeArray != null) {
            for (JsonValue range : rangeArray) {
                if (range.getValueType() == JsonValue.ValueType.ARRAY) {
                    JsonArray rr = (JsonArray) range;
                    retVal.add(IpAddressRange.make(IpAddress.valueOf(rr.getString(0)),
                            IpAddress.valueOf(rr.getString(1))));
                }
            }
        }

        return retVal;
    }

    /**
     * Parses a dataset version into a newly created {@link DatasetVersion}.
     *
     * @param obj the JSON object describing the version
     * @return the populated version
     * @throws JsonParseException if the JSON cannot be parsed
     */
    public DatasetVersion parseDatasetVersion(JsonObject obj) throws JsonParseException {
        return parseDatasetVersion(obj, new DatasetVersion());
    }

    /**
     * Builds a {@link Dataset} holding a single version.
     *
     * <p>{@code authority}, {@code protocol} and {@code doiSeparator} fall back
     * to the corresponding settings when absent from the JSON. The version is
     * read from the {@code datasetVersion} member.
     *
     * @param obj the JSON object describing the dataset
     * @return the populated dataset
     * @throws JsonParseException if the dataset version cannot be parsed
     */
    public Dataset parseDataset(JsonObject obj) throws JsonParseException {
        Dataset dataset = new Dataset();

        String authority = obj.getString("authority", null);
        dataset.setAuthority(authority != null ? authority : settingsService.getValueForKey(SettingsServiceBean.Key.Authority));
        String protocol = obj.getString("protocol", null);
        dataset.setProtocol(protocol != null ? protocol : settingsService.getValueForKey(SettingsServiceBean.Key.Protocol));
        String doiSeparator = obj.getString("doiSeparator", null);
        dataset.setDoiSeparator(doiSeparator != null ? doiSeparator : settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator));
        dataset.setIdentifier(obj.getString("identifier", null));

        DatasetVersion dsv = parseDatasetVersion(obj.getJsonObject("datasetVersion"));
        LinkedList<DatasetVersion> versions = new LinkedList<>();
        versions.add(dsv);
        dsv.setDataset(dataset);

        dataset.setVersions(versions);
        return dataset;
    }

    /**
     * Populates an existing {@link DatasetVersion} from JSON.
     *
     * <p>A {@code versionNumber} of {@code -1} is treated the same as an absent
     * one. The {@code id} from the JSON is only applied when {@code dsv} does
     * not already have one.
     *
     * @param obj the JSON object describing the version
     * @param dsv the version to populate
     * @return {@code dsv}
     * @throws JsonParseException if a date or number is malformed, if
     *         {@code metadataBlocks} is missing, or if a metadata field cannot
     *         be parsed
     */
    public DatasetVersion parseDatasetVersion(JsonObject obj, DatasetVersion dsv) throws JsonParseException {
        try {

            String archiveNote = obj.getString("archiveNote", null);
            if (archiveNote != null) {
                dsv.setArchiveNote(archiveNote);
            }

            dsv.setDeaccessionLink(obj.getString("deaccessionLink", null));
            int versionNumberInt = obj.getInt("versionNumber", -1);
            Long versionNumber = null;
            if (versionNumberInt != -1) {
                versionNumber = Long.valueOf(versionNumberInt);
            }
            dsv.setVersionNumber(versionNumber);
            dsv.setMinorVersionNumber(parseLong(obj.getString("minorVersionNumber", null)));
            // if the existing datasetversion does not have an id
            // use the id from the json object.
            if (dsv.getId() == null) {
                dsv.setId(parseLong(obj.getString("id", null)));
            }

            String versionStateStr = obj.getString("versionState", null);
            if (versionStateStr != null) {
                dsv.setVersionState(DatasetVersion.VersionState.valueOf(versionStateStr));
            }
            dsv.setInReview(obj.getBoolean("inReview", false));
            dsv.setReleaseTime(parseDate(obj.getString("releaseDate", null)));
            dsv.setLastUpdateTime(parseTime(obj.getString("lastUpdateTime", null)));
            dsv.setCreateTime(parseTime(obj.getString("createTime", null)));
            dsv.setArchiveTime(parseTime(obj.getString("archiveTime", null)));
            // Terms of Use related fields
            dsv.setTermsOfUse(obj.getString("termsOfUse", null));
            dsv.setTermsOfAccess(obj.getString("termsOfAccess", null));
            dsv.setConfidentialityDeclaration(obj.getString("confidentialityDeclaration", null));
            dsv.setSpecialPermissions(obj.getString("specialPermissions", null));
            dsv.setRestrictions(obj.getString("restrictions", null));
            dsv.setCitationRequirements(obj.getString("citationRequirements", null));
            dsv.setDepositorRequirements(obj.getString("depositorRequirements", null));
            dsv.setConditions(obj.getString("conditions", null));
            dsv.setDisclaimer(obj.getString("disclaimer", null));
            dsv.setDataAccessPlace(obj.getString("dataAccessPlace", null));
            dsv.setOriginalArchive(obj.getString("originalArchive", null));
            dsv.setAvailabilityStatus(obj.getString("availabilityStatus", null));
            dsv.setContactForAccess(obj.getString("contactForAccess", null));
            dsv.setSizeOfCollection(obj.getString("sizeOfCollection", null));
            dsv.setStudyCompletion(obj.getString("studyCompletion", null));

            // Report a missing metadataBlocks member as a parse error instead of
            // letting parseMetadataBlocks fail with a NullPointerException.
            JsonObject metadataBlocks = obj.getJsonObject("metadataBlocks");
            if (metadataBlocks == null) {
                throw new JsonParseException("Field metadataBlocks is mandatory");
            }
            dsv.setDatasetFields(parseMetadataBlocks(metadataBlocks));

            return dsv;

        } catch (ParseException ex) {
            throw new JsonParseException("Error parsing date:" + ex.getMessage(), ex);
        } catch (NumberFormatException ex) {
            throw new JsonParseException("Error parsing number:" + ex.getMessage(), ex);
        }
    }

    /**
     * Parses every field of every metadata block in {@code json}.
     *
     * <p>In lenient mode, invalid controlled vocabulary values inside the
     * geographic coverage compound field are remapped via
     * {@link #remapGeographicCoverage(CompoundVocabularyException)}; in every
     * other case the exception is propagated. After all fields are parsed,
     * keywords matching subject vocabulary values are moved to a subject field
     * (see {@link #convertKeywordsToSubjects(List)}).
     *
     * @param json object keyed by block name; each block holds a {@code fields} array
     * @return the parsed fields of all blocks
     * @throws JsonParseException if a field cannot be parsed
     */
    public List<DatasetField> parseMetadataBlocks(JsonObject json) throws JsonParseException {
        Set<String> keys = json.keySet();
        List<DatasetField> fields = new LinkedList<>();

        for (String blockName : keys) {
            JsonObject blockJson = json.getJsonObject(blockName);
            JsonArray fieldsJson = blockJson.getJsonArray("fields");
            for (JsonObject fieldJson : fieldsJson.getValuesAs(JsonObject.class)) {
                try {
                    fields.add(parseField(fieldJson));
                } catch (CompoundVocabularyException ex) {
                    DatasetFieldType fieldType = datasetFieldSvc.findByNameOpt(fieldJson.getString("typeName", ""));
                    // fieldType is checked for null so that a failed lookup re-throws
                    // the original exception instead of masking it with an NPE.
                    if (lenient && fieldType != null
                            && (DatasetFieldConstant.geographicCoverage).equals(fieldType.getName())) {
                        fields.add(remapGeographicCoverage(ex));
                    } else {
                        // if not lenient mode, re-throw exception
                        throw ex;
                    }
                }
            }
        }
        convertKeywordsToSubjects(fields);
        return fields;
    }

    /**
     * Special processing for GeographicCoverage compound field:
     * Handle parsing exceptions caused by invalid controlled vocabulary in the "country" field by
     * putting the invalid data in "otherGeographicCoverage" in a new compound value.
     *
     * @param ex - contains the invalid values to be processed
     * @return a compound DatasetField that contains the newly created values, in addition to
     * the original valid values.
     * @throws JsonParseException if the rebuilt field cannot be parsed
     */
    private DatasetField remapGeographicCoverage(CompoundVocabularyException ex) throws JsonParseException {
        List<HashSet<FieldDTO>> geoCoverageList = new ArrayList<>();
        // For each exception, create HashSet of otherGeographic Coverage and add to list
        for (ControlledVocabularyException vocabEx : ex.getExList()) {
            HashSet<FieldDTO> set = new HashSet<>();
            set.add(FieldDTO.createPrimitiveFieldDTO(DatasetFieldConstant.otherGeographicCoverage, vocabEx.getStrValue()));
            geoCoverageList.add(set);
        }
        FieldDTO geoCoverageDTO = FieldDTO.createMultipleCompoundFieldDTO(DatasetFieldConstant.geographicCoverage, geoCoverageList);

        // convert DTO to datasetField so we can add back valid values.
        Gson gson = new Gson();
        String jsonString = gson.toJson(geoCoverageDTO);
        JsonObject obj;
        try (JsonReader jsonReader = Json.createReader(new StringReader(jsonString))) {
            obj = jsonReader.readObject();
        }
        DatasetField geoCoverageField = parseField(obj);

        // add back valid values
        for (DatasetFieldCompoundValue dsfcv : ex.getValidValues()) {
            if (!dsfcv.getChildDatasetFields().isEmpty()) {
                dsfcv.setParentDatasetField(geoCoverageField);
                geoCoverageField.getDatasetFieldCompoundValues().add(dsfcv);
            }
        }
        return geoCoverageField;
    }

    /**
     * Parses a single dataset field.
     *
     * <p>The JSON must carry {@code typeName}, {@code multiple} and
     * {@code typeClass} members that agree with the field type registered
     * under {@code typeName}; the value is then parsed as compound, controlled
     * vocabulary or primitive accordingly.
     *
     * @param json the JSON object describing the field; may be {@code null}
     * @return the parsed field, or {@code null} if {@code json} is {@code null}
     * @throws JsonParseException if the type is unknown, if {@code multiple} or
     *         {@code typeClass} is missing or inconsistent with the type, or if
     *         the value cannot be parsed
     */
    public DatasetField parseField(JsonObject json) throws JsonParseException {
        if (json == null) {
            return null;
        }

        DatasetField ret = new DatasetField();
        String typeName = json.getString("typeName", "");
        DatasetFieldType type = datasetFieldSvc.findByNameOpt(typeName);

        if (type == null) {
            throw new JsonParseException("Can't find type '" + typeName + "'");
        }
        // JsonObject.getBoolean(name) throws NullPointerException for an absent
        // key; surface that as a parse error instead.
        if (!json.containsKey("multiple")) {
            throw new JsonParseException("Field multiple is mandatory for field " + typeName);
        }
        if (type.isAllowMultiples() != json.getBoolean("multiple")) {
            throw new JsonParseException("incorrect multiple for field " + typeName);
        }
        // An absent typeClass is read as "" so that it fails the checks below
        // with a JsonParseException rather than a NullPointerException.
        String typeClass = json.getString("typeClass", "");
        if (type.isCompound() && !"compound".equals(typeClass)) {
            throw new JsonParseException("incorrect typeClass for field " + typeName + ", should be compound.");
        }
        if (!type.isControlledVocabulary() && type.isPrimitive() && !"primitive".equals(typeClass)) {
            throw new JsonParseException("incorrect typeClass for field: " + typeName + ", should be primitive");
        }
        if (type.isControlledVocabulary() && !"controlledVocabulary".equals(typeClass)) {
            throw new JsonParseException("incorrect typeClass for field " + typeName + ", should be controlledVocabulary");
        }

        ret.setDatasetFieldType(type);

        if (type.isCompound()) {
            List<DatasetFieldCompoundValue> vals = parseCompoundValue(type, json);
            for (DatasetFieldCompoundValue dsfcv : vals) {
                dsfcv.setParentDatasetField(ret);
            }
            ret.setDatasetFieldCompoundValues(vals);

        } else if (type.isControlledVocabulary()) {
            List<ControlledVocabularyValue> vals = parseControlledVocabularyValue(type, json);
            for (ControlledVocabularyValue cvv : vals) {
                cvv.setDatasetFieldType(type);
            }
            ret.setControlledVocabularyValues(vals);

        } else {
            // primitive
            List<DatasetFieldValue> values = parsePrimitiveValue(json);
            for (DatasetFieldValue val : values) {
                val.setDatasetField(ret);
            }
            ret.setDatasetFieldValues(values);
        }

        return ret;
    }

    /**
     * Special processing of keywords and subjects.  All keywords and subjects will be input
     * from foreign formats (DDI, dcterms, etc) as keywords.
     * As part of the parsing, we will move keywords that match subject controlled vocabulary values
     * into the subjects datasetField.
     *
     * <p>If at least one keyword matches, the keyword field keeps only the
     * compound values whose keyword did not match, and a new subject field
     * holding the distinct matches (in order of first appearance) is appended
     * to {@code fields}. If nothing matches, {@code fields} is left untouched.
     *
     * @param fields - the parsed datasetFields
     */
    public void convertKeywordsToSubjects(List<DatasetField> fields) {

        DatasetField keywordField = null;
        for (DatasetField field : fields) {
            if (field.getDatasetFieldType().getName().equals("keyword")) {
                keywordField = field;
                break;
            }
        }
        if (keywordField == null) {
            // if we don't have a keyword in the current list of datasetFields,
            // nothing to do.
            return;
        }
        DatasetFieldType type = datasetFieldSvc.findByNameOpt(DatasetFieldConstant.subject);
        // new list to hold subjects that we find
        List<ControlledVocabularyValue> subjects = new ArrayList<>();
        // Make new list to hold the non-subject keywords
        List<DatasetFieldCompoundValue> filteredValues = new ArrayList<>();
        for (DatasetFieldCompoundValue compoundVal : keywordField.getDatasetFieldCompoundValues()) {
            // Loop through the child fields to find the "keywordValue" field
            for (DatasetField childField : compoundVal.getChildDatasetFields()) {
                if (childField.getDatasetFieldType().getName().equals(DatasetFieldConstant.keywordValue)) {
                    // check if this value is a subject
                    ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(type, childField.getValue(), lenient);
                    if (cvv == null) {
                        // the keyword was not found in the subject list, so retain it in filtered list
                        filteredValues.add(compoundVal);
                    } else if (!subjects.contains(cvv)) {
                        // save the value for our subject field
                        subjects.add(cvv);
                    }
                }
            }
        }
        // if we have found any subjects in the keyword list, then update the keyword and subject fields appropriately.
        if (!subjects.isEmpty()) {
            keywordField.setDatasetFieldCompoundValues(filteredValues);

            DatasetField subjectField = new DatasetField();
            subjectField.setDatasetFieldType(type);
            // The counter lives outside the loop so each subject gets its own
            // position instead of every subject being assigned 0.
            int order = 0;
            for (ControlledVocabularyValue val : subjects) {
                val.setDisplayOrder(order++);
                val.setDatasetFieldType(type);
            }

            subjectField.setControlledVocabularyValues(subjects);
            fields.add(subjectField);
        }
    }

    /**
     * Parses the value(s) of a compound field.
     *
     * <p>Controlled vocabulary failures in child fields do not abort parsing
     * immediately: they are collected, the remaining children are still parsed,
     * and a single {@link CompoundVocabularyException} carrying both the
     * failures and the successfully parsed values is thrown at the end.
     * Compound values that end up with no child fields are omitted.
     *
     * @param compoundType the type of the compound field
     * @param json the field JSON; {@code value} is an array of objects when
     *        {@code multiple} is true and a single object otherwise
     * @return the parsed compound values
     * @throws JsonParseException if a child cannot be parsed or, for
     *         multiple-valued fields, is not a child type of {@code compoundType}
     */
    public List<DatasetFieldCompoundValue> parseCompoundValue(DatasetFieldType compoundType, JsonObject json) throws JsonParseException {
        List<ControlledVocabularyException> vocabExceptions = new ArrayList<>();
        List<DatasetFieldCompoundValue> vals = new LinkedList<>();
        if (json.getBoolean("multiple")) {
            int order = 0;
            for (JsonObject obj : json.getJsonArray("value").getValuesAs(JsonObject.class)) {
                DatasetFieldCompoundValue cv = new DatasetFieldCompoundValue();
                List<DatasetField> fields = new LinkedList<>();
                for (String fieldName : obj.keySet()) {
                    DatasetField f = parseChildField(obj.getJsonObject(fieldName), vocabExceptions);
                    if (f != null) {
                        if (!compoundType.getChildDatasetFieldTypes().contains(f.getDatasetFieldType())) {
                            throw new JsonParseException("field " + f.getDatasetFieldType().getName() + " is not a child of " + compoundType.getName());
                        }
                        f.setParentDatasetFieldCompoundValue(cv);
                        fields.add(f);
                    }
                }
                if (!fields.isEmpty()) {
                    cv.setChildDatasetFields(fields);
                    cv.setDisplayOrder(order);
                    vals.add(cv);
                }
                // order advances even for skipped (empty) values, so display
                // order reflects the position in the JSON array.
                order++;
            }

        } else {
            // NOTE(review): unlike the multiple branch, this branch neither checks
            // that each child belongs to compoundType nor sets a display order.
            // Kept as-is to avoid rejecting input that is accepted today - confirm
            // whether the asymmetry is intentional.
            DatasetFieldCompoundValue cv = new DatasetFieldCompoundValue();
            List<DatasetField> fields = new LinkedList<>();
            JsonObject value = json.getJsonObject("value");
            for (String key : value.keySet()) {
                DatasetField f = parseChildField(value.getJsonObject(key), vocabExceptions);
                if (f != null) {
                    f.setParentDatasetFieldCompoundValue(cv);
                    fields.add(f);
                }
            }
            if (!fields.isEmpty()) {
                cv.setChildDatasetFields(fields);
                vals.add(cv);
            }
        }
        if (!vocabExceptions.isEmpty()) {
            throw new CompoundVocabularyException("Invalid controlled vocabulary in compound field ", vocabExceptions, vals);
        }
        return vals;
    }

    /**
     * Parses one child of a compound value, recording a controlled vocabulary
     * failure in {@code vocabExceptions} instead of propagating it.
     *
     * @param childFieldJson the child field JSON; may be {@code null}
     * @param vocabExceptions receives any controlled vocabulary failure
     * @return the parsed child, or {@code null} if it was {@code null} or failed
     *         the vocabulary lookup
     * @throws JsonParseException for any other parse failure
     */
    private DatasetField parseChildField(JsonObject childFieldJson, List<ControlledVocabularyException> vocabExceptions) throws JsonParseException {
        try {
            return parseField(childFieldJson);
        } catch (ControlledVocabularyException ex) {
            vocabExceptions.add(ex);
            return null;
        }
    }

    /**
     * Parses the value(s) of a primitive field; every value is trimmed.
     *
     * <p>For a multiple-valued field, display orders are assigned from 0 in
     * array order. For a single-valued field, a missing {@code value} is read
     * as the empty string and no display order is set.
     *
     * @param json the field JSON
     * @return the parsed values
     * @throws JsonParseException declared for interface compatibility
     */
    public List<DatasetFieldValue> parsePrimitiveValue(JsonObject json) throws JsonParseException {

        List<DatasetFieldValue> vals = new LinkedList<>();
        if (json.getBoolean("multiple")) {
            for (JsonString val : json.getJsonArray("value").getValuesAs(JsonString.class)) {
                DatasetFieldValue datasetFieldValue = new DatasetFieldValue();
                // vals.size() is the zero-based index of the value about to be added.
                datasetFieldValue.setDisplayOrder(vals.size());
                datasetFieldValue.setValue(val.getString().trim());
                vals.add(datasetFieldValue);
            }

        } else {
            DatasetFieldValue datasetFieldValue = new DatasetFieldValue();
            datasetFieldValue.setValue(json.getString("value", "").trim());
            vals.add(datasetFieldValue);
        }

        return vals;
    }

    /**
     * Resolves the value(s) of a controlled vocabulary field.
     *
     * <p>For a multiple-valued field, duplicate values are dropped. For a
     * single-valued field, the result is an immutable singleton list.
     *
     * @param cvvType the controlled vocabulary field type
     * @param json the field JSON
     * @return the resolved vocabulary values
     * @throws JsonParseException a {@link ControlledVocabularyException} if a
     *         value does not exist in {@code cvvType}
     */
    public List<ControlledVocabularyValue> parseControlledVocabularyValue(DatasetFieldType cvvType, JsonObject json) throws JsonParseException {
        if (json.getBoolean("multiple")) {
            List<ControlledVocabularyValue> vals = new LinkedList<>();
            for (JsonString strVal : json.getJsonArray("value").getValuesAs(JsonString.class)) {
                ControlledVocabularyValue cvv = findVocabularyValue(cvvType, strVal.getString());
                // Only add value to the list if it is not a duplicate
                if (!vals.contains(cvv)) {
                    vals.add(cvv);
                }
            }
            return vals;

        } else {
            return Collections.singletonList(findVocabularyValue(cvvType, json.getString("value", "")));
        }
    }

    /**
     * Looks up a single controlled vocabulary value, honouring the lenient flag.
     *
     * @param cvvType the controlled vocabulary field type
     * @param strValue the value to look up
     * @return the matching vocabulary value, never {@code null}
     * @throws ControlledVocabularyException if no such value exists in {@code cvvType}
     */
    private ControlledVocabularyValue findVocabularyValue(DatasetFieldType cvvType, String strValue) throws ControlledVocabularyException {
        ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue, lenient);
        if (cvv == null) {
            throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue);
        }
        return cvv;
    }

    /**
     * Parses a date with {@code Util.getDateFormat()}.
     *
     * @param str the text to parse; may be {@code null}
     * @return the parsed date, or {@code null} if {@code str} is {@code null}
     * @throws ParseException if {@code str} does not match the format
     */
    Date parseDate(String str) throws ParseException {
        return str == null ? null : Util.getDateFormat().parse(str);
    }

    /**
     * Parses a timestamp with {@code Util.getDateTimeFormat()}.
     *
     * @param str the text to parse; may be {@code null}
     * @return the parsed timestamp, or {@code null} if {@code str} is {@code null}
     * @throws ParseException if {@code str} does not match the format
     */
    Date parseTime(String str) throws ParseException {
        return str == null ? null : Util.getDateTimeFormat().parse(str);
    }

    /**
     * Parses a long value.
     *
     * @param str the text to parse; may be {@code null}
     * @return the parsed value, or {@code null} if {@code str} is {@code null}
     * @throws NumberFormatException if {@code str} is not a valid long
     */
    Long parseLong(String str) throws NumberFormatException {
        return (str == null) ? null : Long.valueOf(str);
    }

    /**
     * Parses an int value, falling back to a default for {@code null}.
     *
     * @param str the text to parse; may be {@code null}
     * @param defaultValue the value returned when {@code str} is {@code null}
     * @return the parsed value, or {@code defaultValue} if {@code str} is {@code null}
     */
    int parsePrimitiveInt(String str, int defaultValue) {
        return str == null ? defaultValue : Integer.parseInt(str);
    }

}
