diff src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @ 10:a50cf11e5178

Rewrite LGDataverse completely upgrading to dataverse4.0
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 08 Sep 2015 17:00:21 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java	Tue Sep 08 17:00:21 2015 +0200
@@ -0,0 +1,539 @@
+package edu.harvard.iq.dataverse.util.json;
+
+import com.google.gson.Gson;
+import edu.harvard.iq.dataverse.ControlledVocabularyValue;
+import edu.harvard.iq.dataverse.Dataset;
+import edu.harvard.iq.dataverse.DatasetField;
+import edu.harvard.iq.dataverse.DatasetFieldConstant;
+import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
+import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
+import edu.harvard.iq.dataverse.DatasetFieldType;
+import edu.harvard.iq.dataverse.DatasetFieldValue;
+import edu.harvard.iq.dataverse.DatasetVersion;
+import edu.harvard.iq.dataverse.Dataverse;
+import edu.harvard.iq.dataverse.DataverseContact;
+import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
+import edu.harvard.iq.dataverse.api.Util;
+import edu.harvard.iq.dataverse.api.dto.FieldDTO;
+import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup;
+import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress;
+import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddressRange;
+import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
+import java.io.StringReader;
+import java.text.ParseException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Set;
+import javax.json.Json;
+import javax.json.JsonArray;
+import javax.json.JsonObject;
+import javax.json.JsonReader;
+import javax.json.JsonString;
+import javax.json.JsonValue;
+
+/**
+ * Parses JSON objects into domain objects.
+ *
+ * @author michael
+ */
+public class JsonParser {
+
+    DatasetFieldServiceBean datasetFieldSvc;  // looks up dataset field types and controlled vocabulary values by name
+    MetadataBlockServiceBean blockService;  // stored by the constructor; not read by any method in this class
+    SettingsServiceBean settingsService;  // supplies authority/protocol/DOI separator defaults in parseDataset
+    boolean lenient = false;  // if lenient, we will accept alternate spellings for controlled vocabulary values
+
+    /**
+     * Creates a parser backed by the given services.
+     *
+     * @param datasetFieldSvc resolves dataset field types and controlled vocabulary values
+     * @param blockService metadata block service, kept on the instance
+     * @param settingsService source of default values used while parsing datasets
+     */
+    public JsonParser(DatasetFieldServiceBean datasetFieldSvc, MetadataBlockServiceBean blockService, SettingsServiceBean settingsService) {
+        this.settingsService = settingsService;
+        this.blockService = blockService;
+        this.datasetFieldSvc = datasetFieldSvc;
+    }
+
+    /**
+     * @return {@code true} when the parser is in lenient mode
+     */
+    public boolean isLenient() {
+        return this.lenient;
+    }
+
+    /**
+     * Switches lenient mode on or off.
+     *
+     * @param lenient the new value of the lenient flag
+     */
+    public void setLenient(boolean lenient) {
+        this.lenient = lenient;
+    }
+
+    public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException {
+        Dataverse dv = new Dataverse();
+
+        dv.setAlias(getMandatoryString(jobj, "alias"));
+        dv.setName(getMandatoryString(jobj, "name"));
+        dv.setDescription(jobj.getString("description", null));
+        dv.setPermissionRoot(jobj.getBoolean("permissionRoot", false));
+        dv.setFacetRoot(jobj.getBoolean("facetRoot", false));
+        if (jobj.containsKey("dataverseContacts")) {
+            JsonArray dvContacts = jobj.getJsonArray("dataverseContacts");
+            int i = 0;
+            List<DataverseContact> dvContactList = new LinkedList<>();
+            for (JsonValue jsv : dvContacts) {
+                DataverseContact dvc = new DataverseContact(dv);
+                dvc.setContactEmail(getMandatoryString((JsonObject) jsv, "contactEmail"));
+                dvc.setDisplayOrder(i++);
+                dvContactList.add(dvc);
+            }
+            dv.setDataverseContacts(dvContactList);
+        }
+        
+        /*  We decided that subject is not user set, but gotten from the subject of the dataverse's
+            datasets - leavig this code in for now, in case we need to go back to it at some point
+        
+        if (jobj.containsKey("dataverseSubjects")) {
+            List<ControlledVocabularyValue> dvSubjectList = new LinkedList<>();
+            DatasetFieldType subjectType = datasetFieldSvc.findByName(DatasetFieldConstant.subject);
+            List<JsonString> subjectList = jobj.getJsonArray("dataverseSubjects").getValuesAs(JsonString.class);
+            if (subjectList.size() > 0) {
+                // check first value for "all"
+                if (subjectList.get(0).getString().trim().toLowerCase().equals("all")) {
+                    dvSubjectList.addAll(subjectType.getControlledVocabularyValues());
+                } else {
+                    for (JsonString subject : subjectList) {
+                        ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(subjectType, subject.getString(),lenient);
+                        if (cvv != null) {
+                            dvSubjectList.add(cvv);
+                        } else {
+                            throw new JsonParseException("Value '" + subject.getString() + "' does not exist in type '" + subjectType.getName() + "'");
+                        }
+                    }
+                }
+            }
+            dv.setDataverseSubjects(dvSubjectList);
+        }
+        */
+                
+        return dv;
+    }
+
+    /**
+     * Reads a required string member from a JSON object.
+     *
+     * @param jobj the object to read from
+     * @param name the key that must be present
+     * @return the string stored under {@code name}
+     * @throws JsonParseException if {@code jobj} has no such key
+     */
+    private static String getMandatoryString(JsonObject jobj, String name) throws JsonParseException {
+        if (!jobj.containsKey(name)) {
+            throw new JsonParseException("Field " + name + " is mandatory");
+        }
+        return jobj.getString(name);
+    }
+
+    /**
+     * Builds an {@link IpGroup} from its JSON representation.
+     *
+     * {@code id} is read as a string and converted to a {@code Long} when present.
+     * {@code name}, {@code description} and {@code alias} are optional. {@code ranges},
+     * when present, is an array whose array-typed entries hold the two end addresses
+     * of a range; entries of any other JSON type are skipped.
+     *
+     * @param obj the JSON object describing the group
+     * @return a new, populated IP group; it has no ranges when {@code ranges} is absent
+     */
+    public IpGroup parseIpGroup(JsonObject obj) {
+        IpGroup retVal = new IpGroup();
+
+        if (obj.containsKey("id")) {
+            retVal.setId(Long.valueOf(obj.getString("id")));
+        }
+        retVal.setDisplayName(obj.getString("name", null));
+        retVal.setDescription(obj.getString("description", null));
+        retVal.setPersistedGroupAlias(obj.getString("alias", null));
+
+        // getJsonArray returns null for a missing key; treat that as "no ranges"
+        // instead of failing with a NullPointerException in the loop.
+        JsonArray rangeArray = obj.getJsonArray("ranges");
+        if (rangeArray != null) {
+            for (JsonValue range : rangeArray) {
+                if (range.getValueType() == JsonValue.ValueType.ARRAY) {
+                    JsonArray rr = (JsonArray) range;
+                    retVal.add(IpAddressRange.make(IpAddress.valueOf(rr.getString(0)),
+                            IpAddress.valueOf(rr.getString(1))));
+                }
+            }
+        }
+
+        return retVal;
+    }
+
+    public DatasetVersion parseDatasetVersion(JsonObject obj) throws JsonParseException {
+        return parseDatasetVersion(obj, new DatasetVersion());
+    }
+
+    /**
+     * Builds a {@link Dataset} holding a single version from its JSON representation.
+     *
+     * {@code authority}, {@code protocol} and {@code doiSeparator} fall back to the
+     * corresponding value from the settings service when the JSON does not supply
+     * them as strings. {@code identifier} is optional. {@code datasetVersion} is
+     * required and is parsed into the dataset's only version.
+     *
+     * @param obj the JSON object describing the dataset
+     * @return a new dataset linked to its parsed version
+     * @throws JsonParseException if {@code datasetVersion} is missing or cannot be parsed
+     */
+    public Dataset parseDataset(JsonObject obj) throws JsonParseException {
+        Dataset dataset = new Dataset();
+
+        // The settings service is consulted only when the JSON has no usable value.
+        String authority = obj.getString("authority", null);
+        if (authority == null) {
+            authority = settingsService.getValueForKey(SettingsServiceBean.Key.Authority);
+        }
+        dataset.setAuthority(authority);
+
+        String protocol = obj.getString("protocol", null);
+        if (protocol == null) {
+            protocol = settingsService.getValueForKey(SettingsServiceBean.Key.Protocol);
+        }
+        dataset.setProtocol(protocol);
+
+        String doiSeparator = obj.getString("doiSeparator", null);
+        if (doiSeparator == null) {
+            doiSeparator = settingsService.getValueForKey(SettingsServiceBean.Key.DoiSeparator);
+        }
+        dataset.setDoiSeparator(doiSeparator);
+
+        dataset.setIdentifier(obj.getString("identifier",null));
+
+        // Fail with a parse error here; otherwise a missing member surfaces later as
+        // a NullPointerException inside parseDatasetVersion.
+        JsonObject versionJson = obj.getJsonObject("datasetVersion");
+        if (versionJson == null) {
+            throw new JsonParseException("Field datasetVersion is mandatory");
+        }
+        DatasetVersion dsv = parseDatasetVersion(versionJson);
+        LinkedList<DatasetVersion> versions = new LinkedList<>();
+        versions.add(dsv);
+        dsv.setDataset(dataset);
+
+        dataset.setVersions(versions);
+        return dataset;
+    }
+
+    /**
+     * Populates a dataset version from its JSON representation.
+     *
+     * Most members are optional strings copied straight onto {@code dsv}, with
+     * {@code null} used when they are absent. {@code versionNumber} is read as a JSON
+     * number, while {@code minorVersionNumber} and {@code id} are read as strings and
+     * converted. The id from the JSON is applied only when {@code dsv} has none yet.
+     * The dataset fields are taken from the {@code metadataBlocks} member.
+     *
+     * @param obj the JSON object describing the version
+     * @param dsv the version object to populate
+     * @return {@code dsv}, after it has been populated
+     * @throws JsonParseException if a date, number or version state cannot be parsed,
+     *         or if the metadata blocks are rejected
+     */
+    public DatasetVersion parseDatasetVersion(JsonObject obj, DatasetVersion dsv) throws JsonParseException {
+        try {
+
+            String archiveNote = obj.getString("archiveNote", null);
+            if (archiveNote != null) {
+                dsv.setArchiveNote(archiveNote);
+            }
+
+            dsv.setDeaccessionLink(obj.getString("deaccessionLink", null));
+            // -1 doubles as the "not supplied" marker, in which case the number stays null.
+            int versionNumberInt = obj.getInt("versionNumber", -1);
+            Long versionNumber = null;
+            if (versionNumberInt != -1) {
+                versionNumber = Long.valueOf(versionNumberInt);
+            }
+            dsv.setVersionNumber(versionNumber);
+            dsv.setMinorVersionNumber(parseLong(obj.getString("minorVersionNumber", null)));
+            // if the existing datasetversion does not have an id
+            // use the id from the json object.
+            if (dsv.getId() == null) {
+                dsv.setId(parseLong(obj.getString("id", null)));
+            }
+
+            String versionStateStr = obj.getString("versionState", null);
+            if (versionStateStr != null) {
+                DatasetVersion.VersionState versionState;
+                try {
+                    versionState = DatasetVersion.VersionState.valueOf(versionStateStr);
+                } catch (IllegalArgumentException ex) {
+                    // An unknown state name is bad input, so report it like the other
+                    // parse failures instead of leaking an unchecked exception.
+                    throw new JsonParseException("Error parsing versionState:" + versionStateStr, ex);
+                }
+                dsv.setVersionState(versionState);
+            }
+            dsv.setInReview(obj.getBoolean("inReview", false));
+            dsv.setReleaseTime(parseDate(obj.getString("releaseDate", null)));
+            dsv.setLastUpdateTime(parseTime(obj.getString("lastUpdateTime", null)));
+            dsv.setCreateTime(parseTime(obj.getString("createTime", null)));
+            dsv.setArchiveTime(parseTime(obj.getString("archiveTime", null)));
+            // Terms of Use related fields
+            dsv.setTermsOfUse(obj.getString("termsOfUse", null));
+            dsv.setTermsOfAccess(obj.getString("termsOfAccess", null));
+            dsv.setConfidentialityDeclaration(obj.getString("confidentialityDeclaration", null));
+            dsv.setSpecialPermissions(obj.getString("specialPermissions", null));
+            dsv.setRestrictions(obj.getString("restrictions", null));
+            dsv.setCitationRequirements(obj.getString("citationRequirements", null));
+            dsv.setDepositorRequirements(obj.getString("depositorRequirements", null));
+            dsv.setConditions(obj.getString("conditions", null));
+            dsv.setDisclaimer(obj.getString("disclaimer", null));
+            dsv.setDataAccessPlace(obj.getString("dataAccessPlace", null));
+            dsv.setOriginalArchive(obj.getString("originalArchive", null));
+            dsv.setAvailabilityStatus(obj.getString("availabilityStatus", null));
+            dsv.setContactForAccess(obj.getString("contactForAccess", null));
+            dsv.setSizeOfCollection(obj.getString("sizeOfCollection", null));
+            dsv.setStudyCompletion(obj.getString("studyCompletion", null));
+
+            dsv.setDatasetFields(parseMetadataBlocks(obj.getJsonObject("metadataBlocks")));
+
+            return dsv;
+
+        } catch (ParseException ex) {
+            throw new JsonParseException("Error parsing date:" + ex.getMessage(), ex);
+        } catch (NumberFormatException ex) {
+            throw new JsonParseException("Error parsing number:" + ex.getMessage(), ex);
+        }
+    }
+
+    public List<DatasetField> parseMetadataBlocks(JsonObject json) throws JsonParseException {
+        Set<String> keys = json.keySet();
+        List<DatasetField> fields = new LinkedList<>();
+
+        for (String blockName : keys) {
+            JsonObject blockJson = json.getJsonObject(blockName);
+            JsonArray fieldsJson = blockJson.getJsonArray("fields");
+            for (JsonObject fieldJson : fieldsJson.getValuesAs(JsonObject.class)) {
+                try {
+                    fields.add(parseField(fieldJson));
+                } catch (CompoundVocabularyException ex) {
+                    DatasetFieldType fieldType = datasetFieldSvc.findByNameOpt(fieldJson.getString("typeName", ""));
+                    if (lenient && (DatasetFieldConstant.geographicCoverage).equals(fieldType.getName())) {
+                        fields.add(remapGeographicCoverage( ex));                       
+                    } else {
+                        // if not lenient mode, re-throw exception
+                        throw ex;
+                    }
+                }
+
+            }
+        }
+        convertKeywordsToSubjects(fields);
+        return fields;
+    }
+    /**
+     * Special processing for GeographicCoverage compound field:
+     * Handle parsing exceptions caused by invalid controlled vocabulary in the "country" field by
+     * putting the invalid data in "otherGeographicCoverage" in a new compound value.
+     * 
+     * @param ex - contains the invalid values to be processed
+     * @return a compound DatasetField that contains the newly created values, in addition to 
+     * the original valid values.
+     * @throws JsonParseException if the rebuilt field cannot be parsed
+     */
+    private DatasetField remapGeographicCoverage(CompoundVocabularyException ex) throws JsonParseException{
+        List<HashSet<FieldDTO>> geoCoverageList = new ArrayList<>();
+        // For each exception, create HashSet of otherGeographic Coverage and add to list
+        for (ControlledVocabularyException vocabEx : ex.getExList()) {
+            HashSet<FieldDTO> set = new HashSet<>();
+            set.add(FieldDTO.createPrimitiveFieldDTO(DatasetFieldConstant.otherGeographicCoverage, vocabEx.getStrValue()));
+            geoCoverageList.add(set);
+        }
+        FieldDTO geoCoverageDTO = FieldDTO.createMultipleCompoundFieldDTO(DatasetFieldConstant.geographicCoverage, geoCoverageList);
+
+        // convert DTO to datasetField so we can add back valid values.
+        Gson gson = new Gson();
+        String jsonString = gson.toJson(geoCoverageDTO);
+        JsonObject obj;
+        // JsonReader is Closeable; close it once the object has been read.
+        try (JsonReader jsonReader = Json.createReader(new StringReader(jsonString))) {
+            obj = jsonReader.readObject();
+        }
+        DatasetField geoCoverageField = parseField(obj);
+
+        // add back valid values
+        for (DatasetFieldCompoundValue dsfcv : ex.getValidValues()) {
+            if (!dsfcv.getChildDatasetFields().isEmpty()) {
+                dsfcv.setParentDatasetField(geoCoverageField);
+                geoCoverageField.getDatasetFieldCompoundValues().add(dsfcv);
+            }
+        }
+        return geoCoverageField;
+    }
+     
+    
+  
+
+    public DatasetField parseField(JsonObject json) throws JsonParseException {
+        if (json == null) {
+            return null;
+        }
+
+        DatasetField ret = new DatasetField();
+        DatasetFieldType type = datasetFieldSvc.findByNameOpt(json.getString("typeName", ""));
+    
+
+        if (type == null) {
+            throw new JsonParseException("Can't find type '" + json.getString("typeName", "") + "'");
+        }
+        if (type.isAllowMultiples() != json.getBoolean("multiple")) {
+            throw new JsonParseException("incorrect multiple   for field " + json.getString("typeName", ""));
+        }
+        if (type.isCompound() && !json.getString("typeClass").equals("compound")) {
+            throw new JsonParseException("incorrect  typeClass for field " + json.getString("typeName", "") + ", should be compound.");
+        }
+        if (!type.isControlledVocabulary() && type.isPrimitive() && !json.getString("typeClass").equals("primitive")) {
+            throw new JsonParseException("incorrect  typeClass for field: " + json.getString("typeName", "") + ", should be primitive");
+        }
+        if (type.isControlledVocabulary() && !json.getString("typeClass").equals("controlledVocabulary")) {
+            throw new JsonParseException("incorrect  typeClass for field " + json.getString("typeName", "") + ", should be controlledVocabulary");
+        }
+       
+        ret.setDatasetFieldType(type);
+               
+        if (type.isCompound()) {
+            List<DatasetFieldCompoundValue> vals = parseCompoundValue(type, json);
+            for (DatasetFieldCompoundValue dsfcv : vals) {
+                dsfcv.setParentDatasetField(ret);
+            }
+            ret.setDatasetFieldCompoundValues(vals);
+
+        } else if (type.isControlledVocabulary()) {
+            List<ControlledVocabularyValue> vals = parseControlledVocabularyValue(type, json);
+            for (ControlledVocabularyValue cvv : vals) {
+                cvv.setDatasetFieldType(type);
+            }
+            ret.setControlledVocabularyValues(vals);
+
+        } else {
+            // primitive
+            List<DatasetFieldValue> values = parsePrimitiveValue(json);
+            for (DatasetFieldValue val : values) {
+                val.setDatasetField(ret);
+            }
+            ret.setDatasetFieldValues(values);
+        }
+        
+        return ret;
+    }
+
+    /**
+     * Special processing of keywords and subjects.  All keywords and subjects will be input 
+     * from foreign formats (DDI, dcterms, etc) as keywords.  
+     * As part of the parsing, we will move keywords that match subject controlled vocabulary values
+     * into the subjects datasetField.
+     *
+     * Only the first field named "keyword" is examined. When at least one keyword matches
+     * a subject value, the keyword field is reduced to the non-matching compound values
+     * and a new subject field holding the distinct matches is appended to {@code fields}.
+     * Matched subjects receive consecutive display orders starting at 0.
+     *
+     * @param fields - the parsed datasetFields; modified in place
+     */
+    public void convertKeywordsToSubjects(List<DatasetField> fields) {
+
+        DatasetField keywordField = null;
+        for (DatasetField field : fields) {
+            if (field.getDatasetFieldType().getName().equals("keyword")) {
+                keywordField = field;
+                break;
+            }
+        }
+        if (keywordField == null) {
+            // if we don't have a keyword in the current list of datasetFields,
+            // nothing to do.
+            return;
+        }
+        DatasetFieldType type = datasetFieldSvc.findByNameOpt(DatasetFieldConstant.subject);
+        // new list to hold subjects that we find
+        List<ControlledVocabularyValue> subjects = new ArrayList<>();
+        // Make new list to hold the non-subject keywords
+        List<DatasetFieldCompoundValue> filteredValues = new ArrayList<>();
+        for (DatasetFieldCompoundValue compoundVal : keywordField.getDatasetFieldCompoundValues()) {
+            // Loop through the child fields to find the "keywordValue" field
+            for (DatasetField childField : compoundVal.getChildDatasetFields()) {
+                if (childField.getDatasetFieldType().getName().equals(DatasetFieldConstant.keywordValue)) {
+                    // check if this value is a subject
+                    ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(type, childField.getValue(),lenient);
+                    if (cvv == null) {
+                        // the keyword was not found in the subject list, so retain it in filtered list
+                        filteredValues.add(compoundVal);
+                    } else if (!subjects.contains(cvv)) {
+                        // save the value for our subject field
+                        subjects.add(cvv);
+                    }
+                }
+            }
+        }
+        // if we have found any subjects in the keyword list, then update the keyword and subject fields appropriately.
+        if (!subjects.isEmpty()) {
+            keywordField.setDatasetFieldCompoundValues(filteredValues);
+
+            DatasetField subjectField = new DatasetField();
+            subjectField.setDatasetFieldType(type);
+            // The counter lives outside the loop so each subject gets the next
+            // position; declaring it inside the loop gave every subject order 0.
+            int order = 0;
+            for (ControlledVocabularyValue val : subjects) {
+                val.setDisplayOrder(order);
+                val.setDatasetFieldType(type);
+                order++;
+            }
+
+            subjectField.setControlledVocabularyValues(subjects);
+            fields.add(subjectField);
+        }
+
+    }
+    
+    /**
+     * Parses the {@code value} member of a compound field.
+     *
+     * When {@code multiple} is true, {@code value} is an array of objects, each becoming
+     * one compound value whose display order is its array position; every parsed child
+     * must be a child type of {@code compoundType}. Otherwise {@code value} is a single
+     * object that becomes one compound value. Compound values that end up with no child
+     * fields are left out. Controlled vocabulary failures on child fields are collected
+     * and reported together once everything has been read.
+     *
+     * @param compoundType the type of the compound field being parsed
+     * @param json the field object holding {@code multiple} and {@code value}
+     * @return the parsed compound values
+     * @throws JsonParseException if a child field is invalid; a
+     *         {@link CompoundVocabularyException} carries the collected vocabulary
+     *         failures together with the values that did parse
+     */
+    public List<DatasetFieldCompoundValue> parseCompoundValue(DatasetFieldType compoundType, JsonObject json) throws JsonParseException {
+        List<ControlledVocabularyException> vocabProblems = new ArrayList<>();
+        List<DatasetFieldCompoundValue> parsedValues = new LinkedList<>();
+
+        if (!json.getBoolean("multiple")) {
+            // single compound value: "value" is one object keyed by child field name
+            DatasetFieldCompoundValue compound = new DatasetFieldCompoundValue();
+            List<DatasetField> children = new LinkedList<>();
+            JsonObject valueJson = json.getJsonObject("value");
+            for (String childName : valueJson.keySet()) {
+                JsonObject childJson = valueJson.getJsonObject(childName);
+                DatasetField child = null;
+                try {
+                    child = parseField(childJson);
+                } catch (ControlledVocabularyException ex) {
+                    vocabProblems.add(ex);
+                }
+                if (child == null) {
+                    continue;
+                }
+                child.setParentDatasetFieldCompoundValue(compound);
+                children.add(child);
+            }
+            if (!children.isEmpty()) {
+                compound.setChildDatasetFields(children);
+                parsedValues.add(compound);
+            }
+        } else {
+            // several compound values: "value" is an array of such objects
+            List<JsonObject> entries = json.getJsonArray("value").getValuesAs(JsonObject.class);
+            for (int position = 0; position < entries.size(); position++) {
+                JsonObject entry = entries.get(position);
+                DatasetFieldCompoundValue compound = new DatasetFieldCompoundValue();
+                List<DatasetField> children = new LinkedList<>();
+                for (String childName : entry.keySet()) {
+                    JsonObject childJson = entry.getJsonObject(childName);
+                    DatasetField child = null;
+                    try {
+                        child = parseField(childJson);
+                    } catch (ControlledVocabularyException ex) {
+                        vocabProblems.add(ex);
+                    }
+                    if (child == null) {
+                        continue;
+                    }
+                    if (!compoundType.getChildDatasetFieldTypes().contains(child.getDatasetFieldType())) {
+                        throw new JsonParseException("field " + child.getDatasetFieldType().getName() + " is not a child of " + compoundType.getName());
+                    }
+                    child.setParentDatasetFieldCompoundValue(compound);
+                    children.add(child);
+                }
+                if (!children.isEmpty()) {
+                    compound.setChildDatasetFields(children);
+                    // the array position is used even when earlier entries were dropped
+                    compound.setDisplayOrder(position);
+                    parsedValues.add(compound);
+                }
+            }
+        }
+
+        if (!vocabProblems.isEmpty()) {
+            throw new CompoundVocabularyException( "Invalid controlled vocabulary in compound field ", vocabProblems, parsedValues);
+        }
+        return parsedValues;
+    }
+
+    /**
+     * Parses the {@code value} member of a primitive field.
+     *
+     * When {@code multiple} is true, {@code value} is an array of strings; each becomes
+     * one trimmed value whose display order is its zero-based array position. Otherwise
+     * {@code value} is a single string (treated as empty when absent) that becomes the
+     * only value.
+     *
+     * @param json the field object holding {@code multiple} and {@code value}
+     * @return the parsed values, in input order
+     * @throws JsonParseException declared for symmetry with the other value parsers
+     */
+    public List<DatasetFieldValue> parsePrimitiveValue(JsonObject json) throws JsonParseException {
+
+        List<DatasetFieldValue> vals = new LinkedList<>();
+        if (json.getBoolean("multiple")) {
+            for (JsonString val : json.getJsonArray("value").getValuesAs(JsonString.class)) {
+                DatasetFieldValue datasetFieldValue = new DatasetFieldValue();
+                // The size before insertion is this value's zero-based position;
+                // using size() - 1 started the numbering at -1.
+                datasetFieldValue.setDisplayOrder(vals.size());
+                datasetFieldValue.setValue(val.getString().trim());
+                vals.add(datasetFieldValue);
+            }
+
+        } else {
+            DatasetFieldValue datasetFieldValue = new DatasetFieldValue();
+            datasetFieldValue.setValue(json.getString("value", "").trim());
+            vals.add(datasetFieldValue);
+        }
+
+        return vals;
+    }
+
+    /**
+     * Parses the {@code value} member of a controlled vocabulary field.
+     *
+     * When {@code multiple} is true, {@code value} is an array of strings; each is
+     * resolved against {@code cvvType} and duplicates are dropped. Otherwise
+     * {@code value} is a single string (treated as empty when absent) and the result
+     * is an immutable one-element list.
+     *
+     * @param cvvType the controlled vocabulary field type to resolve values against
+     * @param json the field object holding {@code multiple} and {@code value}
+     * @return the resolved vocabulary values, in input order
+     * @throws JsonParseException a {@link ControlledVocabularyException} when a value
+     *         does not exist in {@code cvvType}
+     */
+    public List<ControlledVocabularyValue> parseControlledVocabularyValue(DatasetFieldType cvvType, JsonObject json) throws JsonParseException {
+        if (json.getBoolean("multiple")) {
+            List<ControlledVocabularyValue> vals = new LinkedList<>();
+            for (JsonString strVal : json.getJsonArray("value").getValuesAs(JsonString.class)) {
+                ControlledVocabularyValue cvv = lookupVocabularyValue(cvvType, strVal.getString());
+                // Only add value to the list if it is not a duplicate
+                if (!vals.contains(cvv)) {
+                    vals.add(cvv);
+                }
+            }
+            return vals;
+
+        } else {
+            return Collections.singletonList(lookupVocabularyValue(cvvType, json.getString("value", "")));
+        }
+    }
+
+    /**
+     * Resolves one string against a controlled vocabulary type, honouring the lenient flag.
+     *
+     * @param cvvType the controlled vocabulary field type
+     * @param strValue the value to look up
+     * @return the matching vocabulary value, never {@code null}
+     * @throws JsonParseException a {@link ControlledVocabularyException} when no value matches
+     */
+    private ControlledVocabularyValue lookupVocabularyValue(DatasetFieldType cvvType, String strValue) throws JsonParseException {
+        ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue,lenient);
+        if (cvv == null) {
+            throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue);
+        }
+        return cvv;
+    }
+
+    /**
+     * Parses a date string with the format supplied by {@code Util.getDateFormat()}.
+     *
+     * @param str the text to parse; may be {@code null}
+     * @return the parsed date, or {@code null} when {@code str} is {@code null}
+     * @throws ParseException if {@code str} does not match the format
+     */
+    Date parseDate(String str) throws ParseException {
+        if (str == null) {
+            return null;
+        }
+        return Util.getDateFormat().parse(str);
+    }
+
+    /**
+     * Parses a timestamp string with the format supplied by {@code Util.getDateTimeFormat()}.
+     *
+     * @param str the text to parse; may be {@code null}
+     * @return the parsed timestamp, or {@code null} when {@code str} is {@code null}
+     * @throws ParseException if {@code str} does not match the format
+     */
+    Date parseTime(String str) throws ParseException {
+        if (str == null) {
+            return null;
+        }
+        return Util.getDateTimeFormat().parse(str);
+    }
+
+    /**
+     * Converts a decimal string to a {@code Long}, passing {@code null} through.
+     *
+     * @param str the text to convert; may be {@code null}
+     * @return the parsed number, or {@code null} when {@code str} is {@code null}
+     * @throws NumberFormatException if {@code str} is not a valid long
+     */
+    Long parseLong(String str) throws NumberFormatException {
+        if (str == null) {
+            return null;
+        }
+        return Long.valueOf(str);
+    }
+
+    /**
+     * Converts a decimal string to an {@code int}, with a fallback for {@code null}.
+     *
+     * @param str the text to convert; may be {@code null}
+     * @param defaultValue the result to use when {@code str} is {@code null}
+     * @return the parsed number, or {@code defaultValue} when {@code str} is {@code null}
+     */
+    int parsePrimitiveInt(String str, int defaultValue) {
+        if (str == null) {
+            return defaultValue;
+        }
+        return Integer.parseInt(str);
+    }
+
+}