# HG changeset patch # User casties # Date 1454428443 -3600 # Node ID d535f11a0d81264acf23f5b8c03f0e48dd3ca488 # Parent 74dfaed3600bf312fdbb1c1017944ea428c8f7ab be more aggressive about parsing dates in text type fields. diff -r 74dfaed3600b -r d535f11a0d81 importFromOpenMind/importer/ismi2model.py --- a/importFromOpenMind/importer/ismi2model.py Tue Feb 02 15:16:29 2016 +0100 +++ b/importFromOpenMind/importer/ismi2model.py Tue Feb 02 16:54:03 2016 +0100 @@ -9,8 +9,8 @@ output_fn = "ismi_graph.gpickle" # OpenMind base URL -baseURL="http://localhost:18080/om4-ismi/jsonInterface?" -#baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" +#baseURL="http://localhost:18080/om4-ismi/jsonInterface?" +baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" # node types to exclude from the graph exclude_objects_of_type = ['DIGITALIZATION', 'REFERENCE'] @@ -73,6 +73,24 @@ return name + +def parseYear(val): + year = None + try: + date_json = json.loads(val) + if 'from' in date_json: + year = date_json['from'].get('year', None) + elif 'date' in date_json: + year = date_json['date'].get('year', None) + else: + print("don't know what to do with date %s"%(val)) + + except: + pass + + return year + + def nodeFromEnt(ent, etype): """Create a Neo4J node from the given JSON entity. @@ -82,53 +100,42 @@ # go through all attributes for att in ent['atts']: ct = att.get('content_type', None) - if ct is None or ct.lower() in ['text', 'arabic', 'bool', 'boolean', 'url', 'language']: + name = att.get('name', None) + if name in exclude_attributes_of_type: + # exclude attribute + continue + + # try date first since some date attributes have the wrong type + if ct == 'date': + # date attribute + val = att['ov'] + # try to parse date object to get gregorian year + year = parseYear(val) + if year is not None: + attrs[name] = year + + elif ct is None or ct.lower() in ['text', 'arabic', 'bool', 'boolean', 'url', 'language']: # normal text attribute (assume no content_type is text too...) - key = att['name'] val = att['ov'] - if key in exclude_attributes_of_type: - # exclude attribute - continue - + if val[0] == '{': + # try to parse as date + year = parseYear(val) + if year is not None: + val = year + # keep attribute - attrs[key] = val + attrs[name] = val if 'nov' in att: # add normalized value - attrs['_n_'+key] = att['nov'] + attrs['_n_'+name] = att['nov'] elif ct == 'num': # number attribute - key = att['name'] val = att['ov'] - if key in exclude_attributes_of_type: - # exclude attribute - continue - # keep attribute, assume num is int - attrs[key] = int(val) - - elif ct == 'date': - # date attribute - key = att['name'] - val = att['ov'] - # try to parse date object to get gregorian year - try: - year = None - date_json = json.loads(val) - if 'from' in date_json: - year = date_json['from'].get('year', None) - elif 'date' in date_json: - year = date_json['date'].get('year', None) - else: - print("don't know what to do with date on %s: %s=%s"%(ent['id'],key,val)) - - if year is not None: - attrs[key] = year - - except: - print("ERROR: invalid JSON in date: %s"%repr(val)) + attrs[name] = int(val) elif ct == 'old': # ignore attribute