Mercurial > hg > drupalISMI
diff importFromOpenMind/importer/ismi2neo4j.py @ 19:ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
ismi2model: openmind importer like ismi2neo4j that saves networkx pickle file.
author | casties |
---|---|
date | Wed, 09 Sep 2015 17:32:42 +0200 |
parents | 0827156df210 |
children | a9bfd49355f8 |
line wrap: on
line diff
```diff
--- a/importFromOpenMind/importer/ismi2neo4j.py Mon Sep 07 16:57:10 2015 +0200
+++ b/importFromOpenMind/importer/ismi2neo4j.py Wed Sep 09 17:32:42 2015 +0200
@@ -10,11 +10,8 @@
 # add relations to these objects as attributes with the relations name
 contract_relations_into_attributes = ['PLACE', 'ALIAS']
 
-# try to find and re-use existing nodes in neo4j (slow!)
-keep_nodes = False
-
 # label added to all nodes
-project_label = '_ismi2'
+project_label = '_ismi3'
 
 # OpenMind base URL
 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?"
@@ -25,10 +22,13 @@
 
 entsURL=baseURL+"method=get_ents&oc=%s"
 
+entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s"
+
 entURL=baseURL+"method=get_ent&id=%s&include_content=True"
 
 
 def readJSON(url):
+    #print("JSON loading %s"%url)
     wsh=urllib.request.urlopen(url)
     txt = wsh.read()
     return json.loads(txt.decode("utf-8"))
@@ -243,7 +243,7 @@
 
             # add target node's label as attribute
             #print("contracting src to attribute %s on id=%s"%(att_name, tar_id))
-            src.set(att_name, src.get('label'))
+            tar.set(att_name, src.get('label'))
 
         if add_inverse_relations:
             n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar),
@@ -264,36 +264,36 @@
     json = readJSON(entsURL%etype)
     ents = json['ents']
     print("importing %s %ss"%(len(ents),etype))
+    size = 100
+    batches = [ents[pos:pos + size] for pos in range(0, len(ents), size)]
     cnt = 0
-    for ent in ents:
-        cnt += 1
+    for batch in batches:
+        cnt += size
         if cnt % 100 == 0:
             print(" %s %ss"%(cnt, etype))
 
-        # extract ismi id
-        ismi_id = ent['id']
-
-        node = None
+        # extract list of ismi ids
+        ismi_ids = [str(ent['id']) for ent in batch]
 
-        # fetch full data for entity
-        ent_json = readJSON(entURL%ismi_id)
-        ent_data = ent_json['ent']
-        # create neo4j node
-        if keep_nodes:
-            node = getNode(ismi_id)
+        # fetch full data for list of entities
+        ent_json = readJSON(entsByIdURL%','.join(ismi_ids))
+        ents_data = ent_json['ents']
 
-        if ismi_id in n4j_nodes:
-            print("ERROR: entity with id=%s exists!"%ismi_id)
-            return
-
-        if node is None:
+        # iterate through results batch
+        for ent_data in ents_data:
+            ismi_id = ent_data['id']
+            if ismi_id in n4j_nodes:
+                print("ERROR: entity with id=%s exists!"%ismi_id)
+                return
+
+            # create neo4j node
             node = nodeFromEnt(ent_data, etype)
-
-        # save node reference
-        n4j_nodes[ismi_id] = node
-
-        # extract relations
-        relsFromEnt(ent_data, ismi_relations)
+
+            # save node reference
+            n4j_nodes[ismi_id] = node
+
+            # extract relations
+            relsFromEnt(ent_data, ismi_relations)
 
         #if cnt >= 100:
         #    return
```