comparison importFromOpenMind/importer/ismi2neo4j.py @ 19:ca1e02a2a9c4

unfilteredIsmi: OpenMind-to-JSON exporter like filterISMI. ismi2model: OpenMind importer like ismi2neo4j that saves a networkx pickle file.
author casties
date Wed, 09 Sep 2015 17:32:42 +0200
parents 0827156df210
children a9bfd49355f8
comparison
equal deleted inserted replaced
18:0827156df210 19:ca1e02a2a9c4
8 add_inverse_relations = True 8 add_inverse_relations = True
9 9
10 # add relations to these objects as attributes with the relations name 10 # add relations to these objects as attributes with the relations name
11 contract_relations_into_attributes = ['PLACE', 'ALIAS'] 11 contract_relations_into_attributes = ['PLACE', 'ALIAS']
12 12
13 # try to find and re-use existing nodes in neo4j (slow!)
14 keep_nodes = False
15
16 # label added to all nodes 13 # label added to all nodes
17 project_label = '_ismi2' 14 project_label = '_ismi3'
18 15
19 # OpenMind base URL 16 # OpenMind base URL
20 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" 17 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?"
21 18
22 # neo4j base URL 19 # neo4j base URL
23 neo4jBaseURL = "http://localhost:7474/db/data/" 20 neo4jBaseURL = "http://localhost:7474/db/data/"
24 21
25 22
26 entsURL=baseURL+"method=get_ents&oc=%s" 23 entsURL=baseURL+"method=get_ents&oc=%s"
27 24
25 entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s"
26
28 entURL=baseURL+"method=get_ent&id=%s&include_content=True" 27 entURL=baseURL+"method=get_ent&id=%s&include_content=True"
29 28
30 29
31 def readJSON(url): 30 def readJSON(url):
31 #print("JSON loading %s"%url)
32 wsh=urllib.request.urlopen(url) 32 wsh=urllib.request.urlopen(url)
33 txt = wsh.read() 33 txt = wsh.read()
34 return json.loads(txt.decode("utf-8")) 34 return json.loads(txt.decode("utf-8"))
35 35
36 defs_json = readJSON(baseURL+"method=get_defs") 36 defs_json = readJSON(baseURL+"method=get_defs")
241 else: 241 else:
242 att_name += '2' 242 att_name += '2'
243 243
244 # add target node's label as attribute 244 # add target node's label as attribute
245 #print("contracting src to attribute %s on id=%s"%(att_name, tar_id)) 245 #print("contracting src to attribute %s on id=%s"%(att_name, tar_id))
246 src.set(att_name, src.get('label')) 246 tar.set(att_name, src.get('label'))
247 247
248 if add_inverse_relations: 248 if add_inverse_relations:
249 n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar), 249 n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar),
250 gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)] 250 gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)]
251 251
262 """ 262 """
263 # read json for all entities of given type 263 # read json for all entities of given type
264 json = readJSON(entsURL%etype) 264 json = readJSON(entsURL%etype)
265 ents = json['ents'] 265 ents = json['ents']
266 print("importing %s %ss"%(len(ents),etype)) 266 print("importing %s %ss"%(len(ents),etype))
267 size = 100
268 batches = [ents[pos:pos + size] for pos in range(0, len(ents), size)]
267 cnt = 0 269 cnt = 0
268 for ent in ents: 270 for batch in batches:
269 cnt += 1 271 cnt += size
270 if cnt % 100 == 0: 272 if cnt % 100 == 0:
271 print(" %s %ss"%(cnt, etype)) 273 print(" %s %ss"%(cnt, etype))
272 274
273 # extract ismi id 275 # extract list of ismi ids
274 ismi_id = ent['id'] 276 ismi_ids = [str(ent['id']) for ent in batch]
275 277
276 node = None 278 # fetch full data for list of entities
277 279 ent_json = readJSON(entsByIdURL%','.join(ismi_ids))
278 # fetch full data for entity 280 ents_data = ent_json['ents']
279 ent_json = readJSON(entURL%ismi_id) 281
280 ent_data = ent_json['ent'] 282 # iterate through results batch
281 # create neo4j node 283 for ent_data in ents_data:
282 if keep_nodes: 284 ismi_id = ent_data['id']
283 node = getNode(ismi_id) 285 if ismi_id in n4j_nodes:
284 286 print("ERROR: entity with id=%s exists!"%ismi_id)
285 if ismi_id in n4j_nodes: 287 return
286 print("ERROR: entity with id=%s exists!"%ismi_id) 288
287 return 289 # create neo4j node
288
289 if node is None:
290 node = nodeFromEnt(ent_data, etype) 290 node = nodeFromEnt(ent_data, etype)
291 291
292 # save node reference 292 # save node reference
293 n4j_nodes[ismi_id] = node 293 n4j_nodes[ismi_id] = node
294 294
295 # extract relations 295 # extract relations
296 relsFromEnt(ent_data, ismi_relations) 296 relsFromEnt(ent_data, ismi_relations)
297 297
298 #if cnt >= 100: 298 #if cnt >= 100:
299 # return 299 # return
300 300
301 301