Mercurial > hg > drupalISMI
comparison importFromOpenMind/importer/ismi2neo4j.py @ 19:ca1e02a2a9c4
unfilteredIsmi: OpenMind-to-JSON exporter, like filterISMI.
ismi2model: OpenMind importer, like ismi2neo4j, that saves a networkx pickle file.
author | casties |
---|---|
date | Wed, 09 Sep 2015 17:32:42 +0200 |
parents | 0827156df210 |
children | a9bfd49355f8 |
comparison
equal
deleted
inserted
replaced
18:0827156df210 | 19:ca1e02a2a9c4 |
---|---|
8 add_inverse_relations = True | 8 add_inverse_relations = True |
9 | 9 |
10 # add relations to these objects as attributes with the relations name | 10 # add relations to these objects as attributes with the relations name |
11 contract_relations_into_attributes = ['PLACE', 'ALIAS'] | 11 contract_relations_into_attributes = ['PLACE', 'ALIAS'] |
12 | 12 |
13 # try to find and re-use existing nodes in neo4j (slow!) | |
14 keep_nodes = False | |
15 | |
16 # label added to all nodes | 13 # label added to all nodes |
17 project_label = '_ismi2' | 14 project_label = '_ismi3' |
18 | 15 |
19 # OpenMind base URL | 16 # OpenMind base URL |
20 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" | 17 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" |
21 | 18 |
22 # neo4j base URL | 19 # neo4j base URL |
23 neo4jBaseURL = "http://localhost:7474/db/data/" | 20 neo4jBaseURL = "http://localhost:7474/db/data/" |
24 | 21 |
25 | 22 |
26 entsURL=baseURL+"method=get_ents&oc=%s" | 23 entsURL=baseURL+"method=get_ents&oc=%s" |
27 | 24 |
 | 25 entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s" |
 | 26 |
28 entURL=baseURL+"method=get_ent&id=%s&include_content=True" | 27 entURL=baseURL+"method=get_ent&id=%s&include_content=True" |
29 | 28 |
30 | 29 |
31 def readJSON(url): | 30 def readJSON(url): |
 | 31 #print("JSON loading %s"%url) |
32 wsh=urllib.request.urlopen(url) | 32 wsh=urllib.request.urlopen(url) |
33 txt = wsh.read() | 33 txt = wsh.read() |
34 return json.loads(txt.decode("utf-8")) | 34 return json.loads(txt.decode("utf-8")) |
35 | 35 |
36 defs_json = readJSON(baseURL+"method=get_defs") | 36 defs_json = readJSON(baseURL+"method=get_defs") |
241 else: | 241 else: |
242 att_name += '2' | 242 att_name += '2' |
243 | 243 |
244 # add target node's label as attribute | 244 # add target node's label as attribute |
245 #print("contracting src to attribute %s on id=%s"%(att_name, tar_id)) | 245 #print("contracting src to attribute %s on id=%s"%(att_name, tar_id)) |
246 src.set(att_name, src.get('label')) | 246 tar.set(att_name, src.get('label')) |
247 | 247 |
248 if add_inverse_relations: | 248 if add_inverse_relations: |
249 n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar), | 249 n4j_rel = [gdb.relationships.create(src, fixName(rel_name, is_src_rel=True), tar), |
250 gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)] | 250 gdb.relationships.create(tar, fixName(rel_name, is_tar_rel=True), src)] |
251 | 251 |
262 """ | 262 """ |
263 # read json for all entities of given type | 263 # read json for all entities of given type |
264 json = readJSON(entsURL%etype) | 264 json = readJSON(entsURL%etype) |
265 ents = json['ents'] | 265 ents = json['ents'] |
266 print("importing %s %ss"%(len(ents),etype)) | 266 print("importing %s %ss"%(len(ents),etype)) |
 | 267 size = 100 |
 | 268 batches = [ents[pos:pos + size] for pos in range(0, len(ents), size)] |
267 cnt = 0 | 269 cnt = 0 |
268 for ent in ents: | 270 for batch in batches: |
269 cnt += 1 | 271 cnt += size |
270 if cnt % 100 == 0: | 272 if cnt % 100 == 0: |
271 print(" %s %ss"%(cnt, etype)) | 273 print(" %s %ss"%(cnt, etype)) |
272 | 274 |
273 # extract ismi id | 275 # extract list of ismi ids |
274 ismi_id = ent['id'] | 276 ismi_ids = [str(ent['id']) for ent in batch] |
275 | 277 |
276 node = None | 278 # fetch full data for list of entities |
277 | 279 ent_json = readJSON(entsByIdURL%','.join(ismi_ids)) |
278 # fetch full data for entity | 280 ents_data = ent_json['ents'] |
279 ent_json = readJSON(entURL%ismi_id) | 281 |
280 ent_data = ent_json['ent'] | 282 # iterate through results batch |
281 # create neo4j node | 283 for ent_data in ents_data: |
282 if keep_nodes: | 284 ismi_id = ent_data['id'] |
283 node = getNode(ismi_id) | 285 if ismi_id in n4j_nodes: |
284 | 286 print("ERROR: entity with id=%s exists!"%ismi_id) |
285 if ismi_id in n4j_nodes: | 287 return |
286 print("ERROR: entity with id=%s exists!"%ismi_id) | 288 |
287 return | 289 # create neo4j node |
288 | |
289 if node is None: | |
290 node = nodeFromEnt(ent_data, etype) | 290 node = nodeFromEnt(ent_data, etype) |
291 | 291 |
292 # save node reference | 292 # save node reference |
293 n4j_nodes[ismi_id] = node | 293 n4j_nodes[ismi_id] = node |
294 | 294 |
295 # extract relations | 295 # extract relations |
296 relsFromEnt(ent_data, ismi_relations) | 296 relsFromEnt(ent_data, ismi_relations) |
297 | 297 |
298 #if cnt >= 100: | 298 #if cnt >= 100: |
299 # return | 299 # return |
300 | 300 |
301 | 301 |