annotate importFromOpenMind/importer/ismi2model.py @ 19:ca1e02a2a9c4

unfilteredIsmi: openmind to json exporter like filterISMI. ismi2model: openmind importer like ismi2neo4j that saves networkx pickle file.
author casties
date Wed, 09 Sep 2015 17:32:42 +0200
parents
children 45a823b5bf33
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
1 import urllib.request
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
2 import json
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
3 import networkx
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
4
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
## configure behaviour

# output filename
output_fn = "ismi_graph.gpickle"

# object types whose relations get contracted into attributes carrying the
# relation's name, e.g. ['PLACE', 'ALIAS']
contract_relations_into_attributes = []

# OpenMind base URL
baseURL = "http://localhost:18080/ismi-richfaces/jsonInterface?"

# query templates on top of baseURL; each takes one %s argument
entsURL = baseURL + "method=get_ents&oc=%s"
entsByIdURL = baseURL + "method=get_ents&include_content=True&ids=%s"
entURL = baseURL + "method=get_ent&id=%s&include_content=True"
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
23
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
24
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def readJSON(url):
    """Fetch the given URL and return its body parsed as JSON.

    The response body is decoded as UTF-8 before parsing.

    Args:
        url: URL to open with urllib.request.urlopen.
    Returns:
        the decoded JSON value.
    """
    # the with-block closes the response handle even if read() fails
    with urllib.request.urlopen(url) as response:
        body = response.read()

    return json.loads(body.decode("utf-8"))
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
30
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
# load the definitions from OpenMind
defs_json = readJSON(baseURL + "method=get_defs")

# current list of all definitions
ismi_defs = [definition['ov'] for definition in defs_json['defs']]

#ismi_types=["PERSON","WITNESS","CODEX","PLACE","COLLECTION","REPOSITORY"]

# graph that receives the imported nodes and edges
nx_graph = networkx.MultiDiGraph()

# module-level lookup tables filled during the import
nx_nodes = {}
ismi_relations = {}
nx_relations = {}

# attribute names that are never copied onto a node
ent_exclude_attrs = ['lw', 'node_type', 'nov']
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
50
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
    """Return a cleaned-up version of an OpenMind name.

    Args:
        name: the name to clean up.
        is_src_rel: accepted but currently leaves the name unchanged.
        is_tar_rel: if true, '<' is put in front of the name.
        att_from_rel: if true, every occurrence of 'is_', 'has_', 'was_'
            and '_of' is removed from the name.
    Returns:
        the cleaned-up name.
    """
    # these are too embarrassing...
    # only the first matching spelling is rewritten, upper case wins
    for wrong, right in (('FLORUIT', 'FLOURISH'), ('floruit', 'flourish')):
        if wrong in name:
            name = name.replace(wrong, right)
            break

    # is_src_rel: the '>' suffix is disabled, nothing to do

    if is_tar_rel:
        name = '<' + name

    if att_from_rel:
        # clean up relations as attribute names
        for fragment in ('is_', 'has_', 'was_', '_of'):
            name = name.replace(fragment, '')

    return name
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
74
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
75
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def nodeFromEnt(ent, etype):
    """Create a networkx node from the given JSON entity.

    Copies the entity's text-like and numeric attributes (except those named
    in ent_exclude_attrs) into the node attributes, adds 'type', 'ismi_id'
    and, if the entity has an 'ov', 'label', and adds the node to nx_graph
    under the entity's id.

    Args:
        ent: JSON entity dict; the keys 'atts', 'oc' and 'id' are required,
            'ov' is optional.
        etype: expected object class; has to equal ent['oc'].
    Returns:
        nx_graph.node[id] for the new node, or None if the entity's type
        does not match etype.
    """
    text_types = ('text', 'arabic', 'bool', 'boolean', 'url', 'language')
    attrs = {}
    # go through all attributes
    for att in ent['atts']:
        ct = att.get('content_type', None)
        if ct is None or ct.lower() in text_types:
            # normal text attribute (assume no content_type is text too...)
            convert = None

        elif ct == 'num':
            # number attribute, assume num is int
            convert = int

        elif ct == 'date':
            # date attributes are not imported
            continue

        elif ct == 'old':
            # ignore attribute
            continue

        else:
            print("WARN: attribute with unknown content_type: %s"%repr(att))
            # ignore other content types
            continue

        key = att['name']
        val = att['ov']
        if key in ent_exclude_attrs:
            # exclude attribute
            continue

        # keep attribute
        attrs[key] = val if convert is None else convert(val)

    # process base attributes
    oc = ent['oc']
    if oc != etype:
        print("ERROR: entity type doesn't match!")
        # was 'return null', which raises NameError in Python
        return None

    attrs['type'] = fixName(oc)

    # rename id to ismi_id
    ismi_id = ent['id']
    attrs['ismi_id'] = ismi_id

    ov = ent.get('ov', None)
    if ov is not None:
        # save ov as label
        attrs['label'] = ov

    # create node with attributes
    nx_graph.add_node(ismi_id, **attrs)
    # NOTE(review): Graph.node appears to be gone in newer networkx releases
    # (replaced by Graph.nodes) -- confirm against the installed version
    return nx_graph.node[ismi_id]
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
146
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
147
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def relsFromEnt(ent, relations):
    """Collect all relations of a JSON entity.

    Every relation found under 'src_rels' and 'tar_rels' is stored in the
    dict relations under the relation's id. If a different relation is
    already stored under that id an error is printed and the stored one is
    kept.

    Args:
        ent: JSON entity dict; 'src_rels' and 'tar_rels' are optional.
        relations: dict of relations by id, updated in place.
    Returns:
        the dict relations that was passed in.
    """
    # source relations first, then target relations
    for rel in ent.get('src_rels', []) + ent.get('tar_rels', []):
        rel_id = rel['id']
        if rel_id in relations and relations[rel_id] != rel:
            print("ERROR: relation is different: %s != %s"%(repr(rel), repr(relations[rel_id])))
        else:
            relations[rel_id] = rel

    return relations
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
166
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
167
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def _uniqueAttName(attrs, base):
    """Return base, or base plus the first suffix 2, 3, ... unused in attrs."""
    candidate = base
    suffix = 1
    while attrs.get(candidate, None) is not None:
        # attribute exists, try the next number
        suffix += 1
        candidate = base + str(suffix)

    return candidate


def _contractRelation(owner_id, other_id, rel_name):
    """Store the label of node other_id as an attribute of node owner_id."""
    owner = nx_graph.node[owner_id]
    att_name = _uniqueAttName(owner, fixName(rel_name, att_from_rel=True))
    owner[att_name] = nx_graph.node[other_id]['label']


def relationsFromRels(rels, nodes):
    """Create the edges of nx_graph from the given JSON relations.

    A relation whose source or target id is not in nodes is reported and
    skipped. If contract_relations_into_attributes is not empty, relations
    to or from the listed object types are also copied onto the node at the
    other end as an attribute holding the listed node's label. Every
    remaining relation becomes an edge in nx_graph with the cleaned-up
    relation name as 'type'.

    Args:
        rels: dict of JSON relations
        nodes: dict of existing nodes by ismi id
    Returns:
        the module-level dict nx_relations, with the value returned by
        nx_graph.add_edge stored under each imported relation's id
    """
    # go through all rels
    print("importing %s relations"%len(rels))
    for cnt, rel in enumerate(rels.values(), 1):
        if cnt % 100 == 0:
            print(" %s relations"%cnt)

        rel_id = rel['id']
        rel_name = rel['name']
        src_id = rel['src_id']
        tar_id = rel['tar_id']
        if src_id not in nodes:
            print("ERROR: relation %s src node %s missing!"%(rel_id,src_id))
            continue

        if tar_id not in nodes:
            print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id))
            continue

        if contract_relations_into_attributes:
            # contract source relations:
            # add target node's label as attribute of the source node
            if rel['tar_oc'] in contract_relations_into_attributes:
                _contractRelation(src_id, tar_id, rel_name)

            # contract target relations:
            # add source node's label as attribute of the target node
            if rel['src_oc'] in contract_relations_into_attributes:
                _contractRelation(tar_id, src_id, rel_name)

        # create relation with type
        nx_relations[rel_id] = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name))

    return nx_relations
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
238
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
239
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def importEnts(etype):
    """Import all entities of the given type.

    Reads the list of entities of type ``etype`` via ``entsURL`` and then
    fetches the full entity data in batches of 100 ids via ``entsByIdURL``.
    For every entity a node is created with ``nodeFromEnt`` and stored in
    the module-level ``nx_nodes`` dict under the entity's id; afterwards
    ``relsFromEnt`` is called with the module-level ``ismi_relations``.

    If an entity id is already present in ``nx_nodes`` an error message is
    printed and the function returns immediately, i.e. the remaining
    entities of this type are not imported.

    :param etype: entity type, substituted into ``entsURL``.
    :returns: None.
    """
    # read json for all entities of given type
    # (not named ``json`` so the imported json module is not shadowed)
    ents_json = readJSON(entsURL % etype)
    ents = ents_json['ents']
    print("importing %s %ss" % (len(ents), etype))

    size = 100
    cnt = 0
    for pos in range(0, len(ents), size):
        batch = ents[pos:pos + size]

        # count the real batch length so that the last, possibly shorter,
        # batch does not over-report the number of processed entities
        cnt += len(batch)
        print(" %s %ss" % (cnt, etype))

        # extract list of ismi ids
        ismi_ids = [str(ent['id']) for ent in batch]

        # fetch full data for list of entities
        ent_json = readJSON(entsByIdURL % ','.join(ismi_ids))
        ents_data = ent_json['ents']

        # iterate through results batch
        for ent_data in ents_data:
            ismi_id = ent_data['id']
            if ismi_id in nx_nodes:
                # a duplicate id aborts the import of this entity type
                print("ERROR: entity with id=%s exists!" % ismi_id)
                return

            # create the graph node for this entity
            node = nodeFromEnt(ent_data, etype)

            # save node reference
            nx_nodes[ismi_id] = node

            # extract relations
            relsFromEnt(ent_data, ismi_relations)
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
280
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
281
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
282 # In[119]:
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
283
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def importAllEnts(etypes):
    """Import the entities of every given type, then create the relations.

    Calls ``importEnts`` once for each entry of ``etypes`` and afterwards
    passes the module-level ``ismi_relations`` and ``nx_nodes`` to
    ``relationsFromRels``.

    :param etypes: iterable of entity types, each handed to ``importEnts``.
    :returns: None.
    """
    for entity_type in etypes:
        importEnts(entity_type)

    # relations are created only after all entity types have been imported
    relationsFromRels(ismi_relations, nx_nodes)
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
290
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
291
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
292 # In[120]:
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
293
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
# Run the import for every entry of ismi_defs.
# Alternative call kept for reference (imports only 'TEXT' entities):
#   importAllEnts(['TEXT'])
importAllEnts(ismi_defs)

# Print a short summary of the resulting graph.
print("Graph info: %s" % networkx.info(nx_graph))
print("Number of nodes: %s" % networkx.number_of_nodes(nx_graph))
print("Number of edges: %s" % networkx.number_of_edges(nx_graph))
# Debugging aid: dump all nodes with their attributes.
#   print(" nodes:%s" % repr(nx_graph.nodes(data=True)))

# Export the graph as a pickle file named by output_fn.
networkx.write_gpickle(nx_graph, output_fn)
print("Wrote file %s" % output_fn)