comparison importFromOpenMind/importer/ismi2model.py @ 25:5bdcb5805d29

updated openmind-networkx-neo4j conversion with dates, locations and links.
author casties
date Thu, 24 Sep 2015 18:17:41 +0200
parents 45a823b5bf33
children 3fce3fa9097e
comparison
equal deleted inserted replaced
24:97f2da68fb5f 25:5bdcb5805d29
6 ## configure behaviour 6 ## configure behaviour
7 7
8 # output filename 8 # output filename
9 output_fn = "ismi_graph.gpickle" 9 output_fn = "ismi_graph.gpickle"
10 10
11 # contract relations to these objects into attributes with the relations' name
12 #contract_relations_into_attributes = ['PLACE', 'ALIAS']
13 contract_relations_into_attributes = []
14
15 # OpenMind base URL 11 # OpenMind base URL
16 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" 12 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?"
13
14 # node types to exclude from the graph
15 exclude_objects_of_type = ['DIGITALIZATION', 'REFERENCE']
17 16
18 17
19 entsURL=baseURL+"method=get_ents&oc=%s" 18 entsURL=baseURL+"method=get_ents&oc=%s"
20 19
21 entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s" 20 entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s"
109 108
110 elif ct == 'date': 109 elif ct == 'date':
111 # date attribute 110 # date attribute
112 key = att['name'] 111 key = att['name']
113 val = att['ov'] 112 val = att['ov']
114 print("don't know what to do with date: %s=%s"%(key,val)) 113 # try to parse date object to get gregorian year
114 try:
115 year = None
116 date_json = json.loads(val)
117 if 'from' in date_json:
118 year = date_json['from'].get('year', None)
119 elif 'date' in date_json:
120 year = date_json['date'].get('year', None)
121 else:
122 print("don't know what to do with date on %s: %s=%s"%(ent['id'],key,val))
123
124 if year is not None:
125 attrs[key] = year
126
127 except:
128 print("ERROR: invalid JSON in date: %s"%repr(val))
115 129
116 elif ct == 'old': 130 elif ct == 'old':
117 # ignore attribute 131 # ignore attribute
118 continue 132 continue
119 133
126 oc = ent['oc'] 140 oc = ent['oc']
127 if oc != etype: 141 if oc != etype:
128 print("ERROR: entity type doesn't match!") 142 print("ERROR: entity type doesn't match!")
129 return null 143 return null
130 144
145 # rename if type attr exists
146 if 'type' in attrs:
147 attrs['type2'] = attrs['type']
148
149 # set type
131 attrs['type'] = fixName(oc) 150 attrs['type'] = fixName(oc)
132 151
133 ismi_id = ent['id'] 152 ismi_id = ent['id']
134 # rename id to ismi_id 153 # rename id to ismi_id
135 attrs['ismi_id'] = ismi_id 154 attrs['ismi_id'] = ismi_id
152 Adds JSON to dict relations under relation's id. 171 Adds JSON to dict relations under relation's id.
153 """ 172 """
154 # go through src_rels and tar_rels 173 # go through src_rels and tar_rels
155 rels = ent.get('src_rels', []) + ent.get('tar_rels', []) 174 rels = ent.get('src_rels', []) + ent.get('tar_rels', [])
156 for rel in rels: 175 for rel in rels:
176 src_type = rel['src_oc']
177 tar_type = rel['tar_oc']
178 if src_type in exclude_objects_of_type or tar_type in exclude_objects_of_type:
179 # skip relation to excluded objects
180 continue
181
157 rel_id = rel['id'] 182 rel_id = rel['id']
158 if rel_id in relations: 183 if rel_id in relations:
159 old_rel = relations[rel_id] 184 old_rel = relations[rel_id]
160 if rel != old_rel: 185 if rel != old_rel:
161 print("ERROR: relation is different: %s != %s"%(repr(rel), repr(old_rel))) 186 print("ERROR: relation is different: %s != %s"%(repr(rel), repr(old_rel)))
192 continue 217 continue
193 218
194 if not tar_id in nodes: 219 if not tar_id in nodes:
195 print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id)) 220 print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id))
196 continue 221 continue
197
198 if contract_relations_into_attributes:
199 # contract source relations
200 tar_type = rel['tar_oc']
201 if tar_type in contract_relations_into_attributes:
202 att_name = fixName(rel_name, att_from_rel=True)
203 # TODO: clean up attribute names
204 while src.get(att_name, None) is not None:
205 # attribute exists
206 if att_name[-1].isnumeric():
207 # increment last digit
208 att_name = att_name[:-1] + str(int(att_name[-1]) + 1)
209 else:
210 att_name += '2'
211
212 # add target node's label as attribute
213 #print("contracting tar to attribute %s on id=%s"%(att_name, src_id))
214 nx_graph.node[src_id][att_name] = nx_graph.node[tar_id]['label']
215
216 # contract target relations
217 src_type = rel['src_oc']
218 if src_type in contract_relations_into_attributes:
219 att_name = fixName(rel_name, att_from_rel=True)
220 # TODO: clean up attribute names
221 while tar.get(att_name, None) is not None:
222 # attribute exists
223 if att_name[-1].isnumeric():
224 # increment last digit
225 att_name = att_name[:-1] + str(int(att_name[-1]) + 1)
226 else:
227 att_name += '2'
228
229 # add target node's label as attribute
230 #print("contracting src to attribute %s on id=%s"%(att_name, tar_id))
231 nx_graph.node[tar_id][att_name] = nx_graph.node[src_id]['label']
232 222
233 # create relation with type 223 # create relation with type
234 nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name), ismi_id=rel_id) 224 nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name), ismi_id=rel_id)
235 225
236 nx_relations[rel_id] = nx_rel 226 nx_relations[rel_id] = nx_rel
265 ismi_id = ent_data['id'] 255 ismi_id = ent_data['id']
266 if ismi_id in nx_nodes: 256 if ismi_id in nx_nodes:
267 print("ERROR: entity with id=%s exists!"%ismi_id) 257 print("ERROR: entity with id=%s exists!"%ismi_id)
268 return 258 return
269 259
270 # create neo4j node 260 # create networkx node
271 node = nodeFromEnt(ent_data, etype) 261 node = nodeFromEnt(ent_data, etype)
272 262
273 # save node reference 263 # save node reference
274 nx_nodes[ismi_id] = node 264 nx_nodes[ismi_id] = node
275 265
283 # In[119]: 273 # In[119]:
284 274
285 def importAllEnts(etypes): 275 def importAllEnts(etypes):
286 276
287 for etype in etypes: 277 for etype in etypes:
278 if etype in exclude_objects_of_type:
279 # skip this type
280 continue
281
288 importEnts(etype) 282 importEnts(etype)
289 283
290 relationsFromRels(ismi_relations, nx_nodes) 284 relationsFromRels(ismi_relations, nx_nodes)
291 285
292 286
298 if len(sys.argv) > 1: 292 if len(sys.argv) > 1:
299 output_fn = sys.argv[1] 293 output_fn = sys.argv[1]
300 294
301 # import everything 295 # import everything
302 print("Reading graph from OpenMind at %s"%baseURL) 296 print("Reading graph from OpenMind at %s"%baseURL)
297 if len(exclude_objects_of_type) > 0:
298 print(" Skipping objects of type %s"%exclude_objects_of_type);
299
303 importAllEnts(ismi_defs) 300 importAllEnts(ismi_defs)
304 #importAllEnts(['TEXT']) 301 #importAllEnts(['TEXT'])
305 302
306 print("Graph info: %s"%networkx.info(nx_graph)) 303 print("Graph info: %s"%networkx.info(nx_graph))
307 #print(" nodes:%s"%repr(nx_graph.nodes(data=True))) 304 #print(" nodes:%s"%repr(nx_graph.nodes(data=True)))