Mercurial > hg > drupalISMI
comparison importFromOpenMind/importer/ismi2model.py @ 25:5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
author | casties |
---|---|
date | Thu, 24 Sep 2015 18:17:41 +0200 |
parents | 45a823b5bf33 |
children | 3fce3fa9097e |
comparison legend: equal, deleted, inserted, replaced
24:97f2da68fb5f | 25:5bdcb5805d29 |
---|---|
6 ## configure behaviour | 6 ## configure behaviour |
7 | 7 |
8 # output filename | 8 # output filename |
9 output_fn = "ismi_graph.gpickle" | 9 output_fn = "ismi_graph.gpickle" |
10 | 10 |
11 # contract relations to these objects into attributes with the relations' name | |
12 #contract_relations_into_attributes = ['PLACE', 'ALIAS'] | |
13 contract_relations_into_attributes = [] | |
14 | |
15 # OpenMind base URL | 11 # OpenMind base URL |
16 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" | 12 baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?" |
13 | |
14 # node types to exclude from the graph | |
15 exclude_objects_of_type = ['DIGITALIZATION', 'REFERENCE'] | |
17 | 16 |
18 | 17 |
19 entsURL=baseURL+"method=get_ents&oc=%s" | 18 entsURL=baseURL+"method=get_ents&oc=%s" |
20 | 19 |
21 entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s" | 20 entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s" |
109 | 108 |
110 elif ct == 'date': | 109 elif ct == 'date': |
111 # date attribute | 110 # date attribute |
112 key = att['name'] | 111 key = att['name'] |
113 val = att['ov'] | 112 val = att['ov'] |
114 print("don't know what to do with date: %s=%s"%(key,val)) | 113 # try to parse date object to get gregorian year |
114 try: | |
115 year = None | |
116 date_json = json.loads(val) | |
117 if 'from' in date_json: | |
118 year = date_json['from'].get('year', None) | |
119 elif 'date' in date_json: | |
120 year = date_json['date'].get('year', None) | |
121 else: | |
122 print("don't know what to do with date on %s: %s=%s"%(ent['id'],key,val)) | |
123 | |
124 if year is not None: | |
125 attrs[key] = year | |
126 | |
127 except: | |
128 print("ERROR: invalid JSON in date: %s"%repr(val)) | |
115 | 129 |
116 elif ct == 'old': | 130 elif ct == 'old': |
117 # ignore attribute | 131 # ignore attribute |
118 continue | 132 continue |
119 | 133 |
126 oc = ent['oc'] | 140 oc = ent['oc'] |
127 if oc != etype: | 141 if oc != etype: |
128 print("ERROR: entity type doesn't match!") | 142 print("ERROR: entity type doesn't match!") |
129 return null | 143 return null |
130 | 144 |
145 # rename if type attr exists | |
146 if 'type' in attrs: | |
147 attrs['type2'] = attrs['type'] | |
148 | |
149 # set type | |
131 attrs['type'] = fixName(oc) | 150 attrs['type'] = fixName(oc) |
132 | 151 |
133 ismi_id = ent['id'] | 152 ismi_id = ent['id'] |
134 # rename id to ismi_id | 153 # rename id to ismi_id |
135 attrs['ismi_id'] = ismi_id | 154 attrs['ismi_id'] = ismi_id |
152 Adds JSON to dict relations under relation's id. | 171 Adds JSON to dict relations under relation's id. |
153 """ | 172 """ |
154 # go through src_rels and tar_rels | 173 # go through src_rels and tar_rels |
155 rels = ent.get('src_rels', []) + ent.get('tar_rels', []) | 174 rels = ent.get('src_rels', []) + ent.get('tar_rels', []) |
156 for rel in rels: | 175 for rel in rels: |
176 src_type = rel['src_oc'] | |
177 tar_type = rel['tar_oc'] | |
178 if src_type in exclude_objects_of_type or tar_type in exclude_objects_of_type: | |
179 # skip relation to excluded objects | |
180 continue | |
181 | |
157 rel_id = rel['id'] | 182 rel_id = rel['id'] |
158 if rel_id in relations: | 183 if rel_id in relations: |
159 old_rel = relations[rel_id] | 184 old_rel = relations[rel_id] |
160 if rel != old_rel: | 185 if rel != old_rel: |
161 print("ERROR: relation is different: %s != %s"%(repr(rel), repr(old_rel))) | 186 print("ERROR: relation is different: %s != %s"%(repr(rel), repr(old_rel))) |
192 continue | 217 continue |
193 | 218 |
194 if not tar_id in nodes: | 219 if not tar_id in nodes: |
195 print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id)) | 220 print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id)) |
196 continue | 221 continue |
197 | |
198 if contract_relations_into_attributes: | |
199 # contract source relations | |
200 tar_type = rel['tar_oc'] | |
201 if tar_type in contract_relations_into_attributes: | |
202 att_name = fixName(rel_name, att_from_rel=True) | |
203 # TODO: clean up attribute names | |
204 while src.get(att_name, None) is not None: | |
205 # attribute exists | |
206 if att_name[-1].isnumeric(): | |
207 # increment last digit | |
208 att_name = att_name[:-1] + str(int(att_name[-1]) + 1) | |
209 else: | |
210 att_name += '2' | |
211 | |
212 # add target node's label as attribute | |
213 #print("contracting tar to attribute %s on id=%s"%(att_name, src_id)) | |
214 nx_graph.node[src_id][att_name] = nx_graph.node[tar_id]['label'] | |
215 | |
216 # contract target relations | |
217 src_type = rel['src_oc'] | |
218 if src_type in contract_relations_into_attributes: | |
219 att_name = fixName(rel_name, att_from_rel=True) | |
220 # TODO: clean up attribute names | |
221 while tar.get(att_name, None) is not None: | |
222 # attribute exists | |
223 if att_name[-1].isnumeric(): | |
224 # increment last digit | |
225 att_name = att_name[:-1] + str(int(att_name[-1]) + 1) | |
226 else: | |
227 att_name += '2' | |
228 | |
229 # add target node's label as attribute | |
230 #print("contracting src to attribute %s on id=%s"%(att_name, tar_id)) | |
231 nx_graph.node[tar_id][att_name] = nx_graph.node[src_id]['label'] | |
232 | 222 |
233 # create relation with type | 223 # create relation with type |
234 nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name), ismi_id=rel_id) | 224 nx_rel = nx_graph.add_edge(src_id, tar_id, type=fixName(rel_name), ismi_id=rel_id) |
235 | 225 |
236 nx_relations[rel_id] = nx_rel | 226 nx_relations[rel_id] = nx_rel |
265 ismi_id = ent_data['id'] | 255 ismi_id = ent_data['id'] |
266 if ismi_id in nx_nodes: | 256 if ismi_id in nx_nodes: |
267 print("ERROR: entity with id=%s exists!"%ismi_id) | 257 print("ERROR: entity with id=%s exists!"%ismi_id) |
268 return | 258 return |
269 | 259 |
270 # create neo4j node | 260 # create networkx node |
271 node = nodeFromEnt(ent_data, etype) | 261 node = nodeFromEnt(ent_data, etype) |
272 | 262 |
273 # save node reference | 263 # save node reference |
274 nx_nodes[ismi_id] = node | 264 nx_nodes[ismi_id] = node |
275 | 265 |
283 # In[119]: | 273 # In[119]: |
284 | 274 |
285 def importAllEnts(etypes): | 275 def importAllEnts(etypes): |
286 | 276 |
287 for etype in etypes: | 277 for etype in etypes: |
278 if etype in exclude_objects_of_type: | |
279 # skip this type | |
280 continue | |
281 | |
288 importEnts(etype) | 282 importEnts(etype) |
289 | 283 |
290 relationsFromRels(ismi_relations, nx_nodes) | 284 relationsFromRels(ismi_relations, nx_nodes) |
291 | 285 |
292 | 286 |
298 if len(sys.argv) > 1: | 292 if len(sys.argv) > 1: |
299 output_fn = sys.argv[1] | 293 output_fn = sys.argv[1] |
300 | 294 |
301 # import everything | 295 # import everything |
302 print("Reading graph from OpenMind at %s"%baseURL) | 296 print("Reading graph from OpenMind at %s"%baseURL) |
297 if len(exclude_objects_of_type) > 0: | |
298 print(" Skipping objects of type %s"%exclude_objects_of_type); | |
299 | |
303 importAllEnts(ismi_defs) | 300 importAllEnts(ismi_defs) |
304 #importAllEnts(['TEXT']) | 301 #importAllEnts(['TEXT']) |
305 | 302 |
306 print("Graph info: %s"%networkx.info(nx_graph)) | 303 print("Graph info: %s"%networkx.info(nx_graph)) |
307 #print(" nodes:%s"%repr(nx_graph.nodes(data=True))) | 304 #print(" nodes:%s"%repr(nx_graph.nodes(data=True))) |