Mercurial > hg > drupalISMI
diff importFromOpenMind/importer/model2model.py @ 25:5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
author | casties |
---|---|
date | Thu, 24 Sep 2015 18:17:41 +0200 |
parents | 97f2da68fb5f |
children | 248bf8d1e2e7 |
line wrap: on
line diff
```diff
--- a/importFromOpenMind/importer/model2model.py	Wed Sep 23 19:47:02 2015 +0200
+++ b/importFromOpenMind/importer/model2model.py	Thu Sep 24 18:17:41 2015 +0200
@@ -1,5 +1,6 @@
 import networkx as nx
 import sys
+import csv
 
 ## configure behaviour
 
@@ -8,13 +9,24 @@
 output_fn = 'ismi_graph_mod.gpickle'
 
 # operations
-ops = ['contract', 'inv_rels']
+ops = ['locate', 'contract', 'inv_rels', 'add_links']
+
+# types of object to locate
+locate_objects_of_type = ['PLACE']
 
-# add relations to these objects as attributes with the relations name
-contract_relations_into_attributes = {'PLACE': ['label'],
+# file with place location information
+places_fn = 'ismi_places_loc.csv'
+
+# node types to remove from the graph
+#remove_objects_of_type = ['DIGITALIZATION', 'REFERENCE']
+
+# add relations to these objects as attributes with the relation's name
+contract_relations_into_attributes = {'PLACE': ['label', 'latitude', 'longitude'],
                                       'ALIAS': ['label']}
 
 
+# add URLs to nodes using an attribute in a pattern
+add_link_attributes = {'ismi_id': 'https://ismi-dev.mpiwg-berlin.mpg.de/drupal-ismi/entity/%s'}
 
 
 def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
@@ -42,11 +54,158 @@
     return name
 
 
+def locatePlaces(nx_graph):
+    """add location information to objects in the graph"""
+
+    print("Adding location information from %s to %s."%(places_fn, locate_objects_of_type))
+    cnt = 0
+
+    # read place location file
+    locations = {}
+    with open(places_fn, encoding='utf-8') as csvfile:
+        reader = csv.DictReader(csvfile)
+        for row in reader:
+            lat = row['Latitude']
+            lon = row['Longitude']
+            name = row['Address']
+            if lat and lon:
+                locations[name] = {'latitude': lat, 'longitude': lon}
+
+    # iterate all nodes
+    for n in nx.nodes_iter(nx_graph):
+        attrs = nx_graph.node[n]
+        if attrs['type'] in locate_objects_of_type:
+            # locatable object
+            name = attrs['label']
+            if name in locations:
+                # place name match
+                location = locations[name]
+                attrs['latitude'] = location['latitude']
+                attrs['longitude'] = location['longitude']
+
+            else:
+                print("WARNING: no location for name '%s'"%name)
+
+        cnt += 1
+        if cnt % 100 == 0:
+            print(" %s nodes"%cnt)
+
+
+
+def genAttName(attrs, name):
+    """Generate new attribute name.
+    """
+    while attrs.get(name, None) is not None:
+        # attribute exists
+        if name[-1].isnumeric(): # increment last digit
+            name = name[:-1] + str(int(name[-1]) + 1)
+        else:
+            name += '2'
+
+    return name
+
+
 
 def contractRelations(nx_graph):
     """contract relations into attributes"""
+    print("Contracting relations to attributes.")
+    cnt = 0
+    for nx_edge in nx.edges_iter(nx_graph):
+        (nx_src, nx_tar) = nx_edge
+        # get attributes of edge
+        rel_attrs = nx_graph.edge[nx_src][nx_tar][0]
+        rel_type = rel_attrs['type']
+        # get attributes of source and target nodes
+        src_attrs = nx_graph.node[nx_src]
+        tar_attrs = nx_graph.node[nx_tar]
+
+        # contract source relations
+        tar_type = tar_attrs['type']
+        if tar_type in contract_relations_into_attributes:
+            # get list of attributes to transfer
+            transfer_atts = contract_relations_into_attributes[tar_type]
+            for transfer_att in transfer_atts:
+                if transfer_att not in tar_attrs:
+                    # target has no attribute
+                    continue
+
+                # name for new attribute starts with relation name
+                att_name = fixName(rel_type, att_from_rel=True)
+                # then attribute name
+                if transfer_att != 'label':
+                    att_name += "_%s"%transfer_att
+
+                # then generate unique name
+                att_name = genAttName(src_attrs, att_name)
+                # add target node's attribute
+                src_attrs[att_name] = tar_attrs.get(transfer_att)
+
+        # contract target relations
+        src_type = src_attrs['type']
+        if src_type in contract_relations_into_attributes:
+            # get list of attributes to transfer
+            transfer_atts = contract_relations_into_attributes[src_type]
+            for transfer_att in transfer_atts:
+                if transfer_att not in src_attrs:
+                    # target has no attribute
+                    continue
+
+                # name for new attribute starts with relation name
+                att_name = fixName(rel_type, att_from_rel=True)
+                # then attribute name
+                if transfer_att != 'label':
+                    att_name += "_%s"%transfer_att
+
+                # then generate unique name
+                att_name = genAttName(tar_attrs, att_name)
+                # add target node's attribute
+                tar_attrs[att_name] = src_attrs.get(transfer_att)
+
+        cnt += 1
+        if cnt % 100 == 0:
+            print(" %s relations"%cnt)
+
+
 
+def invertRelations(nx_graph):
+    """Add inverse relations to each relation"""
+    print("Adding inverse relations.")
+    # copy list of edges because we add edges in the loop
+    edges = nx.edges(nx_graph)[:]
+    # iterate list
+    cnt = 0
+    for nx_edge in edges:
+        (nx_src, nx_tar) = nx_edge
+        # get attributes of edge
+        rel_attrs = nx_graph.edge[nx_src][nx_tar][0]
+        rel_type = rel_attrs['type']
+        rel_id = rel_attrs['ismi_id']
+        # create new relation
+        nx_graph.add_edge(nx_tar, nx_src, type=fixName(rel_type, is_tar_rel=True), ismi_id=-rel_id)
+
+        cnt += 1
+        if cnt % 100 == 0:
+            print(" %s relations"%cnt)
+
 
+def addLinks(nx_graph):
+    """Add link attributes to all nodes."""
+
+    print("Adding links: %s"%repr(add_link_attributes))
+    cnt = 0
+    for link_att, link_pattern in add_link_attributes.items():
+        # iterate all nodes
+        for n in nx.nodes_iter(nx_graph):
+            attrs = nx_graph.node[n]
+            if link_att in attrs:
+                url = link_pattern%attrs[link_att]
+                # TODO: which target attribute for multiple?
+                attrs['link'] = url
+
+            cnt += 1
+            if cnt % 100 == 0:
+                print(" %s nodes"%cnt)
+
 
 ## main
 
@@ -65,11 +224,19 @@
 
 # operate
 for op in ops:
-    if op == 'contract':
+    if op == 'locate':
+        locatePlaces(nx_graph)
+
+    elif op == 'contract':
         contractRelations(nx_graph)
 
     elif op == 'inv_rels':
         invertRelations(nx_graph)
 
+    elif op == 'add_links':
+        addLinks(nx_graph)
+
+print("Writing graph to %s"%output_fn)
+nx_graph = nx.write_gpickle(nx_graph, output_fn)
 
 print("Done.")
```