Mercurial > hg > drupalISMI
view importFromOpenMind/importer/model2model.py @ 31:48bbba800c03
remove unused method.
author | casties |
---|---|
date | Thu, 21 Jan 2016 18:24:51 +0100 |
parents | 870b0b3b272f |
children | 7e2e344c3b87 |
line wrap: on
line source
"""Post-process a pickled networkx graph.

Reads a graph from a gpickle file, applies a configurable list of
operations and writes the result to another gpickle file.

Available operations (see ``ops``):

* ``locate``    -- add latitude/longitude from a CSV file to place nodes
* ``contract``  -- copy attributes of related nodes onto their partners
* ``inv_rels``  -- add an inverse edge for every existing edge
* ``add_links`` -- add a ``link`` URL attribute to nodes

Usage: ``model2model.py [input_fn output_fn [op1,op2,...]]``
"""
import csv
import sys

import networkx as nx

## configure behaviour

# networkx graph files
input_fn = 'ismi_graph.gpickle'
output_fn = 'ismi_graph_mod.gpickle'

# operations
ops = ['locate', 'contract', 'inv_rels', 'add_links']

# types of object to locate
locate_objects_of_type = ['PLACE']

# file with place location information
places_fn = 'ismi_places_loc.csv'

# node types to remove from the graph
#remove_objects_of_type = ['DIGITALIZATION', 'REFERENCE']

# add relations to these objects as attributes with the relation's name
contract_relations_into_attributes = {'PLACE': ['label', 'latitude', 'longitude'],
                                      'ALIAS': ['label']}

# add URLs to nodes using an attribute in a pattern
#add_link_attributes = {'ismi_id': 'https://ismi-dev.mpiwg-berlin.mpg.de/drupal-ismi/entity/%s'}
add_link_attributes = {'ismi_id': 'https://ismi-dev.mpiwg-berlin.mpg.de/om4-ismi/browse/entityDetails.xhtml?eid=%s'}


def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
    """Return a cleaned-up version of a relation name.

    :param name: original relation name.
    :param is_src_rel: accepted for symmetry with ``is_tar_rel``; currently
        has no effect on the result.
    :param is_tar_rel: prefix the name with ``<`` (used for inverse relations).
    :param att_from_rel: strip ``is_``, ``has_``, ``was_`` and ``_of`` so the
        relation name reads well as an attribute name.
    :returns: the modified name.
    """
    # these are too embarrassing...
    if 'FLORUIT' in name:
        name = name.replace('FLORUIT', 'FLOURISH')
    elif 'floruit' in name:
        name = name.replace('floruit', 'flourish')

    if is_src_rel:
        #name = name + '>'
        pass

    if is_tar_rel:
        name = '<' + name

    if att_from_rel:
        # clean up relations as attribute names
        # NOTE: the fragments are removed wherever they occur in the name,
        # not only at its start or end.
        for fragment in ('is_', 'has_', 'was_', '_of'):
            name = name.replace(fragment, '')

    return name


def _readLocations(filename):
    """Read the place CSV and return ``{address: {'latitude', 'longitude'}}``.

    Rows with an empty latitude or longitude are ignored.
    """
    locations = {}
    # newline='' is what the csv module expects for files it reads
    with open(filename, encoding='utf-8', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            lat = row['Latitude']
            lon = row['Longitude']
            if lat and lon:
                locations[row['Address']] = {'latitude': lat, 'longitude': lon}

    return locations


def locatePlaces(nx_graph):
    """Add location information to objects in the graph.

    Every node whose ``type`` is listed in ``locate_objects_of_type`` gets
    ``latitude`` and ``longitude`` attributes if its ``label`` matches an
    ``Address`` in the file ``places_fn``. A warning is printed for
    locatable nodes without a match. Nodes without a ``type`` are skipped.
    """
    print("Adding location information from %s to %s."%(places_fn, locate_objects_of_type))
    locations = _readLocations(places_fn)

    cnt = 0
    # nodes(data=True) yields the live attribute dicts in networkx 1.x and 2.x
    for _, attrs in nx_graph.nodes(data=True):
        if attrs.get('type') in locate_objects_of_type:
            # locatable object
            name = attrs.get('label')
            location = locations.get(name)
            if location is not None:
                # place name match
                attrs['latitude'] = location['latitude']
                attrs['longitude'] = location['longitude']
            else:
                print("WARNING: no location for name '%s'"%name)

        cnt += 1
        if cnt % 100 == 0:
            print(" %s nodes"%cnt)


def genAttName(attrs, name):
    """Generate a new attribute name that is not yet used in ``attrs``.

    If ``name`` is free (absent or set to ``None``) it is returned as is.
    Otherwise a trailing number is incremented (``x2`` -> ``x3``,
    ``x19`` -> ``x20``) or, when the name has no trailing number, ``2`` is
    appended, until a free name is found.

    :param attrs: attribute dict of the node that will receive the value.
    :param name: preferred attribute name.
    :returns: a name for which ``attrs.get(name)`` is ``None``.
    """
    digits = '0123456789'
    while attrs.get(name) is not None:
        # attribute exists
        stem = name.rstrip(digits)
        suffix = name[len(stem):]
        if suffix:
            # increment the whole trailing number, not just its last digit
            name = stem + str(int(suffix) + 1)
        else:
            name += '2'

    return name


def _transferAttributes(rel_type, from_attrs, to_attrs):
    """Copy the configured attributes of one node onto its relation partner.

    Does nothing unless the type of the node described by ``from_attrs`` is
    a key of ``contract_relations_into_attributes``.
    """
    transfer_atts = contract_relations_into_attributes.get(from_attrs.get('type'))
    if not transfer_atts:
        return

    for transfer_att in transfer_atts:
        if transfer_att not in from_attrs:
            # node has no such attribute
            continue

        # name for new attribute starts with relation name
        att_name = fixName(rel_type, att_from_rel=True)
        # then attribute name
        if transfer_att != 'label':
            att_name += "_%s"%transfer_att

        # then generate unique name
        att_name = genAttName(to_attrs, att_name)
        # add the partner node's attribute
        to_attrs[att_name] = from_attrs[transfer_att]
        # also add normalized attribute
        norm_att = '_n_' + transfer_att
        if norm_att in from_attrs:
            to_attrs['_n_' + att_name] = from_attrs[norm_att]


def contractRelations(nx_graph):
    """Contract relations into attributes.

    For every edge, if the target node's type is configured in
    ``contract_relations_into_attributes`` its listed attributes are copied
    to the source node under a name derived from the relation type; the
    same is then done from source to target.
    """
    print("Contracting relations to attributes.")
    node_attrs = dict(nx_graph.nodes(data=True))
    cnt = 0
    # edges(data=True) hands out each edge's own attributes, so parallel
    # edges between the same two nodes are handled individually
    for nx_src, nx_tar, rel_attrs in nx_graph.edges(data=True):
        rel_type = rel_attrs['type']
        src_attrs = node_attrs[nx_src]
        tar_attrs = node_attrs[nx_tar]

        # contract source relations
        _transferAttributes(rel_type, tar_attrs, src_attrs)
        # contract target relations
        _transferAttributes(rel_type, src_attrs, tar_attrs)

        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)


def invertRelations(nx_graph):
    """Add inverse relations to each relation.

    For every existing edge a new edge in the opposite direction is added
    whose ``type`` is the original type prefixed with ``<`` and whose
    ``ismi_id`` is the negated original id.
    """
    print("Adding inverse relations.")
    # copy list of edges because we add edges in the loop
    edges = list(nx_graph.edges(data=True))
    cnt = 0
    for nx_src, nx_tar, rel_attrs in edges:
        # create new relation
        nx_graph.add_edge(nx_tar, nx_src,
                          type=fixName(rel_attrs['type'], is_tar_rel=True),
                          ismi_id=-rel_attrs['ismi_id'])

        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)


def addLinks(nx_graph):
    """Add link attributes to all nodes.

    For each ``attribute: pattern`` pair in ``add_link_attributes`` every
    node that has the attribute gets a ``link`` attribute with the
    attribute's value inserted into the pattern.
    """
    print("Adding links: %s"%repr(add_link_attributes))
    cnt = 0
    for link_att, link_pattern in add_link_attributes.items():
        # iterate all nodes
        for _, attrs in nx_graph.nodes(data=True):
            if link_att in attrs:
                # TODO: which target attribute for multiple?
                attrs['link'] = link_pattern%attrs[link_att]

            cnt += 1
            if cnt % 100 == 0:
                print(" %s nodes"%cnt)


# operation name -> function taking the graph
_OPERATIONS = {
    'locate': locatePlaces,
    'contract': contractRelations,
    'inv_rels': invertRelations,
    'add_links': addLinks,
}


def main(argv=None):
    """Read the graph, run the requested operations and write the result.

    :param argv: argument list in ``sys.argv`` layout; defaults to
        ``sys.argv``. ``argv[1]``/``argv[2]`` override the input and output
        file names, ``argv[3]`` is a comma-separated list of operations.
    """
    if argv is None:
        argv = sys.argv

    print("Modify networkx graph")
    in_fn, out_fn, operations = input_fn, output_fn, ops

    # read commandline parameters
    if len(argv) > 2:
        in_fn = argv[1]
        out_fn = argv[2]

    if len(argv) > 3:
        operations = argv[3].split(',')

    # read networkx graph from pickle
    # NOTE: gpickle files are Python pickles; only read files you trust.
    print("Reading graph from %s"%in_fn)
    nx_graph = nx.read_gpickle(in_fn)
    print("Graph info: %s"%nx.info(nx_graph))

    # operate
    for op in operations:
        operation = _OPERATIONS.get(op)
        if operation is None:
            print("ERROR: unknown operation %s"%op)
        else:
            operation(nx_graph)

    print("Writing graph to %s"%out_fn)
    nx.write_gpickle(nx_graph, out_fn)
    print("Done.")


if __name__ == '__main__':
    main()