Mercurial > hg > drupalISMI
comparison importFromOpenMind/importer/model2model.py @ 25:5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
author | casties |
---|---|
date | Thu, 24 Sep 2015 18:17:41 +0200 |
parents | 97f2da68fb5f |
children | 248bf8d1e2e7 |
comparison
equal
deleted
inserted
replaced
24:97f2da68fb5f | 25:5bdcb5805d29 |
---|---|
1 import networkx as nx | 1 import networkx as nx |
2 import sys | 2 import sys |
3 import csv | |
3 | 4 |
4 ## configure behaviour | 5 ## configure behaviour |
5 | 6 |
6 # networkx graph files | 7 # networkx graph files |
7 input_fn = 'ismi_graph.gpickle' | 8 input_fn = 'ismi_graph.gpickle' |
8 output_fn = 'ismi_graph_mod.gpickle' | 9 output_fn = 'ismi_graph_mod.gpickle' |
9 | 10 |
10 # operations | 11 # operations |
11 ops = ['contract', 'inv_rels'] | 12 ops = ['locate', 'contract', 'inv_rels', 'add_links'] |
12 | 13 |
13 # add relations to these objects as attributes with the relations name | 14 # types of object to locate |
14 contract_relations_into_attributes = {'PLACE': ['label'], | 15 locate_objects_of_type = ['PLACE'] |
16 | |
17 # file with place location information | |
18 places_fn = 'ismi_places_loc.csv' | |
19 | |
20 # node types to remove from the graph | |
21 #remove_objects_of_type = ['DIGITALIZATION', 'REFERENCE'] | |
22 | |
23 # add relations to these objects as attributes with the relation's name | |
24 contract_relations_into_attributes = {'PLACE': ['label', 'latitude', 'longitude'], | |
15 'ALIAS': ['label']} | 25 'ALIAS': ['label']} |
16 | 26 |
17 | 27 |
28 # add URLs to nodes using an attribute in a pattern | |
29 add_link_attributes = {'ismi_id': 'https://ismi-dev.mpiwg-berlin.mpg.de/drupal-ismi/entity/%s'} | |
18 | 30 |
19 | 31 |
20 def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False): | 32 def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False): |
21 # these are too embarrassing... | 33 # these are too embarrassing... |
22 if 'FLORUIT' in name: | 34 if 'FLORUIT' in name: |
40 name = name.replace('_of', '') | 52 name = name.replace('_of', '') |
41 | 53 |
42 return name | 54 return name |
43 | 55 |
44 | 56 |
def locatePlaces(nx_graph):
    """Add location information to objects in the graph.

    Reads the place coordinates from the CSV file places_fn and copies
    them as 'latitude' and 'longitude' attributes onto every node whose
    'type' is listed in locate_objects_of_type and whose 'label' matches
    an 'Address' in the file. Nodes of a locatable type without a match
    produce a warning. The graph is modified in place.
    """

    print("Adding location information from %s to %s."%(places_fn, locate_objects_of_type))
    cnt = 0

    # read place location file
    locations = _readPlaceLocations(places_fn)

    # iterate all nodes
    for n in nx.nodes_iter(nx_graph):
        attrs = nx_graph.node[n]
        if attrs['type'] in locate_objects_of_type:
            # locatable object
            name = attrs['label']
            if name in locations:
                # place name match
                location = locations[name]
                attrs['latitude'] = location['latitude']
                attrs['longitude'] = location['longitude']

            else:
                print("WARNING: no location for name '%s'"%name)

        # progress counts every visited node, not only the located ones
        cnt += 1
        if cnt % 100 == 0:
            print(" %s nodes"%cnt)


def _readPlaceLocations(csv_fn):
    """Read place coordinates from the CSV file csv_fn.

    The file must have the columns 'Address', 'Latitude' and 'Longitude'.
    Rows with an empty latitude or longitude are skipped.

    Returns a dict mapping each address to a dict with the keys
    'latitude' and 'longitude'; the values are the strings from the file.
    """
    locations = {}
    # newline='' leaves line-ending handling to the csv module so that
    # quoted fields containing line breaks are parsed correctly
    with open(csv_fn, encoding='utf-8', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            lat = row['Latitude']
            lon = row['Longitude']
            name = row['Address']
            if lat and lon:
                locations[name] = {'latitude': lat, 'longitude': lon}

    return locations
92 | |
93 | |
94 | |
def genAttName(attrs, name):
    """Generate a new attribute name that is not yet used in attrs.

    Returns name unchanged if attrs has no value (or None) under that key.
    Otherwise a number is appended or incremented until the name is free:
    'x' -> 'x2' -> 'x3' ... 'x9' -> 'x10' -> 'x11'.

    attrs: dict of existing attributes (not modified).
    name: the preferred attribute name.
    """
    while attrs.get(name) is not None:
        # attribute exists
        # split off the complete run of trailing ASCII digits so that the
        # increment carries correctly (x19 -> x20) and characters that are
        # numeric but not parseable by int() (like superscripts) are ignored
        stem = name.rstrip('0123456789')
        suffix = name[len(stem):]
        if suffix:
            # increment the number, keeping the width of zero-padded suffixes
            name = stem + str(int(suffix) + 1).zfill(len(suffix))
        else:
            name += '2'

    return name
106 | |
107 | |
def contractRelations(nx_graph):
    """Contract relations into attributes.

    For every relation in the graph: if the node at one end has a type
    listed in contract_relations_into_attributes, the attributes configured
    for that type are copied onto the node at the other end. This is done
    first from target to source and then from source to target. The graph
    is modified in place.
    """

    print("Contracting relations to attributes.")
    cnt = 0
    # data=True yields the attributes of every single relation, so parallel
    # relations between the same two nodes are each handled with their own
    # type (instead of always reading the relation with key 0)
    for nx_src, nx_tar, rel_attrs in nx_graph.edges_iter(data=True):
        rel_type = rel_attrs['type']
        # get attributes of source and target nodes
        src_attrs = nx_graph.node[nx_src]
        tar_attrs = nx_graph.node[nx_tar]

        # contract source relations: target attributes go to the source
        _contractInto(rel_type, tar_attrs, src_attrs)
        # contract target relations: source attributes go to the target
        _contractInto(rel_type, src_attrs, tar_attrs)

        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)


def _contractInto(rel_type, from_attrs, to_attrs):
    """Copy the configured attributes of one node onto its neighbour.

    Does nothing unless the type of from_attrs is listed in
    contract_relations_into_attributes.

    rel_type: type of the relation connecting both nodes.
    from_attrs: attribute dict of the node that gets contracted.
    to_attrs: attribute dict of the node receiving the new attributes.
    """
    from_type = from_attrs['type']
    if from_type not in contract_relations_into_attributes:
        return

    # list of attributes to transfer
    for transfer_att in contract_relations_into_attributes[from_type]:
        if transfer_att not in from_attrs:
            # node has no such attribute
            continue

        # name for new attribute starts with relation name
        att_name = fixName(rel_type, att_from_rel=True)
        # then attribute name (the label is stored under the plain relation name)
        if transfer_att != 'label':
            att_name += "_%s"%transfer_att

        # then generate unique name
        att_name = genAttName(to_attrs, att_name)
        # add the other node's attribute
        to_attrs[att_name] = from_attrs.get(transfer_att)
167 | |
168 | |
def invertRelations(nx_graph):
    """Add an inverse relation to each relation.

    For every existing relation from source to target a new relation from
    target to source is added. Its type is fixName(type, is_tar_rel=True)
    and its 'ismi_id' is the negated 'ismi_id' of the original relation.
    The graph is modified in place.
    """

    print("Adding inverse relations.")
    # snapshot the relations because we add edges in the loop; data=True
    # gives the attributes of every single relation, so parallel relations
    # between the same two nodes each get their own inverse
    edges = list(nx_graph.edges(data=True))
    # iterate list
    cnt = 0
    for nx_src, nx_tar, rel_attrs in edges:
        rel_type = rel_attrs['type']
        rel_id = rel_attrs['ismi_id']
        # create new relation
        nx_graph.add_edge(nx_tar, nx_src, type=fixName(rel_type, is_tar_rel=True), ismi_id=-rel_id)

        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)
189 | |
190 | |
def addLinks(nx_graph):
    """Add a 'link' attribute to all nodes that have a link source attribute.

    add_link_attributes maps an attribute name to a URL pattern. Every node
    carrying that attribute gets the pattern, filled in with the attribute's
    value, stored as its 'link' attribute. The graph is modified in place.
    """

    print("Adding links: %s"%repr(add_link_attributes))
    visited = 0
    for source_att, url_pattern in add_link_attributes.items():
        # visit every node once per configured pattern
        for node_id in nx.nodes_iter(nx_graph):
            node_attrs = nx_graph.node[node_id]
            if source_att in node_attrs:
                # TODO: which target attribute for multiple?
                node_attrs['link'] = url_pattern%node_attrs[source_att]

            # progress counts all visited nodes, linked or not
            visited += 1
            if visited % 100 == 0:
                print(" %s nodes"%visited)
208 | |
50 | 209 |
51 ## main | 210 ## main |
52 | 211 |
53 print("Modify networkx graph") | 212 print("Modify networkx graph") |
54 | 213 |
63 nx_graph = nx.read_gpickle(input_fn) | 222 nx_graph = nx.read_gpickle(input_fn) |
64 print("Graph info: %s"%nx.info(nx_graph)) | 223 print("Graph info: %s"%nx.info(nx_graph)) |
65 | 224 |
# operate: run the configured operations in the order given in ops
operations = {'locate': locatePlaces,
              'contract': contractRelations,
              'inv_rels': invertRelations,
              'add_links': addLinks}

for op in ops:
    operation = operations.get(op)
    if operation is None:
        # a misspelled operation would otherwise be skipped without notice
        print("WARNING: unknown operation '%s'"%op)
        continue

    operation(nx_graph)

print("Writing graph to %s"%output_fn)
# write_gpickle returns nothing, so the result must not be assigned to nx_graph
nx.write_gpickle(nx_graph, output_fn)

print("Done.")