comparison importFromOpenMind/importer/model2model.py @ 25:5bdcb5805d29

updated openmind-networkx-neo4j conversion with dates, locations and links.
author casties
date Thu, 24 Sep 2015 18:17:41 +0200
parents 97f2da68fb5f
children 248bf8d1e2e7
comparison
equal deleted inserted replaced
24:97f2da68fb5f 25:5bdcb5805d29
1 import networkx as nx 1 import networkx as nx
2 import sys 2 import sys
3 import csv
3 4
4 ## configure behaviour 5 ## configure behaviour
5 6
6 # networkx graph files 7 # networkx graph files
7 input_fn = 'ismi_graph.gpickle' 8 input_fn = 'ismi_graph.gpickle'
8 output_fn = 'ismi_graph_mod.gpickle' 9 output_fn = 'ismi_graph_mod.gpickle'
9 10
10 # operations 11 # operations
11 ops = ['contract', 'inv_rels'] 12 ops = ['locate', 'contract', 'inv_rels', 'add_links']
12 13
13 # add relations to these objects as attributes with the relations name 14 # types of object to locate
14 contract_relations_into_attributes = {'PLACE': ['label'], 15 locate_objects_of_type = ['PLACE']
16
17 # file with place location information
18 places_fn = 'ismi_places_loc.csv'
19
20 # node types to remove from the graph
21 #remove_objects_of_type = ['DIGITALIZATION', 'REFERENCE']
22
23 # add relations to these objects as attributes with the relation's name
24 contract_relations_into_attributes = {'PLACE': ['label', 'latitude', 'longitude'],
15 'ALIAS': ['label']} 25 'ALIAS': ['label']}
16 26
17 27
28 # add URLs to nodes using an attribute in a pattern
29 add_link_attributes = {'ismi_id': 'https://ismi-dev.mpiwg-berlin.mpg.de/drupal-ismi/entity/%s'}
18 30
19 31
20 def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False): 32 def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
21 # these are too embarrassing... 33 # these are too embarrassing...
22 if 'FLORUIT' in name: 34 if 'FLORUIT' in name:
40 name = name.replace('_of', '') 52 name = name.replace('_of', '')
41 53
42 return name 54 return name
43 55
44 56
def locatePlaces(nx_graph):
    """Add location information to objects in the graph.

    Reads the CSV file named by the module-level ``places_fn``. Every row
    with non-empty 'Latitude' and 'Longitude' columns is remembered under
    the value of its 'Address' column.

    Each node whose 'type' attribute is listed in ``locate_objects_of_type``
    and whose 'label' equals a remembered address gets 'latitude' and
    'longitude' attributes (stored exactly as read from the CSV). For
    locatable nodes without a matching address a warning is printed.

    The graph is modified in place; nothing is returned.
    """

    print("Adding location information from %s to %s."%(places_fn, locate_objects_of_type))

    # read place location file
    locations = {}
    # newline='' leaves newline handling to the csv module, so quoted
    # fields that contain line breaks are parsed correctly
    with open(places_fn, encoding='utf-8', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            lat = row['Latitude']
            lon = row['Longitude']
            if lat and lon:
                locations[row['Address']] = {'latitude': lat, 'longitude': lon}

    # iterate all nodes
    cnt = 0
    for n in nx.nodes_iter(nx_graph):
        attrs = nx_graph.node[n]
        if attrs['type'] in locate_objects_of_type:
            # locatable object
            name = attrs['label']
            location = locations.get(name)
            if location is not None:
                # place name match
                attrs['latitude'] = location['latitude']
                attrs['longitude'] = location['longitude']

            else:
                print("WARNING: no location for name '%s'"%name)

        cnt += 1
        if cnt % 100 == 0:
            print(" %s nodes"%cnt)
92
93
94
def genAttName(attrs, name):
    """Generate a new attribute name that is not yet used in attrs.

    Returns ``name`` unchanged if ``attrs`` has no value for it (a missing
    key and a value of None both count as free). Otherwise a numeric suffix
    is appended, starting with 2, or an existing numeric suffix is
    incremented until a free name is found: 'a', 'a2', 'a3', ..., 'a9',
    'a10', ..., 'a19', 'a20'.

    attrs -- mapping of attribute names to values (only read)
    name  -- the desired attribute name
    """
    digits = '0123456789'
    while attrs.get(name) is not None:
        # attribute exists: split off the complete run of trailing digits so
        # that e.g. 'a19' becomes 'a20' instead of 'a110'
        stem = name.rstrip(digits)
        suffix = name[len(stem):]
        if suffix:
            name = stem + str(int(suffix) + 1)
        else:
            name += '2'

    return name
106
107
def _transferAttributes(rel_type, from_attrs, to_attrs):
    """Copy the configured attributes of one end of a relation to the other end.

    If the 'type' of ``from_attrs`` is a key of
    ``contract_relations_into_attributes``, each listed attribute that is
    present in ``from_attrs`` is stored in ``to_attrs`` under a new, unused
    name derived from the relation type.
    """
    transfer_atts = contract_relations_into_attributes.get(from_attrs['type'], ())
    for transfer_att in transfer_atts:
        if transfer_att not in from_attrs:
            # nothing to transfer
            continue

        # name for new attribute starts with relation name
        att_name = fixName(rel_type, att_from_rel=True)
        # then attribute name
        if transfer_att != 'label':
            att_name += "_%s"%transfer_att

        # then generate unique name
        att_name = genAttName(to_attrs, att_name)
        to_attrs[att_name] = from_attrs.get(transfer_att)


def contractRelations(nx_graph):
    """Contract relations into attributes.

    For every relation (edge) in the graph the attributes configured in
    ``contract_relations_into_attributes`` are copied from the target node
    to the source node and, afterwards, from the source node to the target
    node. The new attribute names are built from the relation type.

    Every edge is visited with its own attribute dict, so parallel
    relations between the same pair of nodes are each handled once.

    The graph is modified in place; nothing is returned.
    """

    print("Contracting relations to attributes.")
    cnt = 0
    for nx_src, nx_tar, rel_attrs in nx_graph.edges(data=True):
        rel_type = rel_attrs['type']
        # get attributes of source and target nodes
        src_attrs = nx_graph.node[nx_src]
        tar_attrs = nx_graph.node[nx_tar]

        # contract source relations: target attributes go to the source
        _transferAttributes(rel_type, tar_attrs, src_attrs)

        # contract target relations: source attributes go to the target
        _transferAttributes(rel_type, src_attrs, tar_attrs)

        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)
167
168
def invertRelations(nx_graph):
    """Add an inverse relation to each relation.

    For every existing edge a new edge in the opposite direction is added.
    Its 'type' is the original type passed through
    ``fixName(..., is_tar_rel=True)`` and its 'ismi_id' is the negated
    'ismi_id' of the original relation.

    Every edge is read together with its own attribute dict, so parallel
    relations between the same pair of nodes each get their own inverse.

    The graph is modified in place; nothing is returned.
    """

    print("Adding inverse relations.")
    # snapshot the edges with their attributes because we add edges in the loop
    edges = list(nx_graph.edges(data=True))
    # iterate list
    cnt = 0
    for nx_src, nx_tar, rel_attrs in edges:
        rel_type = rel_attrs['type']
        rel_id = rel_attrs['ismi_id']
        # create new relation
        nx_graph.add_edge(nx_tar, nx_src, type=fixName(rel_type, is_tar_rel=True), ismi_id=-rel_id)

        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)
189
190
def addLinks(nx_graph):
    """Add link attributes to all nodes.

    For each (attribute name, URL pattern) pair in ``add_link_attributes``,
    every node that has the attribute gets a 'link' attribute holding the
    pattern filled with the attribute's value. With several patterns a
    later match overwrites the 'link' set by an earlier one.

    The graph is modified in place; nothing is returned.
    """

    print("Adding links: %s"%(repr(add_link_attributes),))
    visited = 0
    for source_att, url_pattern in add_link_attributes.items():
        # walk over every node for this pattern
        for node_id in nx.nodes_iter(nx_graph):
            node_attrs = nx_graph.node[node_id]
            if source_att in node_attrs:
                # TODO: which target attribute for multiple?
                node_attrs['link'] = url_pattern%node_attrs[source_att]

            # progress report every 100 visited nodes
            visited += 1
            if not visited % 100:
                print(" %s nodes"%visited)
208
50 209
51 ## main 210 ## main
52 211
53 print("Modify networkx graph") 212 print("Modify networkx graph")
54 213
63 nx_graph = nx.read_gpickle(input_fn) 222 nx_graph = nx.read_gpickle(input_fn)
64 print("Graph info: %s"%nx.info(nx_graph)) 223 print("Graph info: %s"%nx.info(nx_graph))
65 224
# operate
# map each operation name that may appear in ops to the function performing it
op_handlers = {
    'locate': locatePlaces,
    'contract': contractRelations,
    'inv_rels': invertRelations,
    'add_links': addLinks,
}
for op in ops:
    handler = op_handlers.get(op)
    if handler is None:
        # a misspelled entry in ops would otherwise be skipped silently
        print("WARNING: unknown operation '%s'"%op)
        continue

    handler(nx_graph)

print("Writing graph to %s"%output_fn)
# do not rebind nx_graph to the writer's return value; keep it bound to the graph
nx.write_gpickle(nx_graph, output_fn)

print("Done.")