Mercurial > hg > drupalISMI
annotate importFromOpenMind/importer/model2model.py @ 30:870b0b3b272f
fix entityDetails url in link attribute.
author | casties |
---|---|
date | Fri, 11 Dec 2015 17:13:58 -0500 |
parents | 1a1877812757 |
children | 7e2e344c3b87 |
rev | line source |
---|---|
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
1 import networkx as nx |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
2 import sys |
25
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
3 import csv |
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
4 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
5 ## configure behaviour |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
6 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
7 # networkx graph files |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
8 input_fn = 'ismi_graph.gpickle' |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
9 output_fn = 'ismi_graph_mod.gpickle' |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
10 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
11 # operations |
25
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
12 ops = ['locate', 'contract', 'inv_rels', 'add_links'] |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
13 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
14 # types of object to locate |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
15 locate_objects_of_type = ['PLACE'] |
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
16 |
25
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
17 # file with place location information |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
18 places_fn = 'ismi_places_loc.csv' |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
19 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
20 # node types to remove from the graph |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
21 #remove_objects_of_type = ['DIGITALIZATION', 'REFERENCE'] |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
22 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
23 # add relations to these objects as attributes with the relation's name |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
24 contract_relations_into_attributes = {'PLACE': ['label', 'latitude', 'longitude'], |
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
25 'ALIAS': ['label']} |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
26 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
27 |
25
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
28 # add URLs to nodes using an attribute in a pattern |
28 | 29 #add_link_attributes = {'ismi_id': 'https://ismi-dev.mpiwg-berlin.mpg.de/drupal-ismi/entity/%s'} |
30 | 30 add_link_attributes = {'ismi_id': 'https://ismi-dev.mpiwg-berlin.mpg.de/om4-ismi/browse/entityDetails.xhtml?eid=%s'} |
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
31 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
32 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
    """Return a cleaned-up version of a relation or attribute name.

    name         -- the original relation/attribute name.
    is_src_rel   -- accepted for interface compatibility; the name is left
                    unchanged for source relations.
    is_tar_rel   -- if true, the name is prefixed with '<' to mark it as the
                    inverse (target-side) relation.
    att_from_rel -- if true, the helper words 'is_', 'has_', 'was_' and '_of'
                    are removed so the relation name reads like an attribute
                    name (e.g. 'is_part_of' -> 'part').
    """
    import re

    # these are too embarrassing...
    # Both spellings are replaced independently, so a name that happens to
    # contain the upper- and the lower-case form is fixed completely.
    name = name.replace('FLORUIT', 'FLOURISH').replace('floruit', 'flourish')

    if is_tar_rel:
        name = '<' + name

    if att_from_rel:
        # clean up relations as attribute names
        # Only strip the helper words where they form a whole word component:
        # a plain substring replace would also eat the tail of words such as
        # 'analysis_' or the head of words such as '_office'.
        name = re.sub(r'(?<![A-Za-z0-9])(?:is|has|was)_', '', name)
        name = re.sub(r'_of(?![A-Za-z0-9])', '', name)

    return name
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
56 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
57 |
25
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
def locatePlaces(nx_graph):
    """Add location information to objects in the graph.

    Reads the CSV file named by the module-level ``places_fn`` (columns
    'Address', 'Latitude' and 'Longitude') and, for every node whose 'type'
    is listed in ``locate_objects_of_type``, copies the latitude and
    longitude of the row whose 'Address' equals the node's 'label' into the
    node's 'latitude' and 'longitude' attributes. Nodes without a matching
    row produce a warning on stdout. The graph is modified in place.
    """

    print("Adding location information from %s to %s."%(places_fn, locate_objects_of_type))

    # read place location file
    locations = {}
    # newline='' is what the csv module asks for, so that quoted fields with
    # embedded line breaks are parsed correctly.
    with open(places_fn, encoding='utf-8', newline='') as csvfile:
        for row in csv.DictReader(csvfile):
            lat = row['Latitude']
            lon = row['Longitude']
            # rows without complete coordinates are ignored
            if lat and lon:
                locations[row['Address']] = {'latitude': lat, 'longitude': lon}

    # iterate all nodes
    # nodes(data=True) yields (node, attribute dict) pairs on networkx 1.x as
    # well as 2.x/3.x; nx.nodes_iter() and Graph.node were removed in 2.x.
    cnt = 0
    for _node, attrs in nx_graph.nodes(data=True):
        # .get() so that a node without a 'type' is skipped instead of
        # aborting the whole run with a KeyError
        if attrs.get('type') in locate_objects_of_type:
            # locatable object
            name = attrs.get('label')
            location = locations.get(name)
            if location is not None:
                # place name match
                attrs['latitude'] = location['latitude']
                attrs['longitude'] = location['longitude']
            else:
                print("WARNING: no location for name '%s'"%name)

        cnt += 1
        if cnt % 100 == 0:
            print("  %s nodes"%cnt)
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
93 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
94 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
95 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
def genAttName(attrs, name):
    """Generate a new attribute name that is not yet used in attrs.

    attrs -- mapping of existing attribute names to values; a name counts as
             taken only if its value is not None.
    name  -- the preferred attribute name.

    Returns name unchanged if it is free. Otherwise a counter is appended
    (starting with '2') or, if the name already ends in a number, that number
    is incremented until a free name is found.
    """
    while attrs.get(name) is not None:
        # attribute exists
        # Split off the complete trailing ASCII number: incrementing only the
        # last digit would turn 'x19' into 'x110', and str.isnumeric() also
        # accepts characters such as superscript two that int() rejects.
        stem = name.rstrip('0123456789')
        counter = name[len(stem):]
        if counter:
            name = stem + str(int(counter) + 1)
        else:
            name += '2'

    return name
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
107 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
108 |
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
def _contractInto(rel_type, from_attrs, into_attrs):
    """Copy the configured attributes of one edge endpoint onto the other.

    rel_type   -- type of the relation connecting the two nodes.
    from_attrs -- attribute dict of the node whose attributes are copied.
    into_attrs -- attribute dict of the node receiving the copies (modified
                  in place).

    Nothing happens unless the type of the from-node is a key of
    contract_relations_into_attributes.
    """
    # get list of attributes to transfer
    transfer_atts = contract_relations_into_attributes.get(from_attrs.get('type'))
    if not transfer_atts:
        return

    # name for new attribute starts with relation name
    base_name = fixName(rel_type, att_from_rel=True)
    for transfer_att in transfer_atts:
        if transfer_att not in from_attrs:
            # node has no such attribute
            continue

        # then attribute name ('label' is implied by the relation name alone)
        att_name = base_name
        if transfer_att != 'label':
            att_name += "_%s"%transfer_att

        # then generate unique name
        att_name = genAttName(into_attrs, att_name)
        # add the other node's attribute
        into_attrs[att_name] = from_attrs[transfer_att]
        # also add normalized attribute
        norm_att = '_n_' + transfer_att
        if norm_att in from_attrs:
            into_attrs['_n_' + att_name] = from_attrs[norm_att]


def contractRelations(nx_graph):
    """Contract relations into attributes.

    For every edge, if the type of the target node is a key of
    contract_relations_into_attributes, the attributes listed there are
    copied from the target onto the source node under a name derived from
    the relation type; afterwards the same is done from source to target.
    Edges and nodes are not removed; only node attributes are added.
    """

    print("Contracting relations to attributes.")
    # Attribute dicts of all nodes by node id. Built from nodes(data=True)
    # because Graph.node (networkx 1.x) was removed in networkx 2.x.
    node_attrs = dict(nx_graph.nodes(data=True))

    cnt = 0
    # edges(data=True) hands out the attributes of each individual edge, so
    # parallel edges between the same two nodes are each handled with their
    # own relation type instead of always reading the edge with key 0.
    for nx_src, nx_tar, rel_attrs in nx_graph.edges(data=True):
        rel_type = rel_attrs['type']
        # get attributes of source and target nodes
        src_attrs = node_attrs[nx_src]
        tar_attrs = node_attrs[nx_tar]

        # contract source relations: target attributes end up on the source
        _contractInto(rel_type, tar_attrs, src_attrs)
        # contract target relations: source attributes end up on the target
        _contractInto(rel_type, src_attrs, tar_attrs)

        cnt += 1
        if cnt % 100 == 0:
            print("  %s relations"%cnt)
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
174 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
175 |
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
def invertRelations(nx_graph):
    """Add inverse relations to each relation.

    For every edge existing at the time of the call, an edge in the opposite
    direction is added whose 'type' is the original type prefixed with '<'
    (see fixName) and whose 'ismi_id' is the negated original 'ismi_id'.
    The graph is modified in place.
    """

    print("Adding inverse relations.")
    # copy list of edges because we add edges in the loop
    # list(...) works for the plain list returned by networkx 1.x as well as
    # for the edge view of networkx 2+ (which cannot be sliced); data=True
    # delivers the attributes of each individual edge, including parallel ones.
    edges = list(nx_graph.edges(data=True))
    # iterate list
    cnt = 0
    for nx_src, nx_tar, rel_attrs in edges:
        rel_type = rel_attrs['type']
        rel_id = rel_attrs['ismi_id']
        # create new relation
        nx_graph.add_edge(nx_tar, nx_src, type=fixName(rel_type, is_tar_rel=True), ismi_id=-rel_id)

        cnt += 1
        if cnt % 100 == 0:
            print("  %s relations"%cnt)
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
196 |
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
197 |
25
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
def addLinks(nx_graph):
    """Add link attributes to all nodes.

    For every (attribute name, URL pattern) pair in add_link_attributes, each
    node that has the attribute gets a 'link' attribute holding the pattern
    with the attribute's value filled in. The graph is modified in place.
    """

    print("Adding links: %s"%repr(add_link_attributes))
    cnt = 0
    for link_att, link_pattern in add_link_attributes.items():
        # iterate all nodes
        # nodes(data=True) works on networkx 1.x and 2+; nx.nodes_iter() and
        # Graph.node were removed in networkx 2.x.
        for _node, attrs in nx_graph.nodes(data=True):
            if link_att in attrs:
                # one-element tuple so that a tuple-valued attribute is
                # formatted as a single value instead of being unpacked
                url = link_pattern%(attrs[link_att],)
                # TODO: which target attribute for multiple?
                attrs['link'] = url

            cnt += 1
            if cnt % 100 == 0:
                print("  %s nodes"%cnt)
5bdcb5805d29
updated openmind-networkx-neo4j conversion with dates, locations and links.
casties
parents:
24
diff
changeset
|
215 |
24
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
216 |
97f2da68fb5f
first version of model2model graph manipulation tool. doesn't work yet.
casties
parents:
diff
changeset
|
## main
#
# Script entry: read a pickled networkx graph, apply the requested
# operations in order, and write the modified graph back out.
# Command line: infile outfile [ops], where ops is a comma-separated list
# of 'locate', 'contract', 'inv_rels', 'add_links'. Without arguments the
# defaults configured at the top of the file are used.

print("Modify networkx graph")

# read commandline parameters
# input and output file are only overridden together (both must be given)
if len(sys.argv) > 2:
    input_fn = sys.argv[1]
    output_fn = sys.argv[2]

# optional third parameter replaces the default list of operations
if len(sys.argv) > 3:
    ops = sys.argv[3].split(',')

# read networkx graph from pickle
# NOTE(security): read_gpickle unpickles the file, which can execute
# arbitrary code -- only use input files from a trusted source.
print("Reading graph from %s"%input_fn)
nx_graph = nx.read_gpickle(input_fn)
print("Graph info: %s"%nx.info(nx_graph))

# operate
for op in ops:
    if op == 'locate':
        locatePlaces(nx_graph)

    elif op == 'contract':
        contractRelations(nx_graph)

    elif op == 'inv_rels':
        invertRelations(nx_graph)

    elif op == 'add_links':
        addLinks(nx_graph)

    else:
        # unknown operations are reported and skipped; processing continues
        print("ERROR: unknown operation %s"%op)

print("Writing graph to %s"%output_fn)
# write_gpickle returns nothing; do not rebind nx_graph to its result
nx.write_gpickle(nx_graph, output_fn)

print("Done.")