Mercurial > hg > drupalISMI
annotate importFromOpenMind/importer/ismi2model.py @ 46:f3945ef1e6a4
new importer for OM4XML dump file.
author | casties |
---|---|
date | Fri, 03 Feb 2017 18:46:16 +0100 |
parents | 9a9a6da1d415 |
children |
rev | line source |
---|---|
19
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
1 import urllib.request |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
2 import json |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
3 import networkx |
23
45a823b5bf33
updated ismi2model importer and model2neo4j exporter.
casties
parents:
19
diff
changeset
|
4 import sys |
19
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
5 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
## configure behaviour

# output filename
# NOTE(review): the code that writes this file is outside the visible part
# of the source; presumably the finished graph is pickled here -- confirm.
output_fn = "ismi_graph.gpickle"

# OpenMind base URL of the JSON interface; query parameters are appended
# directly, hence the trailing '?'.
#baseURL="http://ismi.mpiwg-berlin.mpg.de//om4-ismi/jsonInterface?"
baseURL="http://localhost:18080/ismi-richfaces/jsonInterface?"

# node types to exclude from the graph
# (relsFromEnt drops every relation whose source or target has one of these types)
exclude_objects_of_type = ['DIGITALIZATION', 'REFERENCE']

# attributes to exclude
# (nodeFromEnt skips entity attributes whose name is listed here)
exclude_attributes_of_type = [
    'lw',
    'node_type',
    'nov',
    'notes_old'
]

# name of type attribute
# node_type_attribute: node attribute that receives the entity's object class
# rel_type_attribute: edge attribute that receives the relation's name
node_type_attribute = '_type'
rel_type_attribute = '_type'

# URL template: all entities of one object class (%s = object class name)
entsURL=baseURL+"method=get_ents&oc=%s"

# URL template: full content of several entities (%s = comma-separated ids)
entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s"

# URL template: full content of a single entity (%s = entity id)
entURL=baseURL+"method=get_ent&id=%s&include_content=True"
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
35 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
36 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def readJSON(url):
    """Fetch the given URL and return its body parsed as JSON.

    The response body is decoded as UTF-8 before parsing.

    Args:
        url: URL to load.
    Returns:
        the decoded JSON document (dict, list or scalar).
    Raises:
        urllib.error.URLError: if the URL can not be opened.
        ValueError: if the body is not valid UTF-8 encoded JSON.
    """
    #print("JSON loading %s"%url)
    # the context manager closes the connection even if read() fails
    with urllib.request.urlopen(url) as wsh:
        txt = wsh.read()

    return json.loads(txt.decode("utf-8"))
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
42 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
# load all definitions from OpenMind
# NOTE: this performs an HTTP request as soon as the module is loaded.
defs_json = readJSON(baseURL+"method=get_defs")

# current list of all definitions
# (the 'ov' value of every entry in the 'defs' list of the response)
ismi_defs = [atts['ov'] for atts in defs_json['defs']]

#ismi_types=["PERSON","WITNESS","CODEX","PLACE","COLLECTION","REPOSITORY"]


# the graph that nodeFromEnt and relationsFromRels add nodes and edges to
nx_graph = networkx.MultiDiGraph()

# NOTE(review): presumably maps ismi ids to created nodes; it is not filled
# in the visible part of the source -- confirm against importEnts.
nx_nodes = {}
# NOTE(review): presumably the dict handed to relsFromEnt to collect the
# JSON relations by id -- confirm against the caller.
ismi_relations = {}
# result of add_edge for every imported relation, keyed by relation id
# (filled and returned by relationsFromRels)
nx_relations = {}
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
56 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
57 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
    """Return the cleaned-up version of an OpenMind name.

    Args:
        name: the original name.
        is_src_rel: accepted for symmetry; the name is left unchanged.
        is_tar_rel: prefix the name with '<'.
        att_from_rel: strip the fragments 'is_', 'has_', 'was_' and '_of'
            (in this order, every occurrence) so that a relation name can
            be used as an attribute name.
    Returns:
        the fixed name.
    """
    # source relations keep their name as it is
    fixed = '<' + name if is_tar_rel else name

    if att_from_rel:
        # clean up relations as attribute names
        for fragment in ('is_', 'has_', 'was_', '_of'):
            fixed = fixed.replace(fragment, '')

    return fixed
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
74 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
75 |
35
d535f11a0d81
be more aggressive about parsing dates in text type fields.
casties
parents:
34
diff
changeset
|
76 |
d535f11a0d81
be more aggressive about parsing dates in text type fields.
casties
parents:
34
diff
changeset
|
def parseYear(val):
    """Extract the year from a JSON encoded OpenMind date.

    Looks at the 'from' member of the decoded object first and at the
    'date' member second and returns the 'year' entry of whichever is
    present. Parsing is best effort: anything that can not be interpreted
    as such a date object yields None.

    Args:
        val: JSON text of the date object.
    Returns:
        the value of the 'year' entry or None.
    """
    try:
        date_json = json.loads(val)
        if 'from' in date_json:
            return date_json['from'].get('year', None)

        if 'date' in date_json:
            return date_json['date'].get('year', None)

        print("don't know what to do with date %s"%(val))

    except (TypeError, ValueError, AttributeError):
        # not a JSON date object: val is no JSON text (TypeError/ValueError)
        # or the decoded value does not have the expected dict structure
        # (TypeError/AttributeError). Other exceptions are not swallowed.
        pass

    return None
d535f11a0d81
be more aggressive about parsing dates in text type fields.
casties
parents:
34
diff
changeset
|
92 |
d535f11a0d81
be more aggressive about parsing dates in text type fields.
casties
parents:
34
diff
changeset
|
93 |
19
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def nodeFromEnt(ent, etype):
    """Create a networkx node from the given JSON entity.

    Converts the entity's attributes, adds a node with the entity's id to
    the module-level graph nx_graph and returns the node.

    Args:
        ent: JSON entity (dict) with the members 'atts', 'oc' and 'id' and
            optionally 'ov' and 'nov'.
        etype: expected object class of the entity.
    Returns:
        the attribute dict of the node in nx_graph or None if the object
        class of the entity does not match etype.
    """
    attrs = {}
    # go through all attributes
    for att in ent['atts']:
        ct = att.get('content_type', None)
        name = att.get('name', None)
        if name in exclude_attributes_of_type:
            # exclude attribute
            continue

        if ct is None or ct.lower() in ['text', 'arabic', 'bool', 'boolean', 'url', 'language']:
            # normal text attribute (assume no content_type is text too...)
            val = att['ov']

            # startswith() instead of val[0] so that an empty value does
            # not raise an IndexError
            if isinstance(val, str) and val.startswith('{'):
                # try to parse as date
                year = parseYear(val)
                if year is not None:
                    val = year

            # keep attribute
            attrs[name] = val
            if 'nov' in att:
                # add normalized value
                attrs['_n_'+name] = att['nov']

        elif ct == 'date':
            # date attribute
            val = att['ov']
            # try to parse date object to get gregorian year
            year = parseYear(val)
            if year is not None:
                attrs[name] = year

        elif ct == 'num':
            # number attribute
            val = att['ov']

            # keep attribute, assume num is int
            attrs[name] = int(val)

        elif ct == 'old':
            # ignore attribute
            continue

        else:
            print("WARN: attribute with unknown content_type: %s"%repr(att))
            # ignore other content types
            continue

    # process base attributes
    oc = ent['oc']
    if oc != etype:
        print("ERROR: entity type doesn't match!")
        return None

    # set type
    attrs[node_type_attribute] = fixName(oc)

    ismi_id = ent['id']
    # rename id to ismi_id
    attrs['ismi_id'] = ismi_id

    ov = ent.get('ov', None)
    if ov is not None:
        # save ov as label
        attrs['label'] = ov
        if 'nov' in ent:
            # add normalized value
            attrs['_n_label'] = ent.get('nov')

    nx_graph.add_node(ismi_id, **attrs)
    try:
        # networkx 2.x: nodes is a subscriptable view (Graph.node is gone in 2.4)
        node = nx_graph.nodes[ismi_id]
    except TypeError:
        # networkx 1.x: nodes is a method, the attribute dicts are in Graph.node
        node = nx_graph.node[ismi_id]

    return node
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
173 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
174 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def relsFromEnt(ent, relations):
    """Collect the relations of a JSON entity.

    Every relation from the entity's 'src_rels' and 'tar_rels' lists is
    stored in relations under the relation's id. Relations whose source or
    target type is listed in exclude_objects_of_type are skipped. A relation
    id that is already present keeps its first value; an error is printed
    if the new relation differs from the stored one.

    Args:
        ent: JSON entity (dict).
        relations: dict of JSON relations by id; modified in place.
    Returns:
        the relations dict that was passed in.
    """
    outgoing = ent.get('src_rels', [])
    incoming = ent.get('tar_rels', [])
    for rel in outgoing + incoming:
        endpoint_types = (rel['src_oc'], rel['tar_oc'])
        if any(oc in exclude_objects_of_type for oc in endpoint_types):
            # skip relation to excluded objects
            continue

        rel_id = rel['id']
        if rel_id not in relations:
            # first time we see this relation
            relations[rel_id] = rel
        elif rel != relations[rel_id]:
            # same id but different content: keep the old one and complain
            print("ERROR: relation is different: %s != %s"%(repr(rel), repr(relations[rel_id])))

    return relations
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
199 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
200 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def relationsFromRels(rels, nodes):
    """Create the edges for the given relations in the networkx graph.

    Adds one edge to the module-level graph nx_graph for every relation
    whose source and target id are both contained in nodes. The edge gets
    the relation's name as type attribute and the relation's id as
    'ismi_id'. Relations with a missing end are reported and skipped.
    Attributes of the relation itself are only reported, not imported.

    Args:
        rels: dict of JSON relations
        nodes: container of the ids of the existing nodes
    Returns:
        the module-level dict nx_relations with the result of add_edge for
        every created edge, keyed by relation id
    """
    # go through all rels
    print("importing %s relations"%len(rels))
    cnt = 0
    for rel in rels.values():
        cnt += 1
        if cnt % 100 == 0:
            print(" %s relations"%cnt)

        rel_id = rel['id']
        rel_name = rel['name']
        src_id = rel['src_id']
        tar_id = rel['tar_id']
        if src_id not in nodes:
            print("ERROR: relation %s src node %s missing!"%(rel_id,src_id))
            continue

        if tar_id not in nodes:
            print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id))
            continue

        # TODO: what about attributes of relation?
        # get() so that a relation without 'atts' member does not raise
        rel_own_atts = rel.get('atts')
        if rel_own_atts:
            print("INFO: relation with attributes! name=%s id=%s atts=%s"%(rel_name, rel_id, repr(rel_own_atts)))

        # create relation with type
        rel_atts = {rel_type_attribute: fixName(rel_name), 'ismi_id': rel_id}
        # pass the attributes as keywords: this is accepted by networkx 1.x
        # and 2.x, while attr_dict= exists in 1.x only (2.x would store one
        # attribute named 'attr_dict').
        # NOTE: rel_type_attribute must not be 'key' (add_edge's own parameter).
        nx_rel = nx_graph.add_edge(src_id, tar_id, **rel_atts)

        nx_relations[rel_id] = nx_rel

    return nx_relations
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
241 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
242 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def importEnts(etype):
    """Import all entities of the given type.

    Reads the list of entities of type `etype` via readJSON(entsURL), then
    fetches the full entity data via readJSON(entsByIdURL) in batches of up
    to 100 ids. Every returned entity is converted with nodeFromEnt, the
    result is stored in nx_nodes under the entity's id, and relsFromEnt is
    called with the entity data and ismi_relations.

    The import of this type stops at the first entity whose id is already
    present in nx_nodes.
    """
    # read json for all entities of given type
    # (not called `json`, so the imported json module is not shadowed)
    ents_json = readJSON(entsURL%etype)
    ents = ents_json['ents']
    print("importing %s %ss"%(len(ents),etype))
    size = 100
    cnt = 0
    for pos in range(0, len(ents), size):
        batch = ents[pos:pos + size]
        # count the entities really contained in this batch, so the last
        # (partial) batch is not reported as a full one
        cnt += len(batch)
        print(" %s %ss"%(cnt, etype))

        # extract list of ismi ids
        ismi_ids = [str(ent['id']) for ent in batch]

        # fetch full data for list of entities
        ent_json = readJSON(entsByIdURL%','.join(ismi_ids))
        ents_data = ent_json['ents']

        # iterate through results batch
        for ent_data in ents_data:
            ismi_id = ent_data['id']
            if ismi_id in nx_nodes:
                print("ERROR: entity with id=%s exists!"%ismi_id)
                # NOTE(review): this abandons the remaining entities of this
                # type -- confirm that aborting (not skipping) is intended.
                return

            # create networkx node
            node = nodeFromEnt(ent_data, etype)

            # save node reference
            nx_nodes[ismi_id] = node

            # extract relations
            relsFromEnt(ent_data, ismi_relations)
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
283 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
284 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def importAllEnts(etypes):
    """Import the entities of all given types and then create the relations.

    Types listed in exclude_objects_of_type are not imported. After the last
    type has been handled, relationsFromRels is called once with
    ismi_relations and nx_nodes.
    """
    # filter lazily so the exclusion check still happens right before each import
    selected_types = (t for t in etypes if t not in exclude_objects_of_type)
    for entity_type in selected_types:
        importEnts(entity_type)

    # relations are built only after every type has been imported
    relationsFromRels(ismi_relations, nx_nodes)
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
295 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
296 |
23
45a823b5bf33
updated ismi2model importer and model2neo4j exporter.
casties
parents:
19
diff
changeset
|
## main

print("Copy graph from OpenMind to networkx pickle")

# an optional first command line argument replaces the default output filename
cli_args = sys.argv[1:]
if cli_args:
    output_fn = cli_args[0]

# import everything
print("Reading graph from OpenMind at %s"%baseURL)
if exclude_objects_of_type:
    print(" Skipping objects of type %s"%exclude_objects_of_type)

# to import a single type only, use e.g. importAllEnts(['TEXT'])
importAllEnts(ismi_defs)

# NOTE(review): networkx.info and networkx.write_gpickle no longer exist in
# networkx 3.x -- this script presumably targets an older networkx release.
graph_summary = networkx.info(nx_graph)
print("Graph info: %s"%graph_summary)

# export pickle
networkx.write_gpickle(nx_graph, output_fn)
print("Wrote networkx pickle file %s"%output_fn)