annotate importFromOpenMind/importer/ismi2model.py @ 46:f3945ef1e6a4

new importer for OM4XML dump file.
author casties
date Fri, 03 Feb 2017 18:46:16 +0100
parents 9a9a6da1d415
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
1 import urllib.request
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
2 import json
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
3 import networkx
23
45a823b5bf33 updated ismi2model importer and model2neo4j exporter.
casties
parents: 19
diff changeset
4 import sys
19
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
5
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
## configure behaviour

# output filename
output_fn = "ismi_graph.gpickle"

# OpenMind base URL (the commented-out line is the alternative server URL)
#baseURL="http://ismi.mpiwg-berlin.mpg.de//om4-ismi/jsonInterface?"
baseURL = "http://localhost:18080/ismi-richfaces/jsonInterface?"

# node types to exclude from the graph
exclude_objects_of_type = ['DIGITALIZATION', 'REFERENCE']

# attributes to exclude
exclude_attributes_of_type = [
    'lw',
    'node_type',
    'nov',
    'notes_old',
]

# name of type attribute
node_type_attribute = '_type'
rel_type_attribute = '_type'

# URL template: entities of one object class (fill in the class with %)
entsURL = baseURL + "method=get_ents&oc=%s"

# URL template: entities with content for a list of ids
entsByIdURL = baseURL + "method=get_ents&include_content=True&ids=%s"

# URL template: a single entity with content by id
entURL = baseURL + "method=get_ent&id=%s&include_content=True"
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
35
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
36
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def readJSON(url):
    """Fetch the given URL and return its body parsed as JSON.

    Args:
        url: URL that is opened with urllib.request.urlopen.
    Returns:
        the parsed JSON document; the response body is decoded as UTF-8.
    """
    #print("JSON loading %s"%url)
    # the context manager closes the response even if reading fails
    with urllib.request.urlopen(url) as wsh:
        txt = wsh.read()

    return json.loads(txt.decode("utf-8"))
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
42
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
# load all definitions from OpenMind (this request runs when the module is loaded)
defs_json = readJSON(baseURL+"method=get_defs")

# current list of all definitions
ismi_defs = [atts['ov'] for atts in defs_json['defs']]

#ismi_types=["PERSON","WITNESS","CODEX","PLACE","COLLECTION","REPOSITORY"]

# graph that collects all imported nodes and relations
nx_graph = networkx.MultiDiGraph()

# module-level collections filled during the import
nx_nodes = {}
ismi_relations = {}
nx_relations = {}
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
56
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
57
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
    """Return name adjusted for use as a type or attribute name.

    Args:
        name: the original name.
        is_src_rel: currently has no effect on the name.
        is_tar_rel: if true, the name is prefixed with '<'.
        att_from_rel: if true, every occurrence of 'is_', 'has_', 'was_'
            and '_of' is removed from the name (in this order).
    Returns:
        the adjusted name.
    """
    # source relations keep their name unchanged (appending '>' is disabled)
    if is_tar_rel:
        name = '<' + name

    if att_from_rel:
        # clean up relations as attribute names
        for fragment in ('is_', 'has_', 'was_', '_of'):
            name = name.replace(fragment, '')

    return name
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
74
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
75
35
d535f11a0d81 be more aggressive about parsing dates in text type fields.
casties
parents: 34
diff changeset
76
d535f11a0d81 be more aggressive about parsing dates in text type fields.
casties
parents: 34
diff changeset
def parseYear(val):
    """Return the year from a JSON-encoded date object.

    The year is read from the 'from' member if there is one, otherwise
    from the 'date' member. A JSON value with neither member is reported
    on stdout.

    Args:
        val: string containing a JSON date object.
    Returns:
        the value of the 'year' entry or None if val can not be parsed
        or contains no year.
    """
    year = None
    try:
        date_json = json.loads(val)
        if 'from' in date_json:
            year = date_json['from'].get('year', None)
        elif 'date' in date_json:
            year = date_json['date'].get('year', None)
        else:
            print("don't know what to do with date %s"%(val))

    except (ValueError, TypeError, AttributeError, KeyError):
        # parsing is best effort: invalid JSON (ValueError), non-string
        # input or unexpected JSON structure just means "no year"
        pass

    return year
d535f11a0d81 be more aggressive about parsing dates in text type fields.
casties
parents: 34
diff changeset
92
d535f11a0d81 be more aggressive about parsing dates in text type fields.
casties
parents: 34
diff changeset
93
19
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def nodeFromEnt(ent, etype):
    """Create a node in the networkx graph from the given JSON entity.

    Collects the attributes of the entity, adds a node with these
    attributes to the module-level nx_graph under the entity's id and
    returns the node.

    Args:
        ent: JSON entity (dict) with the keys 'atts', 'oc' and 'id' and
            optionally 'ov' and 'nov'.
        etype: the expected type (value of 'oc') of the entity.
    Returns:
        the attribute dict of the new node in nx_graph or None if the
        type of the entity doesn't match etype.
    """
    # check the type first so no work is spent on a mismatching entity
    oc = ent['oc']
    if oc != etype:
        print("ERROR: entity type doesn't match!")
        return None

    attrs = {}
    # go through all attributes
    for att in ent['atts']:
        ct = att.get('content_type', None)
        name = att.get('name', None)
        if name in exclude_attributes_of_type:
            # exclude attribute
            continue

        if name is None:
            # names become keyword arguments of add_node and must be strings
            print("WARN: attribute without name: %s"%repr(att))
            continue

        if ct is None or ct.lower() in ['text', 'arabic', 'bool', 'boolean', 'url', 'language']:
            # normal text attribute (assume no content_type is text too...)
            val = att['ov']

            # startswith is safe for empty strings (val[0] is not)
            if isinstance(val, str) and val.startswith('{'):
                # try to parse as date
                year = parseYear(val)
                if year is not None:
                    val = year

            # keep attribute
            attrs[name] = val
            if 'nov' in att:
                # add normalized value
                attrs['_n_'+name] = att['nov']

        elif ct == 'date':
            # date attribute
            val = att['ov']
            # try to parse date object to get gregorian year
            year = parseYear(val)
            if year is not None:
                attrs[name] = year

        elif ct == 'num':
            # number attribute
            val = att['ov']

            # keep attribute, assume num is int
            try:
                attrs[name] = int(val)
            except (TypeError, ValueError):
                # one bad value should not abort the whole import
                print("WARN: attribute with non-integer num value: %s"%repr(att))
                continue

        elif ct == 'old':
            # ignore attribute
            continue

        else:
            print("WARN: attribute with unknown content_type: %s"%repr(att))
            # ignore other content types
            continue

    # process base attributes
    # set type
    attrs[node_type_attribute] = fixName(oc)

    ismi_id = ent['id']
    # rename id to ismi_id
    attrs['ismi_id'] = ismi_id

    ov = ent.get('ov', None)
    if ov is not None:
        # save ov as label
        attrs['label'] = ov
        # NOTE(review): assumed to be nested under the ov check like the
        # normalized attribute values above - confirm
        if 'nov' in ent:
            # add normalized value
            attrs['_n_label'] = ent.get('nov')

    nx_graph.add_node(ismi_id, **attrs)
    try:
        # networkx >= 2.0 (Graph.node was removed in networkx 2.4)
        node = nx_graph.nodes[ismi_id]
    except TypeError:
        # networkx 1.x: nodes is a method, the node data is in Graph.node
        node = nx_graph.node[ismi_id]

    return node
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
173
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
174
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def relsFromEnt(ent, relations):
    """Extract all relations from JSON entity.

    Adds the JSON of every relation in the entity's 'src_rels' and
    'tar_rels' to the dict relations under the relation's id. Relations
    from or to a type in exclude_objects_of_type are skipped. A relation
    whose id is already in relations is not stored again; an error is
    printed if it differs from the stored one.

    Args:
        ent: JSON entity (dict), optionally with 'src_rels' and 'tar_rels'.
        relations: dict of JSON relations by id; modified in place.
    Returns:
        the dict relations.
    """
    # go through src_rels and tar_rels (missing or null lists count as empty)
    rels = (ent.get('src_rels') or []) + (ent.get('tar_rels') or [])
    for rel in rels:
        src_type = rel['src_oc']
        tar_type = rel['tar_oc']
        if src_type in exclude_objects_of_type or tar_type in exclude_objects_of_type:
            # skip relation to excluded objects
            continue

        rel_id = rel['id']
        if rel_id in relations:
            old_rel = relations[rel_id]
            if rel != old_rel:
                print("ERROR: relation is different: %s != %s"%(repr(rel), repr(old_rel)))

            # keep the relation that is already stored
            continue

        relations[rel_id] = rel

    return relations
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
199
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
200
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def relationsFromRels(rels, nodes):
    """Create relations (edges) in the networkx graph.

    Adds an edge to the module-level nx_graph for every relation whose
    source and target ids are both in nodes and stores the result of
    add_edge in the module-level dict nx_relations under the relation's id.

    Args:
        rels: dict of JSON relations
        nodes: dict of existing nodes by id
    Returns:
        the module-level dict nx_relations
    """
    # go through all rels
    print("importing %s relations"%len(rels))
    for cnt, rel in enumerate(rels.values(), start=1):
        if cnt % 100 == 0:
            print(" %s relations"%cnt)

        rel_id = rel['id']
        rel_name = rel['name']
        src_id = rel['src_id']
        tar_id = rel['tar_id']
        if src_id not in nodes:
            print("ERROR: relation %s src node %s missing!"%(rel_id,src_id))
            continue

        if tar_id not in nodes:
            print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id))
            continue

        # TODO: what about attributes of relation?
        rel_atts_json = rel.get('atts')
        if rel_atts_json:
            print("INFO: relation with attributes! name=%s id=%s atts=%s"%(rel_name, rel_id, repr(rel_atts_json)))

        # create relation with type
        rel_atts = {rel_type_attribute: fixName(rel_name), 'ismi_id': rel_id}
        # pass the attributes as keywords: networkx >= 2.0 has no attr_dict
        # parameter and would store the dict as one attribute named 'attr_dict'
        nx_rel = nx_graph.add_edge(src_id, tar_id, **rel_atts)

        nx_relations[rel_id] = nx_rel

    return nx_relations
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
241
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
242
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def importEnts(etype):
    """Import all entities of the given type.

    Reads the list of entities of type ``etype`` via ``readJSON(entsURL%etype)``
    and then fetches their full data in batches of 100 ids via ``entsByIdURL``.
    For every entity returned a node is created with ``nodeFromEnt`` and stored
    in the module-level ``nx_nodes`` dict under its ismi id; the entity is then
    passed to ``relsFromEnt`` together with the module-level ``ismi_relations``.

    If an entity id is already present in ``nx_nodes`` an error is printed and
    the import of this type stops immediately (remaining entities are skipped).

    Returns None.
    """
    batch_size = 100

    # read json for all entities of given type
    # (do not call the result "json": that would shadow the imported module)
    ents = readJSON(entsURL%etype)['ents']
    print("importing %s %ss"%(len(ents),etype))

    cnt = 0
    for pos in range(0, len(ents), batch_size):
        batch = ents[pos:pos + batch_size]

        # progress: count the entities actually contained in this batch so a
        # short final batch is not reported as a full one
        cnt += len(batch)
        print(" %s %ss"%(cnt, etype))

        # extract list of ismi ids
        ismi_ids = [str(ent['id']) for ent in batch]

        # fetch full data for list of entities
        ents_data = readJSON(entsByIdURL%','.join(ismi_ids))['ents']

        # iterate through results batch
        for ent_data in ents_data:
            ismi_id = ent_data['id']
            if ismi_id in nx_nodes:
                # duplicate id: give up on this entity type
                print("ERROR: entity with id=%s exists!"%ismi_id)
                return

            # create networkx node and save node reference
            nx_nodes[ismi_id] = nodeFromEnt(ent_data, etype)

            # extract relations
            relsFromEnt(ent_data, ismi_relations)
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
283
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
284
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
def importAllEnts(etypes):
    """Import the entities of every type in etypes, then build the relations.

    Types contained in the module-level ``exclude_objects_of_type`` are not
    imported. After all types are processed ``relationsFromRels`` is called
    once with the collected ``ismi_relations`` and ``nx_nodes``.
    """
    for etype in etypes:
        # only import types that are not excluded
        if etype not in exclude_objects_of_type:
            importEnts(etype)

    relationsFromRels(ismi_relations, nx_nodes)
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
295
ca1e02a2a9c4 unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff changeset
296
23
45a823b5bf33 updated ismi2model importer and model2neo4j exporter.
casties
parents: 19
diff changeset
## main

print("Copy graph from OpenMind to networkx pickle")

# an optional first command line argument overrides the output filename
if len(sys.argv) >= 2:
    output_fn = sys.argv[1]

# import everything
print("Reading graph from OpenMind at %s"%baseURL)
if exclude_objects_of_type:
    print(" Skipping objects of type %s"%exclude_objects_of_type)

# for a quick test run a single type can be imported instead,
# e.g. importAllEnts(['TEXT'])
importAllEnts(ismi_defs)

# debugging aid: print(" nodes:%s"%repr(nx_graph.nodes(data=True)))
print("Graph info: %s"%networkx.info(nx_graph))

# export pickle
# NOTE(review): networkx.info and networkx.write_gpickle were removed in
# networkx 3.0 -- confirm the networkx version this script runs against.
networkx.write_gpickle(nx_graph, output_fn)
print("Wrote networkx pickle file %s"%output_fn)