Mercurial > hg > drupalISMI
annotate importFromOpenMind/importer/ismi2model.py @ 28:a9bfd49355f8
updated config for ismi-dev.
author | casties |
---|---|
date | Wed, 18 Nov 2015 15:22:05 +0100 |
parents | 3fce3fa9097e |
children | 1a1877812757 |
rev | line source |
---|---|
19
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
1 import urllib.request |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
2 import json |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
3 import networkx |
23
45a823b5bf33
updated ismi2model importer and model2neo4j exporter.
casties
parents:
19
diff
changeset
|
4 import sys |
19
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
5 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
## configure behaviour

# output filename
# NOTE(review): the ".gpickle" suffix suggests a pickled networkx graph;
# the code that writes this file is not visible here -- confirm.
output_fn = "ismi_graph.gpickle"

# OpenMind base URL
# Ends in "?" so that query strings can be appended directly (see the URL
# templates below).
baseURL="http://localhost:18080/om4-ismi/jsonInterface?"

# node types to exclude from the graph
# relsFromEnt() skips every relation whose source or target type is listed.
exclude_objects_of_type = ['DIGITALIZATION', 'REFERENCE']

# attributes to exclude
# nodeFromEnt() drops text and number attributes whose name is listed.
exclude_attributes_of_type = [
    'lw',
    'node_type',
    'nov',
    'notes_old'
]


# URL template: entities of one object class; "%s" is the class name.
entsURL=baseURL+"method=get_ents&oc=%s"

# URL template: entities including content for a list of ids; "%s" is a
# comma-separated list of ids.
entsByIdURL = baseURL+"method=get_ents&include_content=True&ids=%s"

# URL template: a single entity including content; "%s" is the entity id.
entURL=baseURL+"method=get_ent&id=%s&include_content=True"
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
31 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
32 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def readJSON(url):
    """Fetch *url* and return its body parsed as JSON.

    The response body is read completely, decoded as UTF-8 and handed to
    ``json.loads``.

    Args:
        url: URL (string) passed to ``urllib.request.urlopen``.
    Returns:
        The Python object decoded from the JSON response.
    Raises:
        urllib.error.URLError: if the URL cannot be opened.
        UnicodeDecodeError: if the body is not valid UTF-8.
        ValueError: if the body is not valid JSON.
    """
    # the context manager closes the response (and its connection) even
    # when reading fails
    with urllib.request.urlopen(url) as wsh:
        txt = wsh.read()

    return json.loads(txt.decode("utf-8"))
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
38 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
# Fetch the definitions from OpenMind. This runs at module load time and
# performs a network request against baseURL.
defs_json = readJSON(baseURL+"method=get_defs")

# current list of all definitions
# (the 'ov' value of every entry under 'defs' in the response)
ismi_defs = [atts['ov'] for atts in defs_json['defs']]

#ismi_types=["PERSON","WITNESS","CODEX","PLACE","COLLECTION","REPOSITORY"]


# Graph that nodeFromEnt() adds nodes to and relationsFromRels() adds
# edges to.
nx_graph = networkx.MultiDiGraph()

# NOTE(review): presumably filled by the import code further down the file
# (nodes by ismi id, JSON relations by relation id) -- confirm.
nx_nodes = {}
ismi_relations = {}
# Filled by relationsFromRels(): relation id -> return value of add_edge().
nx_relations = {}
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
52 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
53 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def fixName(name, is_src_rel=False, is_tar_rel=False, att_from_rel=False):
    """Return a cleaned-up version of an OpenMind type or relation name.

    Args:
        name: the original name.
        is_src_rel: accepted for symmetry with is_tar_rel; currently has
            no effect on the result.
        is_tar_rel: if true, the result is prefixed with '<'.
        att_from_rel: if true, the fragments 'is_', 'has_', 'was_' and
            '_of' are removed wherever they occur in the name.
    Returns:
        The fixed name.
    """
    # Swap the "floruit" spelling for "flourish". Only the first matching
    # variant is replaced: a name containing the upper-case form keeps any
    # lower-case occurrences untouched.
    for wrong, right in (('FLORUIT', 'FLOURISH'), ('floruit', 'flourish')):
        if wrong in name:
            name = name.replace(wrong, right)
            break

    # mark relations seen from the target side
    prefix = '<' if is_tar_rel else ''
    fixed = prefix + name

    if att_from_rel:
        # clean up relations as attribute names
        for fragment in ('is_', 'has_', 'was_', '_of'):
            fixed = fixed.replace(fragment, '')

    return fixed
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
77 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
78 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def _yearFromDateAtt(ent, key, val):
    """Return the year found in the JSON date value *val*, or None.

    The year is taken from the 'from' member if the decoded value has one,
    otherwise from the 'date' member. A message is printed when neither
    member exists or when the value cannot be parsed.
    """
    try:
        date_json = json.loads(val)
        if 'from' in date_json:
            return date_json['from'].get('year', None)

        if 'date' in date_json:
            return date_json['date'].get('year', None)

        print("don't know what to do with date on %s: %s=%s"%(ent['id'],key,val))

    except (ValueError, TypeError, AttributeError, KeyError):
        # not JSON at all, or JSON of an unexpected shape
        print("ERROR: invalid JSON in date: %s"%repr(val))

    return None


def nodeFromEnt(ent, etype):
    """Create a networkx node from the given JSON entity.

    Adds the node to the module-level graph nx_graph under the entity's id.

    Args:
        ent: JSON entity (dict) with the keys 'atts', 'oc' and 'id' and
            optionally 'ov'.
        etype: expected object class of the entity.
    Returns:
        The attribute dict of the new node in nx_graph, or None if the
        entity's 'oc' does not match etype (nothing is added then).
    Raises:
        ValueError: if an attribute of content_type 'num' has a value that
            int() cannot convert.
    """
    attrs = {}
    # go through all attributes
    for att in ent['atts']:
        ct = att.get('content_type', None)
        if ct is None or ct.lower() in ['text', 'arabic', 'bool', 'boolean', 'url', 'language']:
            # normal text attribute (assume no content_type is text too...)
            key = att['name']
            val = att['ov']

            if key in exclude_attributes_of_type:
                # exclude attribute
                continue

            # keep attribute
            attrs[key] = val

        elif ct == 'num':
            # number attribute
            key = att['name']
            val = att['ov']

            if key in exclude_attributes_of_type:
                # exclude attribute
                continue

            # keep attribute, assume num is int
            attrs[key] = int(val)

        elif ct == 'date':
            # date attribute (not filtered by exclude_attributes_of_type)
            key = att['name']
            val = att['ov']
            # try to parse date object to get gregorian year
            year = _yearFromDateAtt(ent, key, val)
            if year is not None:
                attrs[key] = year

        elif ct == 'old':
            # ignore attribute
            continue

        else:
            print("WARN: attribute with unknown content_type: %s"%repr(att))
            # ignore other content types
            continue

    # process base attributes
    oc = ent['oc']
    if oc != etype:
        print("ERROR: entity type doesn't match!")
        return None

    # rename if type attr exists ('type' is overwritten just below)
    if 'type' in attrs:
        attrs['type2'] = attrs['type']

    # set type
    attrs['type'] = fixName(oc)

    ismi_id = ent['id']
    # rename id to ismi_id
    attrs['ismi_id'] = ismi_id

    ov = ent.get('ov', None)
    if ov is not None:
        # save ov as label
        attrs['label'] = ov

    # create node with attributes
    nx_graph.add_node(ismi_id, **attrs)
    # NOTE(review): Graph.node was removed in networkx 2.4 in favour of
    # Graph.nodes; kept as is because the targeted networkx version is
    # not visible here.
    node = nx_graph.node[ismi_id]

    return node
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
169 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
170 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def relsFromEnt(ent, relations):
    """Collect the relations of a JSON entity.

    Every relation listed under the entity's 'src_rels' and 'tar_rels' is
    stored in the dict relations under the relation's id, unless one of
    its end points has a type listed in exclude_objects_of_type.

    Args:
        ent: JSON entity (dict).
        relations: dict of relation id -> JSON relation; modified in place.
    Returns:
        The relations dict that was passed in.
    """
    outgoing = ent.get('src_rels', [])
    incoming = ent.get('tar_rels', [])
    for rel in outgoing + incoming:
        end_types = (rel['src_oc'], rel['tar_oc'])
        if any(end_type in exclude_objects_of_type for end_type in end_types):
            # skip relation to excluded objects
            continue

        rel_id = rel['id']
        if rel_id in relations:
            previous = relations[rel_id]
            if rel != previous:
                # keep the relation that was recorded first
                print("ERROR: relation is different: %s != %s"%(repr(rel), repr(previous)))
                continue

        relations[rel_id] = rel

    return relations
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
195 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
196 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def relationsFromRels(rels, nodes):
    """Create edges in the networkx graph for the given relations.

    Each relation becomes an edge in nx_graph from its 'src_id' to its
    'tar_id', carrying the fixed relation name as 'type' and the relation
    id as 'ismi_id'. Relations with an end point that is not in nodes are
    reported and skipped.

    Args:
        rels: dict of JSON relations
        nodes: container of the ids of the existing nodes
    Returns:
        the module-level dict nx_relations, mapping each relation id to
        the value returned by add_edge
    """
    print("importing %s relations"%len(rels))
    for cnt, rel in enumerate(rels.values(), start=1):
        # progress report
        if cnt % 100 == 0:
            print(" %s relations"%cnt)

        rel_id, rel_name, src_id, tar_id = (
            rel[field] for field in ('id', 'name', 'src_id', 'tar_id')
        )

        if src_id not in nodes:
            print("ERROR: relation %s src node %s missing!"%(rel_id,src_id))
            continue

        if tar_id not in nodes:
            print("ERROR: relation %s tar node %s missing!"%(rel_id,tar_id))
            continue

        # create relation with type
        nx_relations[rel_id] = nx_graph.add_edge(
            src_id, tar_id, type=fixName(rel_name), ismi_id=rel_id
        )

    return nx_relations
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
232 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
233 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def importEnts(etype, batch_size=100):
    """Import all entities of the given type.

    First reads the list of entities of type `etype` from the URL built
    from `entsURL`, then fetches the full data of these entities in
    batches of `batch_size` ids using `entsByIdURL`.

    For every entity returned, a node is created with `nodeFromEnt` and
    stored in the module-level `nx_nodes` dict under the entity id, and
    the entity is handed to `relsFromEnt` together with the module-level
    `ismi_relations`.

    If an entity id is already present in `nx_nodes` an error message is
    printed and the import of this type stops immediately; the remaining
    entities of the type are not imported.

    etype: name of the entity type to import.
    batch_size: number of entity ids requested per call (default 100).
    """
    # read json for all entities of given type
    # (named type_listing so that the imported json module is not shadowed)
    type_listing = readJSON(entsURL%etype)
    ents = type_listing['ents']
    print("importing %s %ss"%(len(ents),etype))

    cnt = 0
    for pos in range(0, len(ents), batch_size):
        batch = ents[pos:pos + batch_size]
        # count the entities actually contained in this batch so that the
        # progress output does not overshoot on the last, partial batch
        cnt += len(batch)
        print(" %s %ss"%(cnt, etype))

        # extract list of ismi ids
        ismi_ids = [str(ent['id']) for ent in batch]

        # fetch full data for list of entities
        ent_json = readJSON(entsByIdURL%','.join(ismi_ids))
        ents_data = ent_json['ents']

        # iterate through results batch
        for ent_data in ents_data:
            ismi_id = ent_data['id']
            if ismi_id in nx_nodes:
                # a duplicate id aborts the import of this entity type
                print("ERROR: entity with id=%s exists!"%ismi_id)
                return

            # create networkx node and save node reference
            nx_nodes[ismi_id] = nodeFromEnt(ent_data, etype)

            # extract relations
            relsFromEnt(ent_data, ismi_relations)
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
274 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
275 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
276 # In[119]: |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
277 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
def importAllEnts(etypes):
    """Import the entities of every type in etypes and build the relations.

    Types contained in the module-level `exclude_objects_of_type` are
    skipped. After all types have been imported `relationsFromRels` is
    called once with the collected `ismi_relations` and `nx_nodes`.
    """
    for entity_type in etypes:
        # only import types that are not excluded by configuration
        if entity_type not in exclude_objects_of_type:
            importEnts(entity_type)

    relationsFromRels(ismi_relations, nx_nodes)
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
288 |
ca1e02a2a9c4
unfilteredIsmi: openmind to json exporter like filterISMI.
casties
parents:
diff
changeset
|
289 |
23
45a823b5bf33
updated ismi2model importer and model2neo4j exporter.
casties
parents:
19
diff
changeset
|
## main

print("Copy graph from OpenMind to networkx pickle")

# an optional first command line argument replaces the default output filename
if sys.argv[1:]:
    output_fn = sys.argv[1]

# import everything
print("Reading graph from OpenMind at %s"%baseURL)
if exclude_objects_of_type:
    print(" Skipping objects of type %s"%exclude_objects_of_type)

importAllEnts(ismi_defs)
# for a quick trial run a single type can be imported instead, e.g. ['TEXT']

# NOTE(review): networkx.info and networkx.write_gpickle appear to be gone
# from networkx 3.x - confirm which networkx version this script runs with.
print("Graph info: %s"%networkx.info(nx_graph))

# export pickle
networkx.write_gpickle(nx_graph, output_fn)
print("Wrote networkx pickle file %s"%output_fn)