Mercurial > hg > drupalISMI
comparison importFromOpenMind/importer/unfilteredISMI.py @ 19:ca1e02a2a9c4
unfilteredISMI: OpenMind-to-JSON exporter, like filterISMI.
ismi2model: OpenMind importer, like ismi2neo4j, that saves a networkx pickle file.
author | casties |
---|---|
date | Wed, 09 Sep 2015 17:32:42 +0200 |
parents | |
children | a9bfd49355f8 |
comparison
equal
deleted
inserted
replaced
18:0827156df210 | 19:ca1e02a2a9c4 |
---|---|
1 ''' | |
2 Created on 22.04.2014 | |
3 | |
4 @author: dwinter | |
5 ''' | |
6 | |
7 import os | |
8 import json | |
9 import urllib.request | |
10 | |
11 #ismiBaseUrl="https://ismi.mpiwg-berlin.mpg.de/om4-ismi" | |
12 ismiBaseUrl="http://localhost:18080/ismi-richfaces" | |
13 | |
class Importer:
    """Download entities and relations from an OpenMind JSON interface.

    Attributes:
        data: the most recently loaded JSON document (a dict).
        allents: every downloaded entity seen so far, keyed by entity id.
        allrels: every relation seen so far, keyed by relation id.
    """

    # maximum number of entity ids sent in one get_ents request
    BATCH_SIZE = 500

    def __init__(self):
        # Per-instance state: class-level dicts would be shared by all
        # Importer objects.
        self.data = {}
        self.allents = {}
        self.allrels = {}

    def loadJSON(self, url):
        """Load JSON from URL.

        Saves JSON in data member.
        """
        #print(" loading "+url)
        # HTTPResponse has no readall() on current Python 3; read() returns
        # the complete body.
        with urllib.request.urlopen(url) as response:
            str_response = response.read().decode('utf-8')

        self.data = json.loads(str_response)

    def loadJSONFromFile(self, fn):
        """Load JSON from file fn + ".json".

        Saves JSON in data member.
        """
        print(" loading "+fn+".json")
        # The text decoding is done by open(); json.load() no longer accepts
        # an encoding argument.
        with open(fn+".json", 'r', encoding="utf-8") as infile:
            self.data = json.load(infile)

    def getEntIds(self):
        """Extract entity ids and relations from data member.

        Returns a set with the ids (as strings) of all entities in
        data["ents"] and a list of all their src_rels and tar_rels.
        A missing or empty "ents" member yields an empty set and list.
        """
        ents = self.data.get("ents") or []

        ret = set()
        rels = []

        for ent in ents:
            ret.add(str(ent.get('id')))
            for key in ('src_rels', 'tar_rels'):
                if key in ent:
                    print("%s: %s" % (key, ent.get(key)))
                    rels.extend(ent.get(key))

        return ret, rels

    def _registerRels(self, rels):
        """Store each relation in allrels by id, reporting ids seen before."""
        for rel in rels:
            rel_id = rel.get('id')
            if rel_id in self.allrels:
                print("relation id=%s exists!"%rel_id)
            else:
                self.allrels[rel_id] = rel

    def loadallEnts(self, kind="tar", filterOC=None):
        """Download the full entities for the ids in the data member.

        Gets the entity ids and first relations via getEntIds(), then
        downloads the entities from OpenMind in batches of BATCH_SIZE ids.
        Every downloaded entity is stored in allents and every relation of
        a downloaded entity in allrels.

        kind and filterOC are accepted for compatibility with callers but
        are not used.

        Returns a list of the downloaded entities and a list of relations
        (relations from getEntIds() followed by those of the downloaded
        entities).
        """
        ids, rels = self.getEntIds()

        # materialise the id set once so the batches are consistent slices
        idList = list(ids)
        lenId = len(idList)
        print("loading %s entities"%lenId)

        ents = []
        if not idList:
            # nothing to download: do not send a request with empty ids
            return ents, rels

        baseUrl = ismiBaseUrl+"/jsonInterface?include_content=true&include_romanization=true&method=get_ents"

        # stepping by BATCH_SIZE never yields an empty trailing batch
        for start in range(0, lenId, self.BATCH_SIZE):
            idsString = ",".join(idList[start:start+self.BATCH_SIZE])

            qs = baseUrl+"&ids="+idsString
            #print(" loading ents from "+qs)
            with urllib.request.urlopen(qs) as response:
                entsJ = json.loads(response.read().decode('utf-8'))

            batch = entsJ.get("ents") or []
            ents += batch

            # iterate all entities
            for ent in batch:
                ismi_id = ent.get('id')
                if ismi_id in self.allents:
                    print("entity id=%s exists!"%ismi_id)
                else:
                    self.allents[ismi_id] = ent

                # extract relations
                for key in ('src_rels', 'tar_rels'):
                    if key in ent:
                        entRels = ent.get(key)
                        rels.extend(entRels)
                        self._registerRels(entRels)

        return ents, rels

    def saveallEnts(self, filename, kind="tar", filterOC=None):
        """Load all related entities and save them as JSON.

        Loads the entities via loadallEnts() (kind and filterOC are passed
        through).
        Saves entities in file filename.json.
        Saves relations in file filename_rels.json.
        """
        ents, rels = self.loadallEnts(kind=kind, filterOC=filterOC)

        print(" writing ", filename+".json")
        with open(filename+".json", "wb") as of:
            of.write(json.dumps({"ents": ents}).encode('utf-8'))

        print(" writing ", filename+"_rels.json")
        with open(filename+"_rels.json", "w") as of:
            json.dump({'rels': rels}, of)
151 | |
152 | |
if __name__ == '__main__':
    # Export every entity type known to OpenMind into one JSON file pair per
    # type, then write the accumulated entities and relations to ALL*.json.
    imp = Importer()

    # get current list of all definitions
    imp.loadJSON(ismiBaseUrl+"/jsonInterface?method=get_defs")
    ismi_defs = [atts['ov'] for atts in imp.data['defs']]

    # create directory for export files
    # (exist_ok avoids the check-then-create race and does not fail when the
    # directory is already there)
    exportDir = '/tmp/ismi_data'
    os.makedirs(exportDir, exist_ok=True)

    for ismi_def in ismi_defs:
        print("loading entities of type %s"%ismi_def)
        #
        # load all entities of type ismi_def
        # contains entities with attributes and first-order relations
        #
        url = ismiBaseUrl+"/jsonInterface?method=get_ents&oc=%s"%ismi_def
        imp.loadJSON(url)

        #
        # load and save all target relations of entities as entities.json
        #
        imp.saveallEnts(exportDir+"/%s"%ismi_def)

    #
    # save all entities in one file
    #
    print(" writing ", "ALL.json")
    allents = list(imp.allents.values())
    with open(exportDir+"/ALL.json", "wb") as of:
        of.write(json.dumps({"ents": allents}).encode('utf-8'))

    #
    # save all relations in one file
    #
    print(" writing ", "ALL_rels.json")
    allrels = list(imp.allrels.values())
    with open(exportDir+"/ALL_rels.json", "wb") as of:
        of.write(json.dumps({"rels": allrels}).encode('utf-8'))