comparison importFromOpenMind/importer/unfilteredISMI.py @ 19:ca1e02a2a9c4

unfilteredIsmi: openmind to json exporter like filterISMI. ismi2model: openmind importer like ismi2neo4j that saves networkx pickle file.
author casties
date Wed, 09 Sep 2015 17:32:42 +0200
parents
children a9bfd49355f8
comparison
equal deleted inserted replaced
18:0827156df210 19:ca1e02a2a9c4
1 '''
2 Created on 22.04.2014
3
4 @author: dwinter
5 '''
6
7 import os
8 import json
9 import urllib.request
10
11 #ismiBaseUrl="https://ismi.mpiwg-berlin.mpg.de/om4-ismi"
12 ismiBaseUrl="http://localhost:18080/ismi-richfaces"
13
class Importer:
    """Downloads entities and relations from OpenMind and saves them as JSON.

    Attributes:
        idsPerRequest: maximum number of entity ids sent in one get_ents request.
        data: the most recently loaded JSON document (set by loadJSON() and
            loadJSONFromFile()); None until something has been loaded.
        allents: every entity downloaded by loadallEnts(), keyed by entity id.
        allrels: every relation seen by loadallEnts(), keyed by relation id.
    """

    # maximum number of entity ids per get_ents request
    idsPerRequest = 500

    def __init__(self):
        # Per-instance state: as class-level dicts these would be shared
        # between all Importer instances.
        self.data = None
        self.allents = {}
        self.allrels = {}

    def loadJSON(self, url):
        """Load JSON from URL.

        Saves JSON in data member.
        """
        #print(" loading "+url)
        # read() instead of readall(): the latter is not available on the
        # response objects of current Python 3 releases.
        with urllib.request.urlopen(url) as response:
            self.data = json.loads(response.read().decode('utf-8'))

    def loadJSONFromFile(self, fn):
        """Load JSON from file fn.json (fn is given without the extension).

        Saves JSON in data member.
        """
        print(" loading "+fn+".json")
        # The file is decoded by open(); json.load() takes no encoding
        # argument in current Python 3 releases.
        with open(fn+".json", 'r', encoding="utf-8") as f:
            self.data = json.load(f)

    def getEntIds(self):
        """Extract entity ids and relations from data member.

        Returns a set with the ids (as strings) of all entities in
        data["ents"] and a list with all of their 'src_rels' and 'tar_rels'
        relations.
        """
        ids = set()
        rels = []

        for ent in self.data.get("ents"):
            ids.add(str(ent.get('id')))
            for key in ('src_rels', 'tar_rels'):
                if key in ent:
                    print("%s: %s" % (key, ent.get(key)))
                    rels.extend(ent.get(key))

        return ids, rels

    def _fetchEnts(self, ids):
        """Download the entities with the given ids in one get_ents request."""
        url = (ismiBaseUrl
               + "/jsonInterface?include_content=true&include_romanization=true&method=get_ents"
               + "&ids=" + ",".join(ids))
        #print(" loading ents from "+url)
        with urllib.request.urlopen(url) as response:
            return json.loads(response.read().decode('utf-8')).get("ents")

    def _registerEnt(self, ent, rels):
        """Record ent and its relations in allents/allrels and append the relations to rels."""
        ismi_id = ent.get('id')
        if ismi_id in self.allents:
            print("entity id=%s exists!"%ismi_id)
        else:
            self.allents[ismi_id] = ent

        for key in ('src_rels', 'tar_rels'):
            if key not in ent:
                continue

            rels.extend(ent.get(key))
            for rel in ent.get(key):
                rel_id = rel.get('id')
                if rel_id in self.allrels:
                    print("relation id=%s exists!"%rel_id)
                else:
                    self.allrels[rel_id] = rel

    def loadallEnts(self, kind="tar", filterOC=None):
        """Download the full version of every entity in data member.

        Takes the entity ids from getEntIds() and downloads the entities from
        OpenMind in batches of at most idsPerRequest ids. Every downloaded
        entity and relation is also recorded in allents and allrels.

        kind and filterOC are accepted for compatibility with existing
        callers; they are not used.

        Returns the list of downloaded entities and a list of relations (the
        relations found by getEntIds() followed by those of the downloaded
        entities).
        """
        ids, rels = self.getEntIds()

        # Build the id list once; sorting makes the batches deterministic.
        idList = sorted(ids)
        print("loading %s entities"%len(idList))

        ents = []
        # Stepping through the list never produces an empty batch, so no
        # request with an empty id list is sent.
        for start in range(0, len(idList), self.idsPerRequest):
            batch = self._fetchEnts(idList[start:start+self.idsPerRequest])
            ents += batch
            for ent in batch:
                self._registerEnt(ent, rels)

        return ents, rels

    def saveallEnts(self, filename, kind="tar", filterOC=None):
        """Download all entities via loadallEnts() and save them as JSON.

        Saves entities in file filename.json.
        Saves relations in file filename_rels.json.

        kind and filterOC are passed on to loadallEnts().
        """
        ents, rels = self.loadallEnts(kind=kind, filterOC=filterOC)

        print(" writing ", filename+".json")
        with open(filename+".json", "w", encoding="utf-8") as of:
            json.dump({"ents": ents}, of)

        print(" writing ", filename+"_rels.json")
        with open(filename+"_rels.json", "w", encoding="utf-8") as of:
            json.dump({'rels': rels}, of)
151
152
if __name__ == '__main__':
    # Export every entity type known to OpenMind into one JSON file pair per
    # type, plus one combined file pair for all entities and relations.
    imp = Importer()

    # get current list of all definitions
    imp.loadJSON(ismiBaseUrl+"/jsonInterface?method=get_defs")
    ismi_defs = [atts['ov'] for atts in imp.data['defs']]

    # create directory for export files (no error if it already exists)
    exportDir = '/tmp/ismi_data'
    os.makedirs(exportDir, exist_ok=True)

    for ismi_def in ismi_defs:
        print("loading entities of type %s"%ismi_def)
        #
        # load all entities of type ismi_def
        # contains entities with attributes and first-order relations
        #
        url = ismiBaseUrl+"/jsonInterface?method=get_ents&oc=%s"%ismi_def
        imp.loadJSON(url)

        #
        # download the full entities and save them as <ismi_def>.json,
        # their relations as <ismi_def>_rels.json
        #
        imp.saveallEnts(exportDir+"/%s"%ismi_def)

    #
    # save all entities in one file
    #
    print(" writing ", "ALL.json")
    with open(exportDir+"/ALL.json", "w", encoding="utf-8") as of:
        json.dump({"ents": list(imp.allents.values())}, of)

    #
    # save all relations in one file
    #
    print(" writing ", "ALL_rels.json")
    with open(exportDir+"/ALL_rels.json", "w", encoding="utf-8") as of:
        json.dump({"rels": list(imp.allrels.values())}, of)