Mercurial > hg > drupalISMI
view importFromOpenMind/importer/unfilteredISMI.py @ 19:ca1e02a2a9c4
unfilteredISMI: OpenMind-to-JSON exporter, like filterISMI.
ismi2model: OpenMind importer, like ismi2neo4j, that saves a networkx pickle file.
author | casties |
---|---|
date | Wed, 09 Sep 2015 17:32:42 +0200 |
parents | |
children | a9bfd49355f8 |
line wrap: on
line source
'''
Export all entities and relations of an OpenMind (ISMI) server as JSON files.

Created on 22.04.2014

@author: dwinter
'''

import json
import os
import urllib.request

#ismiBaseUrl="https://ismi.mpiwg-berlin.mpg.de/om4-ismi"
ismiBaseUrl="http://localhost:18080/ismi-richfaces"

# maximum number of entity ids sent in one get_ents request
ENTS_PER_REQUEST = 500


class Importer:
    """Downloads entities through the OpenMind JSON interface and collects them.

    Attributes:
        data: parsed JSON of the most recent loadJSON()/loadJSONFromFile() call.
        allents: every entity downloaded by loadallEnts(), keyed by entity id.
        allrels: every relation of those entities, keyed by relation id.
    """

    def __init__(self):
        self.data = None
        # per-instance dicts: class-level dicts would be shared by all Importers
        self.allents = {}
        self.allrels = {}

    def _fetchJSON(self, url):
        """Download url and return the parsed JSON document."""
        # read() rather than readall(): current urlopen() response objects
        # do not provide readall()
        with urllib.request.urlopen(url) as response:
            return json.loads(response.read().decode('utf-8'))

    def _registerEnt(self, ent, rels):
        """Record ent in allents and its relations in allrels; append the relations to rels."""
        ismi_id = ent.get('id')
        if ismi_id in self.allents:
            print("entity id=%s exists!"%ismi_id)
        else:
            self.allents[ismi_id] = ent

        # extract relations
        for key in ('src_rels', 'tar_rels'):
            if key not in ent:
                continue
            entRels = ent.get(key)
            rels.extend(entRels)
            for rel in entRels:
                rel_id = rel.get('id')
                if rel_id in self.allrels:
                    print("relation id=%s exists!"%rel_id)
                else:
                    self.allrels[rel_id] = rel

    def loadJSON(self,url):
        """Load JSON from URL.

        Saves JSON in data member.
        """
        #print(" loading "+url)
        self.data = self._fetchJSON(url)

    def loadJSONFromFile(self,fn):
        """Load JSON from file fn.json (UTF-8).

        Saves JSON in data member.
        """
        print(" loading "+fn+".json")
        # the file's encoding is given to open(); json.load() itself no longer
        # accepts an encoding argument
        with open(fn+".json",'r', encoding="utf-8") as f:
            self.data = json.load(f)

    def getEntIds(self):
        """Extract entity ids and relations from data member.

        Returns a set of the ids (as strings) of all entities in data["ents"]
        and a list of all their src_rels and tar_rels.
        """
        ids = set()
        rels = []
        for ent in self.data.get("ents"):
            ids.add(str(ent.get('id')))
            for key in ('src_rels', 'tar_rels'):
                if key in ent:
                    print("%s: %s"%(key, ent.get(key)))
                    rels.extend(ent.get(key))

        return ids,rels

    def loadallEnts(self,kind="tar",filterOC=None):
        """Download the full content of all entities in data member.

        Takes the entity ids from getEntIds() and downloads the entities from
        OpenMind in batches of ENTS_PER_REQUEST ids. Every downloaded entity
        and relation is also recorded in allents / allrels.

        kind and filterOC are accepted for compatibility and currently unused.

        Returns the list of downloaded entities and a list of relations.
        The relation list starts with the relations found by getEntIds() and
        is extended by those of the downloaded entities, so it may contain
        the same relation more than once.
        """
        ids,rels = self.getEntIds()
        baseUrl=ismiBaseUrl+"/jsonInterface?include_content=true&include_romanization=true&method=get_ents"

        # fix the iteration order once so that the batches partition the ids
        idList = list(ids)
        print("loading %s entities"%len(idList))

        ents = []
        # stepping over the list never produces an empty batch, so no request
        # with an empty ids parameter is sent
        for start in range(0, len(idList), ENTS_PER_REQUEST):
            idsString = ",".join(idList[start:start+ENTS_PER_REQUEST])
            qs = baseUrl+"&ids="+idsString
            #print(" loading ents from "+qs)
            entsJ = self._fetchJSON(qs)
            batch = entsJ.get("ents")
            ents += batch
            for ent in batch:
                self._registerEnt(ent, rels)

        return ents,rels

    def saveallEnts(self,filename,kind="tar",filterOC=None):
        """Download all entities in data member and save them as JSON.

        Loads the entities via loadallEnts().
        Saves entities in file filename.json.
        Saves relations in file filename_rels.json.
        """
        ents,rels = self.loadallEnts(kind=kind,filterOC=filterOC)

        print(" writing ", filename+".json")
        with open(filename+".json","wb") as of:
            of.write(json.dumps({"ents":ents}).encode('utf-8'))

        print(" writing ", filename+"_rels.json")
        with open(filename+"_rels.json","w") as of:
            json.dump({'rels':rels},of)


def main():
    """Export every entity type to its own JSON files plus combined ALL files."""
    imp = Importer()

    # get current list of all definitions
    imp.loadJSON(ismiBaseUrl+"/jsonInterface?method=get_defs")
    ismi_defs = [atts['ov'] for atts in imp.data['defs']]

    # create directory for export files (no-op when it already exists)
    exportDir = '/tmp/ismi_data'
    os.makedirs(exportDir, exist_ok=True)

    for ismi_def in ismi_defs:
        print("loading entities of type %s"%ismi_def)
        #
        # load all entities of type ismi_def
        # contains entities with attributes and first-order relations
        #
        url = ismiBaseUrl+"/jsonInterface?method=get_ents&oc=%s"%ismi_def
        imp.loadJSON(url)

        #
        # load and save all target relations of entities as entities.json
        #
        imp.saveallEnts(exportDir+"/%s"%ismi_def)

    #
    # save all entities in one file
    #
    print(" writing ", "ALL.json")
    with open(exportDir+"/ALL.json","wb") as of:
        of.write(json.dumps({"ents":list(imp.allents.values())}).encode('utf-8'))

    print(" writing ", "ALL_rels.json")
    with open(exportDir+"/ALL_rels.json","wb") as of:
        of.write(json.dumps({"rels":list(imp.allrels.values())}).encode('utf-8'))


if __name__ == '__main__':
    main()