Mercurial > hg > djangoSolrSearchProxy
view proxy/views.py @ 0:af2f8fe486f6 default tip
initial
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 17 Feb 2015 12:44:40 +0100 |
parents | |
children |
line wrap: on
line source
"""Views that proxy search and metadata requests to Solr and Europeana.

``SearchAllView`` fans one query out to every configured ``Server`` and merges
the answers into a single Solr-style JSON document; ``SearchProxyView`` talks
to exactly one server; ``GetMD`` fetches display metadata for one record.
"""
import json
import logging
import urllib
import urllib.parse
import urllib.request

import mergedict
from django.http.response import StreamingHttpResponse, HttpResponse,\
    JsonResponse
from django.shortcuts import render
from django.views.generic.base import View

from proxy.models import Server

# Create your views here.

logger = logging.getLogger(__name__)

# Facet fields / filter prefixes that are only forwarded to Europeana; a Solr
# backend is never queried with them.
_EUROPEANA_ONLY_FIELDS = ("provider", "data_provider", "type")
_EUROPEANA_ONLY_PREFIXES = ("provider:", "data_provider:", "type:")

# Filters that are dropped before the remaining ones are sent to Europeana.
_IGNORED_FOR_EUROPEANA = ("doc-type:*", "access-type:free")


def _query_params(request):
    """Return the GET parameters of *request* as ``{name: [values...]}``.

    Every value list is a fresh copy, so callers may modify the result
    without touching the request's own ``QueryDict``.  (``urlencode`` with
    ``doseq=True`` needs plain lists; it does not handle a ``QueryDict``
    correctly.)
    """
    return {key: list(values) for key, values in request.GET.lists()}


def _fetch_json(url):
    """Fetch *url* once and return its body parsed as UTF-8 encoded JSON.

    The HTTP response is always closed.  Network errors (``OSError`` and its
    subclass ``urllib.error.URLError``) and decoding errors (``ValueError``)
    propagate to the caller.
    """
    with urllib.request.urlopen(url) as response:
        return json.loads(response.read().decode('utf-8'))


class GetMD(View):
    """Serve the display metadata of a single record as JSON."""

    def get(self, request, objid="", proxyId="cl1"):
        """Look up *objid* on the server *proxyId* and return its metadata.

        The record is requested from ``srv.metaDataUrl`` (a ``%``-format
        string taking the unquoted object id) with the server key appended
        as ``wskey``.  The reply's ``object`` entry is reduced to title,
        date, viewer/thumbnail URLs, creators and provider information.
        """
        srv = Server.objects.get(proxyId=proxyId)
        objid = urllib.parse.unquote(objid)
        url = srv.metaDataUrl % objid + "?wskey=" + srv.key

        obj = _fetch_json(url)['object']

        # One label per agent; the 'def' variants of a label are joined by ';'.
        labels = [";".join(agent['prefLabel']['def'])
                  for agent in obj.get('agents', [])]

        ret = {}
        ret['title'] = "\n".join(obj['title'])
        ret['date'] = "\n".join(obj.get('year', []))
        ret['viewerurl'] = "http://europeana.eu/portal/record/%s.html" % objid
        ret['thumburl'] = obj['europeanaAggregation']['edmPreview']
        ret['creator'] = "\n".join(labels)
        ret['accessType'] = "free"
        ret['provider'] = srv.type
        ret['providerLogo'] = srv.providerLogo
        return JsonResponse(ret)


class SearchAllView(View):
    """Query every configured server and merge the results."""

    def get(self, request, command=""):
        """Run the search on all servers and return the merged JSON result.

        *command* is the path segment appended to a Solr server URL.
        Servers whose ``type`` is neither ``"solr"`` nor ``"europeana"`` are
        ignored.  Each backend works on its own copy of the request
        parameters, so the outcome does not depend on the server order.
        """
        request_params = _query_params(request)

        ret = {}
        for proxy in Server.objects.all():
            if proxy.type == "solr":
                result = self._search_solr(proxy, command, request_params)
            elif proxy.type == "europeana":
                result = self._search_europeana(proxy, request, request_params)
            else:
                continue
            # An empty result (search skipped or failed) is ignored by add().
            ret = self.add(ret, result)

        return JsonResponse(ret, safe=False)

    def _search_solr(self, proxy, command, request_params):
        """Query one Solr server; return its decoded JSON reply or ``{}``.

        ``{}`` is returned without contacting the server when a filter on a
        Europeana-only field is selected: such a search has no Solr results.
        """
        params = {key: list(values) for key, values in request_params.items()}
        params['wt'] = "json"

        # Make sure that q contains at least '*'.
        if params.get('q', []) in ([], ['']):
            params['q'] = ['*']

        selected_facets = params.get("fq", [])
        if any(facet.startswith(_EUROPEANA_ONLY_PREFIXES)
               for facet in selected_facets):
            return {}  # no results from this search

        # Do not ask Solr for facet fields that only Europeana provides.
        params["facet.field"] = [
            field for field in params.get("facet.field", [])
            if field not in _EUROPEANA_ONLY_FIELDS
        ]

        searchUrl = (proxy.serverUrl + "/" + command + "?"
                     + urllib.parse.urlencode(params, True))
        logger.debug("Solr query: %s", searchUrl)
        return _fetch_json(searchUrl)

    def _search_europeana(self, proxy, request, request_params):
        """Query Europeana; return the result in Solr's layout or ``{}``.

        ``{}`` is returned when a ``doc-type`` filter other than
        ``doc-type:external`` is selected, or when the request fails
        (normally because a facet is not supported).
        """
        query = request.GET.get('q', "")
        if query == "":
            query = "*"  # Europeana doesn't want empty strings
        # Special queries: author_c ---> who; title_s: is dropped.
        query = query.replace("author_c", "who")
        query = query.replace("title_s:", "")
        # Europeana doesn't want '"'.
        query = query.replace('"', '')

        # The generic doc-type call and the access type "free" are not
        # relevant for external sources.
        selected_facets = [facet for facet in request_params.get("fq", [])
                           if facet not in _IGNORED_FOR_EUROPEANA]

        dt_selected = False
        external = False
        facets_new = []
        for facet in selected_facets:
            if facet == "doc-type:external":
                external = True
                dt_selected = True
            elif facet.startswith("doc-type:"):
                dt_selected = True
            else:
                facet = facet.replace("data_provider:", "DATA_PROVIDER:")
                facet = facet.replace("provider:", "PROVIDER:")
                facet = facet.replace("type:", "TYPE:")
                facets_new.append(facet)

        if dt_selected and not external:
            # Document types were selected, but "external" is not among them.
            return {}

        try:
            # Europeana starts with 1.  request.GET returns the value as one
            # string, so it is converted as a whole (not its first character).
            start = int(request.GET.get('start', 0)) + 1
        except (TypeError, ValueError):
            start = 1

        paramsNew = {'query': query, 'qf': facets_new, 'start': start}
        searchUrl = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                     + urllib.parse.urlencode(paramsNew, True)
                     + "&profile=facets")
        try:
            jsResOrig = _fetch_json(searchUrl)
        except OSError as err:
            # Stop this part of the search if there is an error (normally a
            # facet that is not supported).  The URL is not logged because it
            # contains the API key.
            logger.warning("Europeana search via %s failed: %s",
                           proxy.serverUrl, err)
            return {}

        resNr = jsResOrig['totalResults']

        docs = []
        for item in jsResOrig.get("items", []):
            docs.append({
                'IM_title': item['title'],
                'title': item['title'],
                'year': item.get('year', ['']),
                'author': [agent['def']
                           for agent in item.get('edmAgentLabel', [])],
                'url': item.get("edmIsShownAt", ['']),
                'image': item.get("edmPreview", ['']),
                'archive-path': item['id'],
                'objid': item['id'],
                'doc-type': ['external'],
                'mpiwg-dri': item['id'],
                'dataSource': proxy.title,
                'provider': item.get("provider", []),
            })

        facetsDict = {facet['name']: facet['fields']
                      for facet in jsResOrig.get("facets", [])}
        year_counts = facetsDict.get('YEAR', [])

        facet_fields = {
            "doc-type": {"external": resNr},
            "year_plain": self.remapFacetDict(year_counts),
            "provider": self.remapFacetDict(facetsDict.get('PROVIDER', [])),
            "data_provider": self.remapFacetDict(
                facetsDict.get('DATA_PROVIDER', [])),
            "type": self.remapFacetDict(facetsDict.get('TYPE', [])),
        }
        facet_ranges = {
            "year": {"counts": self.remapFacetDict(year_counts),
                     "gap": 20, "start": 1, "end": 2021},
        }

        return {
            'response': {'numFound': resNr, 'docs': docs},
            'facet_counts': {
                'facet_queries': {},
                'facet_fields': facet_fields,
                'facet_ranges': facet_ranges,
            },
        }

    def remapFacetDict(self, facet):
        """Turn ``[{'label': l, 'count': c}, ...]`` into ``{l: c, ...}``."""
        return {entry['label']: entry['count'] for entry in facet}

    def add(self, x, y):
        """Merge the search result *y* into *x* and return the merged result.

        * If *x* is empty, *y* is returned unchanged.
        * If *y* is empty (search skipped or failed), *x* is returned
          unchanged.
        * Otherwise *x* is updated in place: ``docs`` are concatenated,
          ``numFound`` is summed and ``facet_fields`` are merged per field
          (see ``_merge_facet``).  Other ``facet_counts`` sections of *y*
          are not merged.
        """
        if x == {}:
            return y
        if not y:
            return x

        x['response']['docs'] = x['response']['docs'] + y['response']['docs']
        x['response']['numFound'] = (x['response']['numFound']
                                     + y['response']['numFound'])

        x_fields = x.setdefault('facet_counts', {}).setdefault(
            'facet_fields', {})
        y_fields = y.get('facet_counts', {}).get('facet_fields', {})
        for name in set(x_fields) | set(y_fields):
            x_fields[name] = self._merge_facet(x_fields.get(name),
                                               y_fields.get(name))
        return x

    @staticmethod
    def _merge_facet(first, second):
        """Combine the values two results report for one facet field.

        Two lists are concatenated; two dicts are merged, with entries of
        *second* replacing equal-named entries of *first*.
        NOTE(review): counts for the same label are replaced, not summed --
        confirm that this is intended.
        If the shapes differ, *second* wins unless it is empty.
        """
        if first is None:
            return second
        if second is None:
            return first
        if isinstance(first, list) and isinstance(second, list):
            return first + second
        if isinstance(first, dict) and isinstance(second, dict):
            merged = dict(first)
            merged.update(second)
            return merged
        return second if second else first


class SearchProxyView(View):
    """Forward a search to exactly one configured server."""

    def get(self, request, proxyId="", command=""):
        """Run the search on the server *proxyId*.

        For a Solr server the reply is streamed back unchanged with the
        upstream content type.  For any other server type the query is sent
        with Europeana's parameter names and the reply is converted to
        Solr's JSON layout.
        """
        proxy = Server.objects.get(proxyId=proxyId)
        params = _query_params(request)

        if proxy.type == "solr":
            searchUrl = (proxy.serverUrl + "/" + command + "?"
                         + urllib.parse.urlencode(params, True))
            # Not wrapped in ``with``: the streaming response reads from the
            # connection after this method has returned.
            search = urllib.request.urlopen(searchUrl)
            ct = search.headers['content-type']
            return StreamingHttpResponse(search, content_type=ct)

        paramsNew = {}
        # Fall back to '*' when no query is given, as SearchAllView does.
        paramsNew['query'] = params.get('q', ['*'])
        try:
            paramsNew['start'] = int(params['start'][0]) + 1  # europeana starts with 1
        except (KeyError, IndexError, ValueError):
            paramsNew['start'] = 1

        searchUrl = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                     + urllib.parse.urlencode(paramsNew, True))
        jsRes = _fetch_json(searchUrl)

        docs = []
        for item in jsRes.get('items', []):
            docs.append({
                'IM_title': item['title'],
                'title': item['title'],
                'year': item.get('year', ['']),
                'author': item.get('dcCreator', ['']),
                'url': item.get("edmIsShownAt", ['']),
                'image': item.get("edmPreview", ['']),
                'archive-path': item['id'],
                'doc-type': ['external'],
                'mpiwg-dri': item['id'],
                'provider': item.get("provider", []),
            })

        res = {}
        res['response'] = {'numFound': jsRes['totalResults'], 'docs': docs}
        res["facet_counts"] = {}
        res["facet_counts"]["facet_queries"] = {}
        # NOTE(review): 100000 is a fixed placeholder count, not the number
        # of hits -- confirm whether totalResults should be used instead.
        res["facet_counts"]["facet_fields"] = {"doc-type": {"external": 100000}}
        return JsonResponse(res)