Mercurial > hg > djangoSolrSearchProxy
diff proxy/views.py @ 0:af2f8fe486f6 default tip
initial
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 17 Feb 2015 12:44:40 +0100 |
parents | |
children |
line wrap: on
line diff
from django.shortcuts import render  # noqa: F401 -- kept; may be used elsewhere
from django.views.generic.base import View
from proxy.models import Server
# FIX: "import urllib" alone does not make urllib.parse / urllib.request
# available in Python 3 -- import the submodules explicitly.
import urllib.parse
import urllib.request
from django.http.response import StreamingHttpResponse, HttpResponse,\
    JsonResponse  # noqa: F401 -- HttpResponse kept for compatibility

import json
import mergedict  # noqa: F401 -- kept; referenced by commented-out merge code


class GetMD(View):
    """Return flattened metadata for one object of a configured provider.

    Resolves the ``Server`` row identified by ``proxyId``, fetches the
    provider's metadata URL for ``objid`` and returns a flat JSON dict
    (title, date, creator, thumbnail, viewer URL, access type, provider).
    The parsing is Europeana-shaped (reads the top-level ``object`` key).
    """

    def get(self, request, objid="", proxyId="cl1"):
        srv = Server.objects.get(proxyId=proxyId)

        # objid arrives URL-encoded (Europeana ids contain slashes).
        objid = urllib.parse.unquote(objid)

        url = srv.metaDataUrl % objid + "?wskey=" + srv.key

        # FIX: close the HTTP response (the original leaked the handle and
        # printed it for debugging).
        with urllib.request.urlopen(url) as resp:
            obj = json.loads(resp.read().decode('utf-8'))['object']

        ret = {
            'title': "\n".join(obj['title']),
            'date': "\n".join(obj.get('year', [])),
            'viewerurl': "http://europeana.eu/portal/record/%s.html" % objid,
            'thumburl': obj['europeanaAggregation']['edmPreview'],
            # One line per agent; an agent's prefLabel.def list is joined
            # with ';' exactly as before.
            'creator': "\n".join(
                ";".join(agent['prefLabel']['def'])
                for agent in obj.get('agents', [])
            ),
            'accessType': "free",
            'provider': srv.type,
            'providerLogo': srv.providerLogo,
        }
        return JsonResponse(ret)


class SearchAllView(View):
    """Federated search over every configured ``Server`` backend.

    Each Solr backend is queried directly; each Europeana backend is queried
    through its REST API and the answer is reshaped into a Solr-style result
    dict.  All per-backend results are merged with :meth:`add` and returned
    as one JSON document.
    """

    # Facets that exist only on the Europeana side.  They are stripped from
    # Solr requests, and filtering on one of them suppresses the Solr leg.
    _EUROPEANA_ONLY_FACETS = ("provider", "data_provider", "type")

    def get(self, request, command=""):
        ret = {}
        # NOTE(review): request.REQUEST was removed in Django 1.9; this code
        # targets an older Django.  dicts[1] is the GET QueryDict; dict() of
        # it yields {key: [values]}, which urlencode(..., True) expands.
        params_full = dict(request.REQUEST.dicts[1])

        for proxy in Server.objects.all():
            # FIX: jsRes is recomputed for every backend.  The original kept
            # the previous iteration's value when a backend was skipped or of
            # unknown type, merging stale results a second time.
            if proxy.type == "solr":
                jsRes = self._search_solr(proxy, command, params_full)
            elif proxy.type == "europeana":
                jsRes = self._search_europeana(proxy, request, params_full)
            else:
                jsRes = {}  # unknown backend type contributes nothing
            ret = self.add(ret, jsRes)

        return JsonResponse(ret, safe=False)

    def _search_solr(self, proxy, command, params_full):
        """Query one Solr backend; return its parsed JSON, or {} if skipped."""
        # FIX: work on a copy -- the original aliased the shared dict, so one
        # backend's parameter rewriting leaked into the next backend's call.
        params = dict(params_full)
        params['wt'] = "json"

        # Normalise q: Solr needs at least '*'.
        q = params.get('q', ['*'])
        if q == "":
            q = "*"  # plain-string form; normally q is a list
        if isinstance(q, list) and (not q or (len(q) == 1 and q[0] == '')):
            q = ['*']
        params['q'] = q

        # A filter on a Europeana-only facet can match nothing in Solr:
        # skip the request entirely (the original returned {} for it).
        selected_facets = list(params.get("fq", []))
        params["fq"] = selected_facets
        if any(f.startswith(prefix + ":")
               for f in selected_facets
               for prefix in self._EUROPEANA_ONLY_FACETS):
            return {}

        # Strip Europeana-only fields from the facet request.
        # FIX: .get() instead of [] so a request without facet.field no
        # longer raises KeyError.
        facet_fields = list(params.get("facet.field", []))
        for name in self._EUROPEANA_ONLY_FACETS:
            try:
                facet_fields.remove(name)  # first occurrence, as before
            except ValueError:
                pass
        params["facet.field"] = facet_fields

        search_url = (proxy.serverUrl + "/" + command + "?"
                      + urllib.parse.urlencode(params, True))
        with urllib.request.urlopen(search_url) as resp:
            return json.loads(resp.read().decode('utf-8'))

    def _search_europeana(self, proxy, request, params_full):
        """Query one Europeana backend and reshape the answer to Solr form.

        Returns {} when the request is skipped (a doc-type filter other than
        "external" is active) or when the remote call fails (best effort:
        typically an unsupported facet).
        """
        params = request.GET.copy()

        query = params.get('q', "")
        if query == "":
            query = "*"  # Europeana rejects empty query strings
        # Map local field names onto Europeana's; Europeana also rejects '"'.
        query = (query.replace("author_c", "who")
                      .replace("title_s:", "")
                      .replace('"', ''))
        params_new = {'query': query}

        # FIX: copy before editing -- the original deleted entries from the
        # shared fq list in place, corrupting it for later backends.
        selected_facets = list(params_full.get("fq", []))
        for generic in ("doc-type:*", "access-type:free"):
            # Generic filters carry no meaning for the external search.
            try:
                selected_facets.remove(generic)
            except ValueError:
                pass

        dt_selected = False
        external = False
        facets_new = []
        for facet in selected_facets:
            if facet == "doc-type:external":
                external = True
                dt_selected = True
            elif facet.startswith("doc-type:"):
                dt_selected = True  # some other doc-type was chosen
            else:
                # Europeana spells these facet names in upper case.
                facets_new.append(
                    facet.replace("data_provider:", "DATA_PROVIDER:")
                         .replace("provider:", "PROVIDER:")
                         .replace("type:", "TYPE:"))
        params_new["qf"] = facets_new

        # Search only when "external" is selected or no doc-type filter is
        # active at all.
        if dt_selected and not external:
            return {}

        # FIX: read 'start' from the list-valued dict.  The original indexed
        # a QueryDict value string, so e.g. start=20 became int('2').
        try:
            params_new['start'] = int(params_full['start'][0]) + 1  # 1-based
        except (KeyError, IndexError, ValueError, TypeError):
            params_new['start'] = 1

        search_url = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                      + urllib.parse.urlencode(params_new, True)
                      + "&profile=facets")
        # FIX: fetch once and close (the original opened the URL twice).
        # A failing backend must not break the federated search.
        try:
            with urllib.request.urlopen(search_url) as resp:
                js_orig = json.loads(resp.read().decode('utf-8'))
        except (OSError, ValueError):
            return {}

        res_nr = js_orig['totalResults']
        docs = []
        for item in js_orig.get("items", []):
            docs.append({
                'IM_title': item['title'],
                'title': item['title'],
                'year': item.get('year', ['']),
                # edmAgentLabel entries look like {'def': <label>} here
                'author': [agent['def']
                           for agent in item.get('edmAgentLabel', [])],
                'url': item.get("edmIsShownAt", ['']),
                'image': item.get("edmPreview", ['']),
                'archive-path': item['id'],
                'objid': item['id'],
                'doc-type': ['external'],
                'mpiwg-dri': item['id'],
                'dataSource': proxy.title,
                'provider': item.get("provider", []),
            })

        facets_dict = {f['name']: f['fields']
                       for f in js_orig.get("facets", [])}

        return {
            'response': {'numFound': res_nr, 'docs': docs},
            'facet_counts': {
                'facet_queries': {},
                'facet_fields': {
                    'doc-type': {"external": res_nr},
                    'year_plain':
                        self.remapFacetDict(facets_dict.get('YEAR', [])),
                    'provider':
                        self.remapFacetDict(facets_dict.get('PROVIDER', [])),
                    'data_provider':
                        self.remapFacetDict(
                            facets_dict.get('DATA_PROVIDER', [])),
                    'type':
                        self.remapFacetDict(facets_dict.get('TYPE', [])),
                },
                'facet_ranges': {
                    # separate dict instance: add() mutates facet_fields,
                    # and the range counts must not change with it
                    'year': {
                        "counts":
                            self.remapFacetDict(facets_dict.get('YEAR', [])),
                        "gap": 20, "start": 1, "end": 2021,
                    },
                },
            },
        }

    def remapFacetDict(self, facet):
        """Convert Europeana facet entries ``[{'label':..,'count':..},..]``
        into a Solr-style ``{label: count}`` mapping."""
        return {entry['label']: entry['count'] for entry in facet}

    def add(self, x, y):
        """Merge Solr-style result dict ``y`` into ``x`` and return it.

        Docs are concatenated, numFound summed, and each facet field merged:
        list-shaped facets are concatenated, dict-shaped facets merged by
        key (y wins on conflicts).  FIX: an empty ``y`` is now a no-op; the
        original raised KeyError when a backend contributed no result.
        """
        if x == {}:
            return y
        if y == {}:
            return x
        x['response']['docs'] += y['response']['docs']
        x['response']['numFound'] += y['response']['numFound']

        xf = x['facet_counts']['facet_fields']
        yf = y['facet_counts']['facet_fields']
        for field in set(xf) | set(yf):
            try:
                # Both sides list-shaped (raw Solr facet output): concatenate.
                merged = xf.get(field, []) + yf.get(field, [])
            except TypeError:
                # At least one side is a dict: merge by key.
                x1 = xf.get(field, {})
                y1 = yf.get(field, {})
                try:
                    x1.update(y1)
                    merged = x1
                except AttributeError:
                    merged = y1  # x side is not a dict; keep y's value
            xf[field] = merged
        return x


class SearchProxyView(View):
    """Pass a search through to a single configured backend.

    Solr answers are streamed back unchanged; anything else is treated as a
    Europeana-style backend and reshaped into a minimal Solr-style JSON
    document.
    """

    def get(self, request, proxyId="", command=""):
        proxy = Server.objects.get(proxyId=proxyId)

        # NOTE(review): request.REQUEST was removed in Django 1.9.
        # urlencode does not work correctly with a QueryDict directly, hence
        # the dict() of the GET dict ({key: [values]}).
        params = dict(request.REQUEST.dicts[1])

        if proxy.type == "solr":
            search_url = (proxy.serverUrl + "/" + command + "?"
                          + urllib.parse.urlencode(params, True))
            search = urllib.request.urlopen(search_url)
            # Stream the Solr answer through untouched.
            return StreamingHttpResponse(
                search, content_type=search.headers['content-type'])

        params_new = {'query': params['q']}
        try:
            # params values are lists here, so [0] is the first value.
            params_new['start'] = int(params['start'][0]) + 1  # 1-based
        except (KeyError, IndexError, ValueError, TypeError):
            params_new['start'] = 1

        search_url = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                      + urllib.parse.urlencode(params_new, True))
        # FIX: fetch once and close (the original opened the URL twice and
        # left an unused streaming handle open).
        with urllib.request.urlopen(search_url) as resp:
            js_res = json.loads(resp.read().decode('utf-8'))

        docs = [{
            'IM_title': item['title'],
            'title': item['title'],
            'year': item.get('year', ['']),
            'author': item.get('dcCreator', ['']),
            'url': item.get("edmIsShownAt", ['']),
            'image': item.get("edmPreview", ['']),
            'archive-path': item['id'],
            'doc-type': ['external'],
            'mpiwg-dri': item['id'],
            'provider': item.get("provider", []),
        } for item in js_res['items']]

        res = {
            'response': {'numFound': js_res['totalResults'], 'docs': docs},
            'facet_counts': {
                'facet_queries': {},
                # hard-coded count kept for compatibility with the original
                'facet_fields': {"doc-type": {"external": 100000}},
            },
        }
        # FIX: removed the unreachable HttpResponse return after this one.
        return JsonResponse(res)