view proxy/views.py @ 0:af2f8fe486f6 default tip

initial
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Tue, 17 Feb 2015 12:44:40 +0100
parents
children
line wrap: on
line source

import json
import logging
import urllib
import urllib.parse
import urllib.request

import mergedict
from django.http.response import StreamingHttpResponse, HttpResponse,\
    JsonResponse
from django.shortcuts import render
from django.views.generic.base import View

from proxy.models import Server


class GetMD(View):
    """Return display metadata for a single remote record as JSON."""

    def get(self, request, objid="", proxyId="cl1"):
        """Fetch the record ``objid`` from the server ``proxyId`` and flatten it.

        The record is downloaded from ``srv.metaDataUrl % objid`` (with the
        server's ``wskey`` appended), and the fields needed by the viewer are
        copied into a flat dict.

        Args:
            request: the incoming Django request (not inspected).
            objid: URL-quoted identifier of the record.
            proxyId: ``proxyId`` of the ``Server`` entry to query.

        Returns:
            A ``JsonResponse`` with the keys ``title``, ``date``,
            ``viewerurl``, ``thumburl``, ``creator``, ``accessType``,
            ``provider`` and ``providerLogo``.

        Raises:
            Server.DoesNotExist: if no server with ``proxyId`` is configured.
            Errors raised by ``urllib.request.urlopen`` or ``json.loads`` are
            propagated unchanged.
        """
        srv = Server.objects.get(proxyId=proxyId)
        objid = urllib.parse.unquote(objid)
        url = srv.metaDataUrl % objid + "?wskey=" + srv.key

        # The context manager closes the HTTP connection even if reading or
        # decoding fails.
        with urllib.request.urlopen(url) as remote:
            obj = json.loads(remote.read().decode('utf-8'))['object']

        # One line per agent; the labels of a single agent are joined with ";".
        # Agents without a 'def' label are skipped instead of raising KeyError.
        creators = []
        for agent in obj.get('agents', []):
            labels = (agent.get('prefLabel') or {}).get('def', [])
            if labels:
                creators.append(";".join(labels))

        ret = {}
        ret['title'] = "\n".join(obj.get('title', []))
        ret['date'] = "\n".join(obj.get('year', []))
        ret['viewerurl'] = "http://europeana.eu/portal/record/%s.html" % objid
        ret['thumburl'] = (obj.get('europeanaAggregation') or {}).get('edmPreview', "")
        ret['creator'] = "\n".join(creators)
        ret['accessType'] = "free"
        ret['provider'] = srv.type
        ret['providerLogo'] = srv.providerLogo
        return JsonResponse(ret)
        

class SearchAllView(View):
    """Federated search over every configured ``Server``.

    Each server is queried according to its ``type`` ("solr" or "europeana"),
    Europeana answers are converted into the Solr JSON layout, and all
    partial results are merged into one Solr-style response.
    """

    _log = logging.getLogger(__name__)

    # Facets that are removed from "facet.field" before a Solr request is
    # built; a Solr request that *filters* on one of them is not sent at all.
    _EXTERNAL_ONLY_FACETS = ("provider", "data_provider", "type")

    # Filter queries that are dropped before the Europeana request is built.
    _IGNORED_EUROPEANA_FILTERS = ("doc-type:*", "access-type:free")

    def get(self, request, command=""):
        """Run the search on every server and return the merged result.

        Args:
            request: the incoming Django request; its GET parameters are the
                Solr-style search parameters (``q``, ``fq``, ``facet.field``,
                ``start``, ...).
            command: Solr request handler appended to a Solr server's URL.

        Returns:
            A ``JsonResponse`` with the merged result, or ``{}`` when no
            server contributed anything.
        """
        # Every value is a list, exactly like the former
        # dict(request.REQUEST.dicts[1]); the lists are copied so nothing
        # below can modify the request itself.
        params_full = {key: list(values) for key, values in request.GET.lists()}

        ret = {}
        for proxy in Server.objects.all():
            # The result is computed fresh for every server, so a skipped
            # search can never re-add the previous server's documents.
            if proxy.type == "solr":
                jsRes = self._searchSolr(proxy, command, params_full)
            elif proxy.type == "europeana":
                jsRes = self._searchEuropeana(proxy, params_full)
            else:
                jsRes = None

            if jsRes:
                ret = self.add(ret, jsRes)

        return JsonResponse(ret, safe=False)

    def _searchSolr(self, proxy, command, params_full):
        """Query one Solr server; return its parsed JSON answer or ``None`` if skipped."""
        # Private copy: changes made for this server must not leak into the
        # queries sent to the other servers.
        params = {key: list(values) for key, values in params_full.items()}
        params['wt'] = "json"

        # Make sure that there is at least "*" to search for in q.
        query = params.get('q', [])
        if query in ([], ['']):
            query = ['*']
        params['q'] = query

        # A filter on one of these facets means: no result from this search.
        excluded_prefixes = tuple(name + ":" for name in self._EXTERNAL_ONLY_FACETS)
        if any(facet.startswith(excluded_prefixes) for facet in params.get("fq", [])):
            return None

        # "facet.field" is optional; an empty list adds nothing to the URL.
        params["facet.field"] = [
            field for field in params.get("facet.field", [])
            if field not in self._EXTERNAL_ONLY_FACETS
        ]

        searchUrl = proxy.serverUrl + "/" + command + "?" + urllib.parse.urlencode(params, True)
        self._log.debug("solr search: %s", searchUrl)

        with urllib.request.urlopen(searchUrl) as search:
            return json.loads(search.read().decode('utf-8'))

    def _searchEuropeana(self, proxy, params_full):
        """Query one Europeana server; return a Solr-shaped result or ``None`` if skipped."""
        # Like QueryDict.get(): the last value given for q.
        q_values = params_full.get('q') or ['']
        query = q_values[-1]
        if query == "":
            query = "*"  # Europeana doesn't want empty strings

        # Special queries: author_c ---> who; title_s: is dropped;
        # Europeana doesn't want '"'.
        query = query.replace("author_c", "who").replace("title_s:", "").replace('"', '')
        if query == "":
            query = "*"  # the replacements above may have emptied the query

        external = False
        dt_selected = False
        qf = []
        for facet in params_full.get("fq", []):
            # The generic doc-type filter is only of interest when facets are
            # selected, and access-type:free is not relevant for external data.
            if facet in self._IGNORED_EUROPEANA_FILTERS:
                continue

            if facet == "doc-type:external":
                external = True
                dt_selected = True
            elif facet.startswith("doc-type:"):
                dt_selected = True
            else:
                facet = facet.replace("data_provider:", "DATA_PROVIDER:")
                facet = facet.replace("provider:", "PROVIDER:")
                facet = facet.replace("type:", "TYPE:")
                qf.append(facet)

        # If doc-type facets are selected and "external" is not among them,
        # there is nothing to do for this server.
        if dt_selected and not external:
            return None

        try:
            start = int(params_full['start'][0]) + 1  # europeana starts with 1
        except (KeyError, IndexError, ValueError):
            start = 1

        paramsNew = {'query': query, 'qf': qf, 'start': start}
        searchUrl = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                     + urllib.parse.urlencode(paramsNew, True) + "&profile=facets")

        try:
            with urllib.request.urlopen(searchUrl) as search:
                payload = search.read()
        except Exception:
            # Stop this part of the search if there is an error (normally a
            # facet that is not supported). The URL is not logged because it
            # contains the API key.
            self._log.warning("europeana search on %s failed", proxy.serverUrl, exc_info=True)
            return None

        jsResOrig = json.loads(payload.decode('utf-8'))
        resNr = jsResOrig['totalResults']

        docs = []
        for item in jsResOrig.get("items", []):
            newitem = {}
            newitem['IM_title'] = item['title']
            newitem['title'] = item['title']
            newitem['year'] = item.get('year', [''])
            newitem['author'] = [agent['def'] for agent in item.get('edmAgentLabel', [])]
            newitem['url'] = item.get("edmIsShownAt", [''])
            newitem['image'] = item.get("edmPreview", [''])
            newitem['archive-path'] = item['id']
            newitem['objid'] = item['id']
            newitem['doc-type'] = ['external']
            newitem['mpiwg-dri'] = item['id']
            newitem['dataSource'] = proxy.title
            newitem['provider'] = item.get("provider", [])
            docs.append(newitem)

        facetsDict = {facet['name']: facet['fields'] for facet in jsResOrig.get("facets", [])}
        years = facetsDict.get('YEAR', [])

        jsRes = {}
        jsRes['response'] = {'numFound': resNr, 'docs': docs}
        jsRes["facet_counts"] = {}
        jsRes["facet_counts"]["facet_queries"] = {}
        jsRes["facet_counts"]["facet_fields"] = {
            "doc-type": {"external": resNr},
            "year_plain": self.remapFacetDict(years),
            "provider": self.remapFacetDict(facetsDict.get('PROVIDER', [])),
            "data_provider": self.remapFacetDict(facetsDict.get('DATA_PROVIDER', [])),
            "type": self.remapFacetDict(facetsDict.get('TYPE', [])),
        }
        jsRes["facet_counts"]["facet_ranges"] = {
            "year": {"counts": self.remapFacetDict(years), "gap": 20, "start": 1, "end": 2021},
        }
        return jsRes

    def remapFacetDict(self, facet):
        """Turn a list of ``{'label': ..., 'count': ...}`` entries into ``{label: count}``."""
        return {entry['label']: entry['count'] for entry in facet}

    def add(self, x, y):
        """Merge the search result ``y`` into ``x`` and return the merged result.

        Documents are concatenated, ``numFound`` is summed and the
        ``facet_fields`` of both results are combined per facet. Only
        ``facet_fields`` is merged; ``x`` keeps its own ``facet_queries`` and
        ``facet_ranges``. ``x`` is modified in place.

        Args:
            x: the result accumulated so far (``{}`` for "nothing yet").
            y: the result to add; an empty result leaves ``x`` unchanged.

        Returns:
            ``y`` if ``x`` is empty, otherwise the updated ``x``.
        """
        if x == {}:
            return y
        if not y:
            return x

        x['response']['docs'] = x['response']['docs'] + y['response']['docs']
        x['response']['numFound'] = x['response']['numFound'] + y['response']['numFound']

        x_fields = x['facet_counts']['facet_fields']
        y_fields = y['facet_counts']['facet_fields']
        for ff in set(x_fields) | set(y_fields):
            try:
                # Both sides are lists (or missing): concatenate them.
                nw = x_fields.get(ff, []) + y_fields.get(ff, [])
            except TypeError:
                # At least one side is a dict.
                x1 = x_fields.get(ff, {})
                y1 = y_fields.get(ff, {})
                try:
                    # NOTE(review): for a label present on both sides the
                    # count from y replaces the one from x; the counts are
                    # not summed -- confirm that this is intended.
                    x1.update(y1)
                    nw = x1
                except (AttributeError, TypeError, ValueError):
                    # List and dict cannot be combined: y wins.
                    nw = y1

            x_fields[ff] = nw

        return x
            
            

class SearchProxyView(View):
    """Forward a search request to one configured ``Server``."""

    def get(self, request, proxyId="", command=""):
        """Run the search on the server ``proxyId``.

        A server of type "solr" gets the request parameters unchanged and its
        answer is streamed back as it is. Every other server is queried like
        Europeana and its answer is converted into the Solr JSON layout.

        Args:
            request: the incoming Django request; its GET parameters are the
                search parameters.
            proxyId: ``proxyId`` of the ``Server`` entry to query.
            command: Solr request handler appended to a Solr server's URL.

        Returns:
            A ``StreamingHttpResponse`` for Solr servers, otherwise a
            ``JsonResponse``.

        Raises:
            Server.DoesNotExist: if no server with ``proxyId`` is configured.
            Errors raised by ``urllib.request.urlopen`` or ``json.loads`` are
            propagated unchanged.
        """
        proxy = Server.objects.get(proxyId=proxyId)

        # urlencode does not work correctly with a query dict, so a plain
        # dict with list values is used (same content as the former
        # dict(request.REQUEST.dicts[1])).
        params = {key: list(values) for key, values in request.GET.lists()}

        if proxy.type == "solr":
            searchUrl = proxy.serverUrl + "/" + command + "?" + urllib.parse.urlencode(params, True)
            # Not wrapped in "with": the open response is handed to the
            # streaming response, which reads from it later.
            search = urllib.request.urlopen(searchUrl)
            ct = search.headers['content-type']
            return StreamingHttpResponse(search, content_type=ct)

        # A missing or empty q searches for everything instead of failing.
        query = params.get('q', [])
        if not any(query):
            query = ['*']

        paramsNew = {}
        paramsNew['query'] = query
        try:
            paramsNew['start'] = int(params['start'][0]) + 1  # europeana starts with 1
        except (KeyError, IndexError, ValueError):
            paramsNew['start'] = 1

        searchUrl = proxy.serverUrl + "?wskey=" + proxy.key + "&" + urllib.parse.urlencode(paramsNew, True)

        # Fetch once and close the connection when done.
        with urllib.request.urlopen(searchUrl) as search:
            jsRes = json.loads(search.read().decode('utf-8'))

        docs = []
        for item in jsRes.get('items', []):
            newitem = {}
            newitem['IM_title'] = item['title']
            newitem['title'] = item['title']
            newitem['year'] = item.get('year', [''])
            newitem['author'] = item.get('dcCreator', [''])
            newitem['url'] = item.get("edmIsShownAt", [''])
            newitem['image'] = item.get("edmPreview", [''])
            newitem['archive-path'] = item['id']
            newitem['doc-type'] = ['external']
            newitem['mpiwg-dri'] = item['id']
            newitem['provider'] = item.get("provider", [])
            docs.append(newitem)

        res = {}
        res['response'] = {'numFound': jsRes['totalResults'], 'docs': docs}
        res["facet_counts"] = {}
        res["facet_counts"]["facet_queries"] = {}
        # NOTE(review): the facet count is a fixed 100000 here, not the
        # number of results -- confirm whether that is intended.
        res["facet_counts"]["facet_fields"] = {"doc-type": {"external": 100000}}

        return JsonResponse(res)