comparison proxy/views.py @ 0:af2f8fe486f6 default tip

initial
author Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de>
date Tue, 17 Feb 2015 12:44:40 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:af2f8fe486f6
import json
import logging
import urllib
import urllib.parse
import urllib.request

import mergedict
from django.http.response import (
    HttpResponse,
    JsonResponse,
    StreamingHttpResponse,
)
from django.shortcuts import render
from django.views.generic.base import View

from proxy.models import Server
10
11
class GetMD(View):
    """Return display metadata for one record of a configured server as JSON."""

    def get(self, request, objid="", proxyId="cl1"):
        """Fetch the record *objid* from the server *proxyId* and flatten it.

        The ``Server`` row with the given ``proxyId`` supplies the URL
        template (``metaDataUrl``, filled with the unquoted ``objid``) and
        the API key sent as ``wskey``.  The upstream answer is expected to
        be JSON with the record under the ``object`` key.

        Returns a ``JsonResponse`` with the keys ``title``, ``date``,
        ``viewerurl``, ``thumburl``, ``creator``, ``accessType``,
        ``provider`` and ``providerLogo``.

        Exceptions raised by the server lookup, the upstream request or the
        JSON decoding are not handled here and propagate to the caller.
        """
        srv = Server.objects.get(proxyId=proxyId)

        objid = urllib.parse.unquote(objid)
        url = srv.metaDataUrl % objid + "?wskey=" + srv.key

        # Read the whole body inside ``with`` so the upstream connection is
        # always closed, even when decoding fails.
        with urllib.request.urlopen(url) as upstream:
            obj = json.loads(upstream.read().decode('utf-8'))['object']

        # One line per agent; several default labels of one agent are
        # joined with ";".  Agents without a default label are skipped
        # instead of aborting the whole request.
        labels = []
        for agent in obj.get('agents', []):
            pref_label = agent.get('prefLabel') or {}
            if 'def' in pref_label:
                labels.append(";".join(pref_label['def']))

        # Not every record carries a preview image.
        aggregation = obj.get('europeanaAggregation') or {}

        ret = {
            'title': "\n".join(obj.get('title', [])),
            'date': "\n".join(obj.get('year', [])),
            'viewerurl': "http://europeana.eu/portal/record/%s.html" % objid,
            'thumburl': aggregation.get('edmPreview', ''),
            'creator': "\n".join(labels),
            'accessType': "free",
            'provider': srv.type,
            'providerLogo': srv.providerLogo,
        }
        return JsonResponse(ret)
52
53
class SearchAllView(View):
    """Run one search against every configured server and merge the results.

    Servers of type ``solr`` are queried with the incoming parameters (minus
    the facets only the external provider knows); servers of type
    ``europeana`` get a translated query and their answer is re-shaped into
    the Solr JSON layout (``response`` / ``facet_counts``) before merging.
    """

    # Facet fields that exist only on the external (Europeana) side.
    _EXTERNAL_FACET_FIELDS = ("provider", "data_provider", "type")
    _EXTERNAL_FACET_PREFIXES = ("provider:", "data_provider:", "type:")

    # Filter queries that are meaningless for the external provider.
    _IGNORED_EXTERNAL_FILTERS = ("doc-type:*", "access-type:free")

    def get(self, request, command=""):
        """Query all servers and return the merged result as JSON.

        ``command`` is appended to the Solr server URL (e.g. the request
        handler name).  Servers of an unknown type contribute nothing.
        """
        # Multi-valued GET parameters as {name: [values...]}.  The lists are
        # copied so nothing below can modify the request's own QueryDict.
        params_full = {key: list(values)
                       for key, values in request.GET.lists()}

        ret = {}
        for proxy in Server.objects.all():
            # Start from an empty result for every server so that a skipped
            # or failed search never re-adds the previous server's answer.
            if proxy.type == "solr":
                result = self._search_solr(proxy, command, params_full)
            elif proxy.type == "europeana":
                result = self._search_europeana(proxy, request, params_full)
            else:
                result = {}
            ret = self.add(ret, result)

        return JsonResponse(ret, safe=False)

    def _search_solr(self, proxy, command, params_full):
        """Query one Solr server; return its decoded JSON answer or ``{}``.

        ``{}`` is returned without contacting the server when a filter on an
        external-only facet is selected, because no local document can match.
        Errors of the HTTP request propagate to the caller.
        """
        # Work on a private copy: the same incoming parameters are reused
        # for every other server.
        params = {key: list(values) for key, values in params_full.items()}
        params['wt'] = "json"

        # Solr needs a query; fall back to "match everything".
        query = params.get('q') or ['*']
        if query == ['']:
            query = ['*']
        params['q'] = query

        selected_facets = params.get("fq", [])
        if any(facet.startswith(self._EXTERNAL_FACET_PREFIXES)
               for facet in selected_facets):
            return {}  # no result from this server for such a filter

        # Do not ask Solr for facets it does not have.
        params["facet.field"] = [
            field for field in params.get("facet.field", [])
            if field not in self._EXTERNAL_FACET_FIELDS
        ]

        searchUrl = (proxy.serverUrl + "/" + command + "?"
                     + urllib.parse.urlencode(params, True))
        logging.getLogger(__name__).debug("solr search: %s", searchUrl)

        with urllib.request.urlopen(searchUrl) as upstream:
            return json.loads(upstream.read().decode('utf-8'))

    def _search_europeana(self, proxy, request, params_full):
        """Query one Europeana server; return a Solr-shaped result or ``{}``.

        ``{}`` is returned when a ``doc-type`` filter other than
        ``doc-type:external`` is active (external records are not wanted),
        or when the upstream request fails (typically because a selected
        facet is not supported there).
        """
        query = request.GET.get('q', "")
        # Map local field names to the Europeana ones; Europeana does not
        # accept double quotes.
        query = query.replace("author_c", "who")
        query = query.replace("title_s:", "")
        query = query.replace('"', '')
        if query == "":
            query = "*"  # Europeana doesn't want empty strings

        dt_selected = False
        external = False
        facets_new = []
        for facet in params_full.get("fq", []):
            if facet in self._IGNORED_EXTERNAL_FILTERS:
                # generic doc-type / free access are irrelevant here
                continue
            if facet == "doc-type:external":
                external = True
                dt_selected = True
            elif facet.startswith("doc-type:"):
                dt_selected = True
            else:
                # Europeana spells these facet names in upper case.
                facet = facet.replace("data_provider:", "DATA_PROVIDER:")
                facet = facet.replace("provider:", "PROVIDER:")
                facet = facet.replace("type:", "TYPE:")
                facets_new.append(facet)

        # A doc-type was chosen but it is not "external": nothing to do.
        if dt_selected and not external:
            return {}

        paramsNew = {'query': query, 'qf': facets_new}
        try:
            # QueryDict.get returns the whole value string (not a list).
            paramsNew['start'] = int(request.GET.get('start', 0)) + 1
        except (TypeError, ValueError):
            paramsNew['start'] = 1  # europeana starts with 1

        searchUrl = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                     + urllib.parse.urlencode(paramsNew, True)
                     + "&profile=facets")

        try:
            with urllib.request.urlopen(searchUrl) as upstream:
                jsResOrig = json.loads(upstream.read().decode('utf-8'))
        except Exception:
            # Deliberately best effort: one failing external server must not
            # break the combined search.  The URL is not logged because it
            # contains the API key.
            logging.getLogger(__name__).warning(
                "search on external server %r failed", proxy.title,
                exc_info=True)
            return {}

        resNr = jsResOrig['totalResults']
        docs = [self._europeana_item(item, proxy)
                for item in jsResOrig.get("items", [])]

        facetsDict = {facet['name']: facet['fields']
                      for facet in jsResOrig.get("facets", [])}
        years = facetsDict.get('YEAR', [])

        return {
            'response': {'numFound': resNr, 'docs': docs},
            'facet_counts': {
                'facet_queries': {},
                'facet_fields': {
                    "doc-type": {"external": resNr},
                    "year_plain": self.remapFacetDict(years),
                    "provider": self.remapFacetDict(
                        facetsDict.get('PROVIDER', [])),
                    "data_provider": self.remapFacetDict(
                        facetsDict.get('DATA_PROVIDER', [])),
                    "type": self.remapFacetDict(facetsDict.get('TYPE', [])),
                },
                'facet_ranges': {
                    "year": {"counts": self.remapFacetDict(years),
                             "gap": 20, "start": 1, "end": 2021},
                },
            },
        }

    def _europeana_item(self, item, proxy):
        """Translate one Europeana search hit into a Solr-style document."""
        return {
            'IM_title': item.get('title', ['']),
            'title': item.get('title', ['']),
            'year': item.get('year', ['']),
            'author': [agent['def']
                       for agent in item.get('edmAgentLabel', [])
                       if 'def' in agent],
            'url': item.get("edmIsShownAt", ['']),
            'image': item.get("edmPreview", ['']),
            'archive-path': item['id'],
            'objid': item['id'],
            'doc-type': ['external'],
            'mpiwg-dri': item['id'],
            'dataSource': proxy.title,
            'provider': item.get("provider", []),
        }

    def remapFacetDict(self, facet):
        """Turn ``[{'label': l, 'count': c}, ...]`` into ``{l: c, ...}``."""
        return {entry['label']: entry['count'] for entry in facet}

    def add(self, x, y):
        """Merge the Solr-shaped result ``y`` into ``x`` and return the sum.

        An empty ``x`` yields ``y`` and an empty ``y`` yields ``x``.
        Otherwise ``x`` is updated in place: documents are concatenated,
        ``numFound`` is summed and every facet field is merged.  Only
        ``facet_fields`` are merged; ``facet_queries`` and ``facet_ranges``
        of ``y`` are not carried over.
        """
        if not x:
            return y
        if not y:
            # e.g. a server that was skipped or failed
            return x

        x['response']['docs'] = x['response']['docs'] + y['response']['docs']
        x['response']['numFound'] = (x['response']['numFound']
                                     + y['response']['numFound'])

        x_fields = x.setdefault('facet_counts', {}).setdefault(
            'facet_fields', {})
        y_fields = y.get('facet_counts', {}).get('facet_fields', {})
        for name in set(x_fields) | set(y_fields):
            x_fields[name] = self._merge_facet(x_fields.get(name),
                                               y_fields.get(name))
        return x

    def _merge_facet(self, first, second):
        """Combine the values of one facet field from two results.

        Two lists are concatenated; two dicts are merged with the counts of
        equal labels added up.  If only one side has the facet it is used
        as is; for mixed shapes the second value wins.
        """
        if first is None:
            return second
        if second is None:
            return first
        if isinstance(first, list) and isinstance(second, list):
            return first + second
        if isinstance(first, dict) and isinstance(second, dict):
            merged = dict(first)
            for label, count in second.items():
                try:
                    merged[label] = (merged[label] + count
                                     if label in merged else count)
                except TypeError:
                    # values that cannot be added: keep the newer one
                    merged[label] = count
            return merged
        return second
395
396
397
class SearchProxyView(View):
    """Forward a search request to one configured server."""

    def get(self, request, proxyId="", command=""):
        """Search on the server *proxyId* and return its answer.

        For a server of type ``solr`` the request parameters are passed on
        unchanged to ``serverUrl/command`` and the upstream body is streamed
        back with the upstream content type.

        For any other type the server is treated as a Europeana endpoint:
        ``q`` and ``start`` are translated, the hits are re-shaped into
        Solr-style documents and returned as a ``JsonResponse`` with the
        keys ``response`` and ``facet_counts``.

        Exceptions of the server lookup and of the upstream request are not
        handled here and propagate to the caller.
        """
        proxy = Server.objects.get(proxyId=proxyId)

        # urlencode does not work correctly with a QueryDict, so build a
        # plain {name: [values...]} dict from the GET parameters.
        params = {key: list(values) for key, values in request.GET.lists()}

        if proxy.type == "solr":
            searchUrl = (proxy.serverUrl + "/" + command + "?"
                         + urllib.parse.urlencode(params, True))

            # The open response is handed to the streaming response, which
            # iterates over it while sending; it must stay open here.
            search = urllib.request.urlopen(searchUrl)
            ct = search.headers['content-type']
            return StreamingHttpResponse(search, content_type=ct)

        # Europeana does not accept an empty or missing query.
        query = params.get('q') or ['*']
        if query == ['']:
            query = ['*']
        paramsNew = {'query': query}

        try:
            paramsNew['start'] = int(params['start'][0]) + 1
        except (KeyError, IndexError, ValueError):
            paramsNew['start'] = 1  # europeana starts with 1

        searchUrl = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                     + urllib.parse.urlencode(paramsNew, True))

        # One request only; the connection is closed when the block ends.
        with urllib.request.urlopen(searchUrl) as upstream:
            jsRes = json.loads(upstream.read().decode('utf-8'))

        resNr = jsRes['totalResults']

        # "items" is looked up leniently so a result without hits yields
        # an empty document list instead of an error.
        docs = []
        for item in jsRes.get('items', []):
            docs.append({
                'IM_title': item['title'],
                'title': item['title'],
                'year': item.get('year', ['']),
                'author': item.get('dcCreator', ['']),
                'url': item.get("edmIsShownAt", ['']),
                'image': item.get("edmPreview", ['']),
                'archive-path': item['id'],
                'doc-type': ['external'],
                'mpiwg-dri': item['id'],
                'provider': item.get("provider", []),
            })

        res = {
            'response': {'numFound': resNr, 'docs': docs},
            'facet_counts': {
                'facet_queries': {},
                # NOTE(review): fixed count of 100000 rather than resNr --
                # looks like a placeholder; confirm what clients expect.
                'facet_fields': {"doc-type": {"external": 100000}},
            },
        }
        return JsonResponse(res)