Mercurial > hg > djangoSolrSearchProxy
comparison proxy/views.py @ 0:af2f8fe486f6 default tip
initial
author | Dirk Wintergruen <dwinter@mpiwg-berlin.mpg.de> |
---|---|
date | Tue, 17 Feb 2015 12:44:40 +0100 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:af2f8fe486f6 |
---|---|
import json
import urllib
import urllib.parse
import urllib.request

import mergedict
from django.http.response import StreamingHttpResponse, HttpResponse,\
    JsonResponse
from django.shortcuts import render
from django.views.generic.base import View

from proxy.models import Server
10 | |
11 | |
class GetMD(View):
    """Return display metadata for one record of a configured server as JSON."""

    def get(self,request,objid="",proxyId="cl1"):
        """Fetch the metadata document for ``objid`` and map it to a flat dict.

        ``proxyId`` selects the ``Server`` row whose ``metaDataUrl`` (a
        ``%``-format template taking the object id) and ``key`` are used to
        build the upstream URL.  The upstream answer is expected to be JSON
        with a top-level ``object`` entry.

        Returns a ``JsonResponse`` with the keys ``title``, ``date``,
        ``viewerurl``, ``thumburl``, ``creator``, ``accessType``,
        ``provider`` and ``providerLogo``.

        Raises whatever ``Server.objects.get`` raises for an unknown
        ``proxyId``, the ``urllib`` errors of a failed upstream request, and
        ``KeyError`` if the upstream document lacks ``object``, ``title`` or
        ``europeanaAggregation.edmPreview``.
        """
        srv = Server.objects.get(proxyId=proxyId)

        # The id arrives URL-quoted inside the request path.
        objid = urllib.parse.unquote(objid)

        url = srv.metaDataUrl % objid + "?wskey=" + srv.key

        # The context manager closes the upstream connection even when
        # decoding or parsing fails.
        with urllib.request.urlopen(url) as remote:
            obj = json.loads(remote.read().decode('utf-8'))['object']

        # One line per agent; the labels of a single agent are joined by ";".
        labels = [";".join(agent['prefLabel']['def'])
                  for agent in obj.get('agents', [])]

        ret = {}
        ret['title'] = "\n".join(obj['title'])
        ret['date'] = "\n".join(obj.get('year', []))
        ret['viewerurl'] = "http://europeana.eu/portal/record/%s.html" % objid
        ret['thumburl'] = obj['europeanaAggregation']['edmPreview']
        ret['creator'] = "\n".join(labels)
        ret['accessType'] = "free"
        ret['provider'] = srv.type
        ret['providerLogo'] = srv.providerLogo
        return JsonResponse(ret)
52 | |
53 | |
class SearchAllView(View):
    """Send one Solr-style query to every configured server and merge the answers."""

    def get(self,request,command=""):
        """Query all ``Server`` rows and return the merged result as JSON.

        Servers of type ``solr`` receive the (slightly normalised) original
        query; servers of type ``europeana`` receive a translated query and
        their answer is reshaped into the Solr response layout.  Servers of
        any other type are ignored.  A backend that is skipped or yields
        nothing contributes nothing to the merged result.

        ``command`` is the Solr request handler path appended to the
        server URL (only used for ``solr`` servers).
        """
        ret = {}

        for proxy in Server.objects.all():
            # Every backend gets its own copy of the query parameters so that
            # edits made for one backend can never leak into the next one
            # (or into request.GET itself).
            params = self._queryLists(request)

            if proxy.type == "solr":
                jsRes = self._searchSolr(proxy, command, params)
            elif proxy.type == "europeana":
                jsRes = self._searchEuropeana(proxy, request.GET, params)
            else:
                jsRes = {}

            ret = self.add(ret, jsRes)

        return JsonResponse(ret, safe=False)

    def _queryLists(self, request):
        """Return the GET parameters as a plain ``{name: [values]}`` dict of fresh lists."""
        return dict((key, list(values)) for key, values in request.GET.lists())

    def _searchSolr(self, proxy, command, params):
        """Run the query against a Solr server; return its parsed JSON or ``{}``.

        ``{}`` is returned without contacting the server when a selected
        ``fq`` facet targets ``provider``, ``data_provider`` or ``type`` --
        these facets are stripped from the Solr request, so such a
        filter is answered with no results from this backend.
        Network and JSON errors propagate to the caller.
        """
        foreignFacets = ("provider", "data_provider", "type")

        params['wt'] = "json"

        # Make sure there is at least "*" to search for.
        query = params.get('q', ['*'])
        if query == "" or query == [] or query == ['']:
            query = ['*']
        params['q'] = query

        selected_facets = params.get("fq", [])
        prefixes = tuple(name + ":" for name in foreignFacets)
        if any(facet.startswith(prefixes) for facet in selected_facets):
            return {}  # no result from this backend for such a filter
        params["fq"] = selected_facets

        # Do not ask Solr to facet on the foreign fields; a missing
        # facet.field parameter is treated as "no facet fields".
        params["facet.field"] = [field for field in params.get("facet.field", [])
                                 if field not in foreignFacets]

        searchUrl = proxy.serverUrl + "/" + command + "?" + urllib.parse.urlencode(params, True)

        with urllib.request.urlopen(searchUrl) as remote:
            return json.loads(remote.read().decode('utf-8'))

    def _searchEuropeana(self, proxy, query, params_full):
        """Run the query against a Europeana server; return a Solr-shaped dict or ``{}``.

        ``query`` is the request's ``QueryDict`` (single values are read from
        it); ``params_full`` is the ``{name: [values]}`` form used for the
        multi-valued ``fq`` parameter.

        ``{}`` is returned when a ``doc-type`` facet other than
        ``doc-type:external`` is selected without ``doc-type:external``, and
        when the upstream request fails or does not return valid JSON
        (normally an unsupported facet).
        """
        paramsNew = {}

        # Translate the special field names and drop quotes.
        text = query.get('q', "")
        text = text.replace("author_c", "who")
        text = text.replace("title_s:", "")
        text = text.replace('"', '')
        if text == "":
            text = "*"  # Europeana doesn't want empty strings
        paramsNew['query'] = text

        # "doc-type:*" is the generic call and "access-type:free" is not
        # relevant for external sources, so neither is forwarded.
        ignored = ("doc-type:*", "access-type:free")

        dt_selected = False
        external = False
        facets_new = []
        for facet in params_full.get("fq", []):
            if facet in ignored:
                continue
            if facet == "doc-type:external":
                external = True
                dt_selected = True
            elif facet.startswith("doc-type:"):
                dt_selected = True
            else:
                facet = facet.replace("data_provider:", "DATA_PROVIDER:")
                facet = facet.replace("provider:", "PROVIDER:")
                facet = facet.replace("type:", "TYPE:")
                facets_new.append(facet)
        paramsNew["qf"] = facets_new

        # A doc-type was chosen but "external" is not among the choices:
        # nothing to fetch from this backend.
        if dt_selected and not external:
            return {}

        # Europeana starts counting with 1.  QueryDict.get returns the whole
        # value as a string, so it is converted as a whole.
        try:
            paramsNew['start'] = int(query.get('start', 0)) + 1
        except (TypeError, ValueError):
            paramsNew['start'] = 1

        searchUrl = (proxy.serverUrl + "?wskey=" + proxy.key + "&"
                     + urllib.parse.urlencode(paramsNew, True) + "&profile=facets")

        # Best effort: a failing backend must not break the merged search.
        # urllib's URLError/HTTPError are OSError subclasses; decoding and
        # JSON errors are ValueError subclasses.
        try:
            with urllib.request.urlopen(searchUrl) as remote:
                jsResOrig = json.loads(remote.read().decode('utf-8'))
        except (OSError, ValueError):
            return {}

        resNr = jsResOrig.get('totalResults', 0)

        docs = []
        for item in jsResOrig.get("items", []):
            newitem = {}
            newitem['IM_title'] = item['title']
            newitem['title'] = item['title']
            newitem['year'] = item.get('year', [''])
            newitem['author'] = [agent['def'] for agent in item.get('edmAgentLabel', [])]
            newitem['url'] = item.get("edmIsShownAt", [''])
            newitem['image'] = item.get("edmPreview", [''])
            newitem['archive-path'] = item['id']
            newitem['objid'] = item['id']
            newitem['doc-type'] = ['external']
            newitem['mpiwg-dri'] = item['id']
            newitem['dataSource'] = proxy.title
            newitem['provider'] = item.get("provider", [])
            docs.append(newitem)

        facetsDict = {}
        for facet in jsResOrig.get("facets", []):
            facetsDict[facet['name']] = facet['fields']

        # remapFacetDict is called once per target so that no two entries of
        # the result share one dict object (add() merges facet_fields).
        facetFields = {"doc-type": {"external": resNr}}
        facetFields["year_plain"] = self.remapFacetDict(facetsDict.get('YEAR', []))
        facetFields["provider"] = self.remapFacetDict(facetsDict.get('PROVIDER', []))
        facetFields["data_provider"] = self.remapFacetDict(facetsDict.get('DATA_PROVIDER', []))
        facetFields["type"] = self.remapFacetDict(facetsDict.get('TYPE', []))

        facetRanges = {}
        facetRanges["year"] = {"counts": self.remapFacetDict(facetsDict.get('YEAR', [])),
                               "gap": 20, "start": 1, "end": 2021}

        jsRes = {}
        jsRes['response'] = {'numFound': resNr, 'docs': docs}
        jsRes["facet_counts"] = {}
        jsRes["facet_counts"]["facet_queries"] = {}
        jsRes["facet_counts"]["facet_fields"] = facetFields
        jsRes["facet_counts"]["facet_ranges"] = facetRanges
        return jsRes

    def remapFacetDict(self,facet):
        """Turn a list of ``{'label': ..., 'count': ...}`` entries into ``{label: count}``."""
        return dict((entry['label'], entry['count']) for entry in facet)

    def add(self,x,y):
        """Merge the Solr-shaped result ``y`` into ``x`` and return the merged result.

        An empty ``y`` leaves ``x`` untouched; an empty ``x`` yields ``y``.
        Otherwise ``x`` is updated in place: the ``docs`` lists are
        concatenated, ``numFound`` is summed and every entry of
        ``facet_counts.facet_fields`` is merged (see ``_mergeFacet``).
        Only ``facet_fields`` is merged; other parts of ``facet_counts``
        keep the value they have in ``x``.
        """
        if not y:
            return x
        if not x:
            return y

        x['response']['docs'] = x['response']['docs'] + y['response']['docs']
        x['response']['numFound'] = x['response']['numFound'] + y['response']['numFound']

        # A result without facet information is treated as having no fields.
        xFields = x.setdefault('facet_counts', {}).setdefault('facet_fields', {})
        yFields = y.get('facet_counts', {}).get('facet_fields', {})

        for ff in set(xFields) | set(yFields):
            xFields[ff] = self._mergeFacet(xFields.get(ff), yFields.get(ff))

        return x

    def _mergeFacet(self, mine, other):
        """Merge the values of one facet field from two results.

        * two lists are concatenated,
        * two dicts are combined; a label present in both gets the sum of
          both counts when both are numbers, otherwise the count of
          ``other``,
        * for any other combination ``other`` wins.

        A missing side (``None``) counts as an empty container of the other
        side's kind.  Neither argument is modified.
        """
        if mine is None:
            mine = {} if isinstance(other, dict) else []
        if other is None:
            other = {} if isinstance(mine, dict) else []

        if isinstance(mine, list) and isinstance(other, list):
            return mine + other

        if isinstance(mine, dict) and isinstance(other, dict):
            merged = dict(mine)
            for label, count in other.items():
                known = merged.get(label)
                if self._isNumber(known) and self._isNumber(count):
                    merged[label] = known + count
                else:
                    merged[label] = count
            return merged

        # NOTE(review): list-shaped and dict-shaped facets cannot be combined
        # here; as before, the newer value replaces the older one.
        return other

    def _isNumber(self, value):
        """Return True for ints and floats, but not for bools."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)
395 | |
396 | |
397 | |
class SearchProxyView(View):
    """Forward a Solr-style query to exactly one configured server."""

    def get(self,request,proxyId="",command=""):
        """Query the server identified by ``proxyId``.

        For a server of type ``solr`` the GET parameters are forwarded
        unchanged to ``serverUrl/command`` and the upstream body is streamed
        back with the upstream content type.

        For every other server type the query is sent in Europeana form
        (``query``/``start``/``wskey``) and the JSON answer is reshaped into
        a Solr-like ``response``/``facet_counts`` structure returned as a
        ``JsonResponse``.

        Raises whatever ``Server.objects.get`` raises for an unknown
        ``proxyId`` and the ``urllib``/JSON errors of a failed upstream
        request.
        """
        proxy = Server.objects.get(proxyId=proxyId)

        # urlencode does not work correctly with a QueryDict, so use a plain
        # {name: [values]} dict of fresh lists.
        params = dict((key, list(values)) for key, values in request.GET.lists())

        if proxy.type == "solr":
            searchUrl = proxy.serverUrl + "/" + command + "?" + urllib.parse.urlencode(params, True)

            # Not wrapped in "with": the open response is handed to
            # StreamingHttpResponse, which reads it while sending the reply.
            search = urllib.request.urlopen(searchUrl)
            ct = search.headers['content-type']
            return StreamingHttpResponse(search, content_type=ct)

        paramsNew = {}

        # Fall back to "*" when no (or only an empty) query was given.
        query = params.get('q', ['*'])
        if query == [] or query == ['']:
            query = ['*']
        paramsNew['query'] = query

        # Europeana starts counting with 1.
        try:
            paramsNew['start'] = int(params['start'][0]) + 1
        except (KeyError, IndexError, TypeError, ValueError):
            paramsNew['start'] = 1

        searchUrl = proxy.serverUrl + "?wskey=" + proxy.key + "&" + urllib.parse.urlencode(paramsNew, True)

        # One request only; the context manager closes the connection.
        with urllib.request.urlopen(searchUrl) as remote:
            jsRes = json.loads(remote.read().decode('utf-8'))

        resNr = jsRes['totalResults']

        docs = []
        for item in jsRes.get('items', []):
            newitem = {}
            newitem['IM_title'] = item['title']
            newitem['title'] = item['title']
            newitem['year'] = item.get('year', [''])
            newitem['author'] = item.get('dcCreator', [''])
            newitem['url'] = item.get("edmIsShownAt", [''])
            newitem['image'] = item.get("edmPreview", [''])
            newitem['archive-path'] = item['id']
            newitem['doc-type'] = ['external']
            newitem['mpiwg-dri'] = item['id']
            newitem['provider'] = item.get("provider", [])
            docs.append(newitem)

        res = {}
        res['response'] = {'numFound': resNr, 'docs': docs}
        res["facet_counts"] = {}
        res["facet_counts"]["facet_queries"] = {}
        # NOTE(review): the external count is a fixed 100000 here, whereas
        # SearchAllView reports the real totalResults -- confirm intent.
        res["facet_counts"]["facet_fields"] = {"doc-type": {"external": 100000}}

        return JsonResponse(res)