annotate zopeSolr.py @ 12:2e2da6d5e30d default tip

bug in generateLink if searchterm ="*" dann wird _fc nicht in den Link eingebaut.
author dwinter
date Thu, 22 Aug 2013 14:21:25 +0200
parents f7fe88804cb8
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834706423ac1 initial
dwinter
parents:
diff changeset
1 # -*- coding: utf-8 -*-
834706423ac1 initial
dwinter
parents:
diff changeset
2
834706423ac1 initial
dwinter
parents:
diff changeset
3 #Connects Zope with Solr. Requires the sunburnt package, @see http://opensource.timetric.com/sunburnt/
834706423ac1 initial
dwinter
parents:
diff changeset
4
834706423ac1 initial
dwinter
parents:
diff changeset
5
834706423ac1 initial
dwinter
parents:
diff changeset
6 from OFS.SimpleItem import SimpleItem
834706423ac1 initial
dwinter
parents:
diff changeset
7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
834706423ac1 initial
dwinter
parents:
diff changeset
8 import os.path
834706423ac1 initial
dwinter
parents:
diff changeset
9 import sunburnt
834706423ac1 initial
dwinter
parents:
diff changeset
10 from Globals import package_home
834706423ac1 initial
dwinter
parents:
diff changeset
11 import httplib2
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
12 import urlparse
0
834706423ac1 initial
dwinter
parents:
diff changeset
13 import urllib
834706423ac1 initial
dwinter
parents:
diff changeset
14 import re
834706423ac1 initial
dwinter
parents:
diff changeset
15 import xml.etree.ElementTree as ET
834706423ac1 initial
dwinter
parents:
diff changeset
16 import json
6
c97b5cbcda52 random search in solr
dwinter
parents: 5
diff changeset
17 import random
9
896bea4f61a0 try to deal with timeout on connect.
casties
parents: 8
diff changeset
18 import logging
0
834706423ac1 initial
dwinter
parents:
diff changeset
19
834706423ac1 initial
dwinter
parents:
diff changeset
# Words that should not be shown in the term list, grouped per key.
# TODO: make this configurable
STOPLIST = {
    'main_content': [
        'forward',
        'drucken',
        'history',
        'science',
        'part',
        'publications',
        'projects',
        'project',
        'new',
        'geschichte',
        'institute',
        'related',
        'boltzmannstraße',
        '14195',
    ],
    'title': [
        'bd',
        '10',
        '11',
        '12',
        '18',
        '19',
        '20',
        'abt',
        'ad',
        'di',
        'history',
        'geschichte',
        'science',
    ],
}
834706423ac1 initial
dwinter
parents:
diff changeset
27
834706423ac1 initial
dwinter
parents:
diff changeset
28
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
PURLSERVER = "http://md.mpiwg-berlin.mpg.de/purls/"

# Request path template with three %s placeholders.
# TODO: only needed for getTermsAsJSON; should somehow come from sunburnt instead.
SOLRSERVER = (
    "/select?q=%s:%s"
    "&wt=xml&tv=on&qt=tvrh"
    "&fl=%s&tv.tf=true"
)
0
834706423ac1 initial
dwinter
parents:
diff changeset
33
834706423ac1 initial
dwinter
parents:
diff changeset
def zptFile(self, path, orphaned=False):
    """Return a page template file from the product.

    The template is loaded from ``path`` relative to this package's home
    directory.

    :param self: object the template is wrapped around via ``__of__``
        (ignored when ``orphaned`` is true).
    :param path: template path relative to the package home.
    :param orphaned: when true, return the bare template without wrapping
        it (the unusual case).
    :return: the ``PageTemplateFile``, wrapped with ``__of__(self)`` unless
        ``orphaned`` is set.
    """
    template = PageTemplateFile(os.path.join(package_home(globals()), path))
    if orphaned:
        # unusual case: hand back the template without any wrapping
        return template
    return template.__of__(self)
834706423ac1 initial
dwinter
parents:
diff changeset
43
834706423ac1 initial
dwinter
parents:
diff changeset
44
834706423ac1 initial
dwinter
parents:
diff changeset
45
834706423ac1 initial
dwinter
parents:
diff changeset
46 class ZopeSolr(SimpleItem):
6
c97b5cbcda52 random search in solr
dwinter
parents: 5
diff changeset
47
0
834706423ac1 initial
dwinter
parents:
diff changeset
48
834706423ac1 initial
dwinter
parents:
diff changeset
49 meta_type="ZopeSolr"
834706423ac1 initial
dwinter
parents:
diff changeset
50
834706423ac1 initial
dwinter
parents:
diff changeset
51 manage_options= ({'label':'Main Config','action': 'changeMain'},) + SimpleItem.manage_options
834706423ac1 initial
dwinter
parents:
diff changeset
52
834706423ac1 initial
dwinter
parents:
diff changeset
53 def __init__(self,id,title,solrURL):
834706423ac1 initial
dwinter
parents:
diff changeset
54 self.id=id
834706423ac1 initial
dwinter
parents:
diff changeset
55 self.title=title
834706423ac1 initial
dwinter
parents:
diff changeset
56 self.solrURL=solrURL #URL einer solr instance bzw. einer collection, falls nicht die default collection benutzt werden soll
834706423ac1 initial
dwinter
parents:
diff changeset
57
834706423ac1 initial
dwinter
parents:
diff changeset
58
834706423ac1 initial
dwinter
parents:
diff changeset
59 #Connect to the Solr instance
834706423ac1 initial
dwinter
parents:
diff changeset
60
834706423ac1 initial
dwinter
parents:
diff changeset
def connect(self):
    """Create a sunburnt interface for ``self.solrURL`` and keep it as ``self._v_solr``.

    NOTE(review): the ``_v_`` prefix presumably marks the attribute as
    volatile (not persisted) in the ZODB sense; confirm.
    """
    interface = sunburnt.SolrInterface(url=self.solrURL)
    self._v_solr = interface
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
63
10733d367831 date ranges
dwinter
parents: 3
diff changeset
64
6
c97b5cbcda52 random search in solr
dwinter
parents: 5
diff changeset
def getRandom(self, number=3, field="title_s"):
    """Fetch randomly chosen entries from Solr.

    The query is sorted descending by a ``random_<n>`` field, where ``<n>``
    is a fresh random integer below 300000, and limited to ``number`` rows.

    :param number: how many rows to request.
    :param field: accepted for interface compatibility; not used here.
    :return: the executed sunburnt response, or an empty list when the
        connection to Solr cannot be established.
    """
    # connect() stores the interface as ``_v_solr``. The check used to look
    # for ``_v_solr_`` (trailing underscore), which never exists, so every
    # call opened a new connection.
    if not getattr(self, '_v_solr', None):
        try:
            self.connect()
        except Exception as e:
            logging.error("Error connecting to Solr: %s" % e)
            return []

    # Equivalent raw request, for reference:
    # http://localhost:8983/solr/mpiwgSources/select/?q=*:*&sort=random_12xs34%20desc&rows=1&facet=false
    random.seed()
    rand = random.randrange(300000)

    # NOTE(review): the reference URL above uses q=*:* while the query string
    # passed here is "*.*"; confirm which one is intended.
    query = self._v_solr.query("*.*").sort_by("-random_%s" % rand)
    results = query.paginate(start=0, rows=number).execute()

    return results
c97b5cbcda52 random search in solr
dwinter
parents: 5
diff changeset
86
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
87 #erzeuge den link auf die fulltext display version des bildviewers
10733d367831 date ranges
dwinter
parents: 3
diff changeset
88 def createFullTextLink(self,page,facetSerch,search):
10733d367831 date ranges
dwinter
parents: 3
diff changeset
89
10733d367831 date ranges
dwinter
parents: 3
diff changeset
90 fqs=[]
10733d367831 date ranges
dwinter
parents: 3
diff changeset
91 for key in facetSerch.keys():
10733d367831 date ranges
dwinter
parents: 3
diff changeset
92 values = self.getList(facetSerch.get(key))
10733d367831 date ranges
dwinter
parents: 3
diff changeset
93 for value in values:
10733d367831 date ranges
dwinter
parents: 3
diff changeset
94 if value!="*":
10733d367831 date ranges
dwinter
parents: 3
diff changeset
95 fqs.append("%s=%s"%(key,urllib.quote(value)));
10733d367831 date ranges
dwinter
parents: 3
diff changeset
96
10733d367831 date ranges
dwinter
parents: 3
diff changeset
97 ret="pf=%s"%page
10733d367831 date ranges
dwinter
parents: 3
diff changeset
98 ret+="&query=("
10733d367831 date ranges
dwinter
parents: 3
diff changeset
99
10733d367831 date ranges
dwinter
parents: 3
diff changeset
100
10733d367831 date ranges
dwinter
parents: 3
diff changeset
101
10733d367831 date ranges
dwinter
parents: 3
diff changeset
102 ret+=" OR ".join(fqs)
10733d367831 date ranges
dwinter
parents: 3
diff changeset
103 #if len(fqs)>0 and len(qs)>0 and qs[0]!="":
10733d367831 date ranges
dwinter
parents: 3
diff changeset
104 if len(fqs)>0 and len(search)>0: #TODO das muss noch geandert werden, wenn das der normale suchstring nach feldern müssen alle "AND" in "OR" getauscht werden
10733d367831 date ranges
dwinter
parents: 3
diff changeset
105 #da ja in der volltext-line die Zeile gedunfen werden soll wenn eines der Worte drin ist, vorher wurd eventuell und über die ganee seite gesucht
10733d367831 date ranges
dwinter
parents: 3
diff changeset
106 #die worte müssen aber nicht in einer zeile sein und werden jetzt nicht gefunden
10733d367831 date ranges
dwinter
parents: 3
diff changeset
107 ret+=" OR "
10733d367831 date ranges
dwinter
parents: 3
diff changeset
108
10733d367831 date ranges
dwinter
parents: 3
diff changeset
109
10733d367831 date ranges
dwinter
parents: 3
diff changeset
110
10733d367831 date ranges
dwinter
parents: 3
diff changeset
111 ret+="("+search+")"
10733d367831 date ranges
dwinter
parents: 3
diff changeset
112
10733d367831 date ranges
dwinter
parents: 3
diff changeset
113 ret+=")&viewLayer=search"
10733d367831 date ranges
dwinter
parents: 3
diff changeset
114
10733d367831 date ranges
dwinter
parents: 3
diff changeset
115 return ret
10733d367831 date ranges
dwinter
parents: 3
diff changeset
116
0
834706423ac1 initial
dwinter
parents:
diff changeset
117 #create a link, in particular for faceted search
834706423ac1 initial
dwinter
parents:
diff changeset
118 #@param search: ist suchparameter, wird einfach als search=%s weitergereicht
834706423ac1 initial
dwinter
parents:
diff changeset
119 #@param facetSearch: bekommt einen hash (feldname, liste der suchworte)
834706423ac1 initial
dwinter
parents:
diff changeset
120
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
def generateLink(self, search, facetSearch=None, ranges=None, dateranges=None, args=None):
    """Build a query string for a (faceted) search link.

    The result starts with ``?search=<search>`` and then appends, in this
    order: ``&<field>_fc=<term>`` for facets, ``&<field>_rg=<value>`` for
    ranges, ``&<field>_drg=<value>`` for date ranges and ``&<key>=<value>``
    for any additional arguments. Values are inserted as given; no URL
    quoting is applied here.

    :param search: search parameter, passed through as ``search=%s``.
    :param facetSearch: mapping of field name to a search word or a list of
        search words. A term equal to ``"*"`` is skipped, so no ``_fc``
        parameter is emitted for it.
    :param ranges: mapping of field name to a value or list of values.
    :param dateranges: mapping of field name to a value or list of values.
    :param args: mapping of further parameter names to a value or list of
        values.
    :return: the assembled query string.
    """
    def as_list(value):
        # a single string counts as a one-element list
        if isinstance(value, basestring):
            return [value]
        return value

    ret = "?search=%s" % search

    for facet, searchTerms in (facetSearch or {}).items():
        for searchTerm in as_list(searchTerms):
            if searchTerm == "*":
                continue
            try:
                ret += "&%s_fc=%s" % (facet, searchTerm.encode('utf-8'))
            except (UnicodeError, AttributeError):
                # Terms that cannot be encoded (byte strings with non-ASCII
                # content, or non-string values) are inserted unchanged.
                # This used to be a bare ``except`` that also swallowed
                # KeyboardInterrupt and SystemExit.
                ret += "&%s_fc=%s" % (facet, searchTerm)

    for key, values in (ranges or {}).items():
        for value in as_list(values):
            ret += "&%s_rg=%s" % (key, value)

    for key, values in (dateranges or {}).items():
        for value in as_list(values):
            ret += "&%s_drg=%s" % (key, value)

    for key, values in (args or {}).items():
        for value in as_list(values):
            ret += "&%s=%s" % (key, value)

    return ret
834706423ac1 initial
dwinter
parents:
diff changeset
164
834706423ac1 initial
dwinter
parents:
diff changeset
165 #hilfsmethode erzeuget immer eine liste von einträgen
834706423ac1 initial
dwinter
parents:
diff changeset
def getList(self, param):
    """Helper that turns a single string into a one-element list.

    :param param: a string, or any other value.
    :return: ``[param]`` when ``param`` is a string; otherwise ``param``
        itself, unchanged.
    """
    return [param] if isinstance(param, basestring) else param
834706423ac1 initial
dwinter
parents:
diff changeset
171
834706423ac1 initial
dwinter
parents:
diff changeset
172 #erzeugt analog zu den Ranges in Velocity für ein numerisches Feld die Suche in ranges
834706423ac1 initial
dwinter
parents:
diff changeset
173 #@param field:ist der name des Feldes in dem in Ranges gesucht werden soll
834706423ac1 initial
dwinter
parents:
diff changeset
174 #@param begin anfang der ranges
834706423ac1 initial
dwinter
parents:
diff changeset
175 #@param end end of the ranges
834706423ac1 initial
dwinter
parents:
diff changeset
176 #@param increment größe eines ranges
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
177 #@param date if true, field is a date field; begin, end and increment are nevertheless still integers denoting years, i.e. currently only whole years are possible as steps
0
834706423ac1 initial
dwinter
parents:
diff changeset
178 # für alle andere paramter @see prepareSearch
834706423ac1 initial
dwinter
parents:
diff changeset
179 #@return a hash mapping each range to the number of hits in that range, e.g. 1921-1950 -> 21
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
180
10733d367831 date ranges
dwinter
parents: 3
diff changeset
def ranges(self, field, begin, end, increment, solrQuery="", facetFields=[], start=0, rows=10, facetSearch=None, sortFacets=True, date=False, storeFields=False):
    """Count the hits of a search per range of a numeric or date field.

    The base query is built by ``prepareSearch``; for every step ``x`` of
    ``range(begin, end, increment)`` it is narrowed to one range and
    executed.

    :param field: name of the field to search in ranges.
    :param begin: start of the ranges.
    :param end: end of the ranges.
    :param increment: size of one range.
    :param solrQuery: passed through to ``prepareSearch``.
    :param facetFields: passed through to ``prepareSearch`` unchanged (the
        shared default list is not modified in this method).
    :param start: passed through to ``prepareSearch``.
    :param rows: passed through to ``prepareSearch``.
    :param facetSearch: passed through to ``prepareSearch``.
    :param sortFacets: passed through to ``prepareSearch``.
    :param date: if true, ``field`` is treated as a date field; ``begin``,
        ``end`` and ``increment`` are still integers denoting years, so only
        whole years are possible as steps.
    :param storeFields: if true, the highlighting of each range query is
        returned as well.
    :return: a dict mapping range key to number of hits. Keys look like
        ``"1921-1950"`` for numeric fields and
        ``"<from>-01-01T00:00:00Z@<to>-01-01T00:00:00Z"`` for date fields.
        With ``storeFields`` a pair ``(counts, highlighting_by_key)`` is
        returned. When Solr cannot be reached the containers are empty.
    """
    q = self.prepareSearch(solrQuery, facetFields, start, rows, facetSearch, sortFacets)

    res = {}  # range key -> number of hits
    fls = {}  # range key -> highlighting of that range query

    def outcome():
        # Keep the return shape consistent with ``storeFields`` on every
        # path, including the failure paths below.
        if storeFields:
            return res, fls
        return res

    # prepareSearch returns an empty list when it cannot connect; there is
    # then no query object to refine.
    if isinstance(q, list):
        return outcome()

    # connect() stores the interface as ``_v_solr``. The check used to look
    # for ``_v_solr_`` (trailing underscore), which never exists, so every
    # call reconnected.
    if not getattr(self, '_v_solr', None):
        try:
            self.connect()
        except Exception as e:
            logging.error("Error connecting to Solr: %s" % e)
            return outcome()

    for x in range(begin, end, increment):
        upper = x + increment

        if date:
            year = "%s-01-01T00:00:00Z"
            lower_bound = year % (int(x))
            upper_bound = year % (upper)
            key = "%s@%s" % (lower_bound, upper_bound)
        else:
            # TODO: __gt does not seem to work (it turns into gte), hence
            # the lower bound is expressed as gte x+1.
            lower_bound = int(x) + 1
            upper_bound = upper
            key = "%s-%s" % (x, upper)

        query = {
            "%s__gte" % field: lower_bound,
            "%s__lte" % field: upper_bound,
        }

        result = q.query(**query).highlight("text_german").execute()

        res[key] = result.result.numFound

        if storeFields:
            fls[key] = result.highlighting

    return outcome()
0
834706423ac1 initial
dwinter
parents:
diff changeset
233
834706423ac1 initial
dwinter
parents:
diff changeset
234
834706423ac1 initial
dwinter
parents:
diff changeset
235 #prepareSearch erzeugt die Suchabfrage
834706423ac1 initial
dwinter
parents:
diff changeset
236
834706423ac1 initial
dwinter
parents:
diff changeset
237 #solrQuery sucht im in schema.xml bzw. solrconfig.xml festgelegt generischen Feld, hierbei werden mit blanks getrennte eintrage in "AND" zerlegt.
834706423ac1 initial
dwinter
parents:
diff changeset
238 #TODO: erlaube auch suche nach phrasen mit ""
834706423ac1 initial
dwinter
parents:
diff changeset
239 #facetFields:Liste der Felder, nach denen facitiert werden sollen, wirdn in facet_by in solrburn uebersetzt.
834706423ac1 initial
dwinter
parents:
diff changeset
240 #start: Erste Eintrag für paginierung, anzahl der Treffer
834706423ac1 initial
dwinter
parents:
diff changeset
241 #rows: anzahl der Treffer
834706423ac1 initial
dwinter
parents:
diff changeset
242 #facetSearch: Hash mit Feldnamen: suchwort oder Feldname: liste von Suchworten, hast wird direkt an query von solrburnt weitergegeben,
834706423ac1 initial
dwinter
parents:
diff changeset
243 #sortfacets: if true, dann werden die Ergebnisse der facetierten suche alphabetisch sortiert, ACHTUNG: das ist nicht gleich der Funktion in solr die Liste
834706423ac1 initial
dwinter
parents:
diff changeset
244 #direkt sortiert zurückzubekommen, hier werden die haufigsten Werte genommen (einstellt in solrconfig.xml) und dann nur diese sortiert!
834706423ac1 initial
dwinter
parents:
diff changeset
245 #ausserdem werden beim sortieren, die stopworte gefiltert!
834706423ac1 initial
dwinter
parents:
diff changeset
246 #
834706423ac1 initial
dwinter
parents:
diff changeset
247 #neben den direkten parameter koennen auch parameter fuer die facetierte Suche über FORM im REQUEST übergeben werden, diese Felder müssen dann
834706423ac1 initial
dwinter
parents:
diff changeset
248 #mit "_fc" enden.
834706423ac1 initial
dwinter
parents:
diff changeset
249 # gibt als ergebnis den folgenden Hash, so wie in http://opensource.timetric.com/sunburnt/queryingsolr.html#executing-queries-and-interpreting-the-response,
834706423ac1 initial
dwinter
parents:
diff changeset
250 # http://opensource.timetric.com/sunburnt/queryingsolr.html#highlighting
834706423ac1 initial
dwinter
parents:
diff changeset
251 # und http://opensource.timetric.com/sunburnt/queryingsolr.html#faceting
834706423ac1 initial
dwinter
parents:
diff changeset
252 #dokumentiert. Highlighting selbst wird in sorlconfig.xml konfiguriert.
834706423ac1 initial
dwinter
parents:
diff changeset
253 #return ein Queryobjet, zur eigentlichen Suche muss darauf noch execute ausgeführt werden.
834706423ac1 initial
dwinter
parents:
diff changeset
254
10
74a9661e4e9f sorting added
dwinter
parents: 8
diff changeset
255 def prepareSearch(self,solrQuery,facetFields=[],start=0,rows=10,facetSearch=None,sortFacets=True,orSearch=None,sorting=None):
0
834706423ac1 initial
dwinter
parents:
diff changeset
256 "search solr"
834706423ac1 initial
dwinter
parents:
diff changeset
257
834706423ac1 initial
dwinter
parents:
diff changeset
258
834706423ac1 initial
dwinter
parents:
diff changeset
259 ranges={}
834706423ac1 initial
dwinter
parents:
diff changeset
260 ## deal with a form
834706423ac1 initial
dwinter
parents:
diff changeset
261 if self.REQUEST:
834706423ac1 initial
dwinter
parents:
diff changeset
262 constr = self.REQUEST.form
3
156c1db5a701 print statements deleted
dwinter
parents: 2
diff changeset
263
0
834706423ac1 initial
dwinter
parents:
diff changeset
264 for field in constr.keys():
834706423ac1 initial
dwinter
parents:
diff changeset
265
834706423ac1 initial
dwinter
parents:
diff changeset
266 #facetes
834706423ac1 initial
dwinter
parents:
diff changeset
267 if field.endswith("_fc"):
834706423ac1 initial
dwinter
parents:
diff changeset
268 if facetSearch is None:
834706423ac1 initial
dwinter
parents:
diff changeset
269 facetSearch={}
834706423ac1 initial
dwinter
parents:
diff changeset
270
834706423ac1 initial
dwinter
parents:
diff changeset
271 vals = constr[field]
834706423ac1 initial
dwinter
parents:
diff changeset
272 if not isinstance(vals,basestring):
834706423ac1 initial
dwinter
parents:
diff changeset
273
834706423ac1 initial
dwinter
parents:
diff changeset
274 vals=[x.decode('utf-8') for x in vals]
834706423ac1 initial
dwinter
parents:
diff changeset
275 else:
834706423ac1 initial
dwinter
parents:
diff changeset
276 vals=vals.decode('utf-8')
1
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
277
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
278
0
834706423ac1 initial
dwinter
parents:
diff changeset
279 facetSearch[field.replace("_fc",'')]=vals
834706423ac1 initial
dwinter
parents:
diff changeset
280 #ranges form a-b
834706423ac1 initial
dwinter
parents:
diff changeset
281
1
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
282
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
283 if field.endswith("_or"):
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
284 if orSearch is None:
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
285 orSearch={}
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
286
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
287 vals = constr[field]
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
288 if not isinstance(vals,basestring):
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
289
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
290 vals=[x.decode('utf-8') for x in vals]
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
291 else:
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
292 vals=vals.decode('utf-8')
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
293
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
294
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
295 orSearch[field.replace("_or",'')]=vals
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
296 #ranges form a-b
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
297
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
298
0
834706423ac1 initial
dwinter
parents:
diff changeset
299
834706423ac1 initial
dwinter
parents:
diff changeset
300 if field.endswith("_rg"):
834706423ac1 initial
dwinter
parents:
diff changeset
301
834706423ac1 initial
dwinter
parents:
diff changeset
302
834706423ac1 initial
dwinter
parents:
diff changeset
303 splitted = "_".split(field)
834706423ac1 initial
dwinter
parents:
diff changeset
304 if len(splitted)==2:
834706423ac1 initial
dwinter
parents:
diff changeset
305 #ranges[field.replace("_rg","__gt")]=splitted[0]
834706423ac1 initial
dwinter
parents:
diff changeset
306 #TODO __gt scheint nicht zu funktionieren wird zu gte (???)
834706423ac1 initial
dwinter
parents:
diff changeset
307 ranges[field.replace("_rg","__gte")]=int(splitted[0])+1
834706423ac1 initial
dwinter
parents:
diff changeset
308 ranges[field.replace("_rg","__lte")]=splitted[1]
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
309
10733d367831 date ranges
dwinter
parents: 3
diff changeset
310 elif field.endswith("_drg"):
10733d367831 date ranges
dwinter
parents: 3
diff changeset
311 splitted = "@".split(field)
10733d367831 date ranges
dwinter
parents: 3
diff changeset
312 if len(splitted)==2:
10733d367831 date ranges
dwinter
parents: 3
diff changeset
313 #ranges[field.replace("_rg","__gt")]=splitted[0]
10733d367831 date ranges
dwinter
parents: 3
diff changeset
314 #TODO __gt scheint nicht zu funktionieren wird zu gte (???)
10733d367831 date ranges
dwinter
parents: 3
diff changeset
315 ranges[field.replace("_drg","__gte")]=splitted[0]
10733d367831 date ranges
dwinter
parents: 3
diff changeset
316 ranges[field.replace("_drg","__lte")]=splitted[1]
0
834706423ac1 initial
dwinter
parents:
diff changeset
317
834706423ac1 initial
dwinter
parents:
diff changeset
318
834706423ac1 initial
dwinter
parents:
diff changeset
319 #teste verbindung zu solr
834706423ac1 initial
dwinter
parents:
diff changeset
320 if not getattr(self,'_v_solr_',None):
9
896bea4f61a0 try to deal with timeout on connect.
casties
parents: 8
diff changeset
321 try:
896bea4f61a0 try to deal with timeout on connect.
casties
parents: 8
diff changeset
322 self.connect()
896bea4f61a0 try to deal with timeout on connect.
casties
parents: 8
diff changeset
323 except Exception, e:
896bea4f61a0 try to deal with timeout on connect.
casties
parents: 8
diff changeset
324 logging.error("Error connecting to Solr: %s"%e)
896bea4f61a0 try to deal with timeout on connect.
casties
parents: 8
diff changeset
325 return []
0
834706423ac1 initial
dwinter
parents:
diff changeset
326
834706423ac1 initial
dwinter
parents:
diff changeset
327 solrQuery = solrQuery.decode('utf-8')
834706423ac1 initial
dwinter
parents:
diff changeset
328
834706423ac1 initial
dwinter
parents:
diff changeset
329 #teile die suche nach " " daraus wird dann eine AND suche
834706423ac1 initial
dwinter
parents:
diff changeset
330 #TODO: sollte flexibler sein. insbesondere phrasen
834706423ac1 initial
dwinter
parents:
diff changeset
331 splitted= solrQuery.split(" ")
834706423ac1 initial
dwinter
parents:
diff changeset
332
834706423ac1 initial
dwinter
parents:
diff changeset
333 res = self._v_solr
834706423ac1 initial
dwinter
parents:
diff changeset
334
834706423ac1 initial
dwinter
parents:
diff changeset
335
834706423ac1 initial
dwinter
parents:
diff changeset
336
834706423ac1 initial
dwinter
parents:
diff changeset
337
834706423ac1 initial
dwinter
parents:
diff changeset
338 res=res.query(splitted)
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
339
0
834706423ac1 initial
dwinter
parents:
diff changeset
340 if len(ranges.keys())>0:
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
341
10733d367831 date ranges
dwinter
parents: 3
diff changeset
342
0
834706423ac1 initial
dwinter
parents:
diff changeset
343 res=res.query(ranges)
834706423ac1 initial
dwinter
parents:
diff changeset
344
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
345
0
834706423ac1 initial
dwinter
parents:
diff changeset
346 #ubergebe alle weiteren feld an die suche.
834706423ac1 initial
dwinter
parents:
diff changeset
347 if facetSearch:
834706423ac1 initial
dwinter
parents:
diff changeset
348 for key,vals in facetSearch.items():
834706423ac1 initial
dwinter
parents:
diff changeset
349
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
350 if key.endswith("_rg"): #range
0
834706423ac1 initial
dwinter
parents:
diff changeset
351
834706423ac1 initial
dwinter
parents:
diff changeset
352 if not isinstance(vals,basestring):
834706423ac1 initial
dwinter
parents:
diff changeset
353
834706423ac1 initial
dwinter
parents:
diff changeset
354 vals=[x.decode('utf-8') for x in vals]
834706423ac1 initial
dwinter
parents:
diff changeset
355 else:
834706423ac1 initial
dwinter
parents:
diff changeset
356 vals=[vals.decode('utf-8')]
834706423ac1 initial
dwinter
parents:
diff changeset
357
834706423ac1 initial
dwinter
parents:
diff changeset
358 for val in vals:
834706423ac1 initial
dwinter
parents:
diff changeset
359
834706423ac1 initial
dwinter
parents:
diff changeset
360 splitted = val.split("-")
3
156c1db5a701 print statements deleted
dwinter
parents: 2
diff changeset
361
0
834706423ac1 initial
dwinter
parents:
diff changeset
362 if len(splitted)==2:
834706423ac1 initial
dwinter
parents:
diff changeset
363 #TODO __gt scheint nicht zu funktionieren wird zu gte (???)
834706423ac1 initial
dwinter
parents:
diff changeset
364 facetSearch[key.replace("_rg","__gte")]=int(splitted[0])+1
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
365 facetSearch[key.replace("_rg","__lte")]=int(splitted[1])
3
156c1db5a701 print statements deleted
dwinter
parents: 2
diff changeset
366
0
834706423ac1 initial
dwinter
parents:
diff changeset
367 del facetSearch[key] # loesche das urspuerngliche feld
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
368
10733d367831 date ranges
dwinter
parents: 3
diff changeset
369 elif key.endswith("_drg"): #daterange
10733d367831 date ranges
dwinter
parents: 3
diff changeset
370
10733d367831 date ranges
dwinter
parents: 3
diff changeset
371 if not isinstance(vals,basestring):
10733d367831 date ranges
dwinter
parents: 3
diff changeset
372
10733d367831 date ranges
dwinter
parents: 3
diff changeset
373 vals=[x.decode('utf-8') for x in vals]
10733d367831 date ranges
dwinter
parents: 3
diff changeset
374 else:
10733d367831 date ranges
dwinter
parents: 3
diff changeset
375 vals=[vals.decode('utf-8')]
10733d367831 date ranges
dwinter
parents: 3
diff changeset
376
10733d367831 date ranges
dwinter
parents: 3
diff changeset
377 for val in vals:
10733d367831 date ranges
dwinter
parents: 3
diff changeset
378
10733d367831 date ranges
dwinter
parents: 3
diff changeset
379 splitted = val.split("@")
10733d367831 date ranges
dwinter
parents: 3
diff changeset
380
10733d367831 date ranges
dwinter
parents: 3
diff changeset
381 if len(splitted)==2:
10733d367831 date ranges
dwinter
parents: 3
diff changeset
382 #TODO __gt scheint nicht zu funktionieren wird zu gte (???)
10733d367831 date ranges
dwinter
parents: 3
diff changeset
383 facetSearch[key.replace("_drg","__gte")]=splitted[0]
10733d367831 date ranges
dwinter
parents: 3
diff changeset
384 facetSearch[key.replace("_drg","__lte")]=splitted[1]
10733d367831 date ranges
dwinter
parents: 3
diff changeset
385
10733d367831 date ranges
dwinter
parents: 3
diff changeset
386 del facetSearch[key] # loesche das urspuerngliche feld
10733d367831 date ranges
dwinter
parents: 3
diff changeset
387
0
834706423ac1 initial
dwinter
parents:
diff changeset
388
834706423ac1 initial
dwinter
parents:
diff changeset
389 else:
1
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
390 if not isinstance(vals, basestring):
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
391 val = [x for x in vals if x!="*"] #siehe oben
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
392 else:
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
393 val = [vals]
0
834706423ac1 initial
dwinter
parents:
diff changeset
394 facetSearch[key]=val
834706423ac1 initial
dwinter
parents:
diff changeset
395
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
396
0
834706423ac1 initial
dwinter
parents:
diff changeset
397 res=res.query(**facetSearch)
1
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
398
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
399
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
400 #felder mit ODER-Suche
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
401 if orSearch:
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
402
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
403 for key,vals in orSearch.items():
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
404 qr = None
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
405 if not "*" in vals: #dann ohne einschränkung (key:*) sucht nur nach eintragen in denen etwas im feld steht, wir wollen aber alle
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
406 if isinstance(vals, basestring):
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
407 vals = [vals]
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
408
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
409
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
410 for val in vals:
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
411 if not qr:
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
412 qr=self._v_solr.Q(**{key:val})
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
413
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
414 else:
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
415 qr=qr|self._v_solr.Q(**{key:val})
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
416
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
417
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
418 res=res.query(qr)
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
419
9c356845613a improvements for the search or search fields added
dwinter
parents: 0
diff changeset
420
0
834706423ac1 initial
dwinter
parents:
diff changeset
421
834706423ac1 initial
dwinter
parents:
diff changeset
422 #wenn facetField existieren dann rufe facetierung auf
834706423ac1 initial
dwinter
parents:
diff changeset
423 if len(facetFields)>0:
834706423ac1 initial
dwinter
parents:
diff changeset
424 #for facet in facetFields:
834706423ac1 initial
dwinter
parents:
diff changeset
425
834706423ac1 initial
dwinter
parents:
diff changeset
426 # res = res.facet_by(facet)
5
f695be8f4f34 Incomplete - # 77: Sources: Cloud
dwinter
parents: 4
diff changeset
427
7
3cb69c6820bd bug in query fixed for limits
dwinter
parents: 6
diff changeset
428
3cb69c6820bd bug in query fixed for limits
dwinter
parents: 6
diff changeset
429 res = res.facet_by(facetFields)
0
834706423ac1 initial
dwinter
parents:
diff changeset
430
834706423ac1 initial
dwinter
parents:
diff changeset
431 #res=res.paginate(start=start, rows=rows).highlight("main_content")
834706423ac1 initial
dwinter
parents:
diff changeset
432
834706423ac1 initial
dwinter
parents:
diff changeset
433 res=res.paginate(start=start, rows=rows)
834706423ac1 initial
dwinter
parents:
diff changeset
434
834706423ac1 initial
dwinter
parents:
diff changeset
435
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
436 #only highlighting if not searhc for only "*" - avoid max clause error
2
de6b7ed0c34d highlighting only enabled if not search for "*".
dwinter
parents: 1
diff changeset
437 if solrQuery=="*":
3
156c1db5a701 print statements deleted
dwinter
parents: 2
diff changeset
438
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
439
2
de6b7ed0c34d highlighting only enabled if not search for "*".
dwinter
parents: 1
diff changeset
440 res=res.paginate(start=start, rows=rows)
de6b7ed0c34d highlighting only enabled if not search for "*".
dwinter
parents: 1
diff changeset
441 else:
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
442
2
de6b7ed0c34d highlighting only enabled if not search for "*".
dwinter
parents: 1
diff changeset
443 res=res.paginate(start=start, rows=rows).highlight(usePhraseHighlighter=True)
0
834706423ac1 initial
dwinter
parents:
diff changeset
444
10
74a9661e4e9f sorting added
dwinter
parents: 8
diff changeset
445
74a9661e4e9f sorting added
dwinter
parents: 8
diff changeset
446 if sorting is not None:
74a9661e4e9f sorting added
dwinter
parents: 8
diff changeset
447 for sort in sorting:
74a9661e4e9f sorting added
dwinter
parents: 8
diff changeset
448 res=res.sort_by(sort);
0
834706423ac1 initial
dwinter
parents:
diff changeset
449 return res
834706423ac1 initial
dwinter
parents:
diff changeset
450
834706423ac1 initial
dwinter
parents:
diff changeset
451
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
def replaceParameter(self,paramsNeu,queryString,ignore=()):
    # Build a new query string from queryString with some parameters
    # replaced, added or removed.
    #
    # (Documented with comments instead of a docstring on purpose: the
    # method has no docstring in the original, and Zope only publishes
    # objects that have one.)
    #
    # paramsNeu   -- mapping of parameter name -> new value; overrides or
    #                extends what queryString contains.
    # queryString -- URL query string, parsed with urlparse.parse_qs.
    # ignore      -- parameter names to drop from the result; names that
    #                are not present are skipped.
    #
    # Returns the parameters joined as "name=value&name=value". Values are
    # URL-quoted with urllib.quote, names are emitted unchanged.
    params = urlparse.parse_qs(queryString)
    params.update(paramsNeu)

    for key in ignore:
        # pop() rather than del: asking to ignore a parameter that is not
        # in the query string must not raise KeyError.
        params.pop(key, None)

    pairs = []
    for name, values in params.items():
        # self.getList is defined elsewhere in the class; presumably it
        # wraps a single value in a list so that replaced values (plain
        # strings) and parsed values (lists) can be iterated alike --
        # TODO confirm.
        for value in self.getList(values):
            pairs.append("%s=%s" % (name, urllib.quote(value)))

    return "&".join(pairs)
0
834706423ac1 initial
dwinter
parents:
diff changeset
469 #für die parameter @see prepareSearch
834706423ac1 initial
dwinter
parents:
diff changeset
470 #erzeugt eine Suchabfrage und führt diese aus.
834706423ac1 initial
dwinter
parents:
diff changeset
471 #return {"result":response.result, "hl":response.highlighting,"facetFields":facetedFields}
834706423ac1 initial
dwinter
parents:
diff changeset
472
10
74a9661e4e9f sorting added
dwinter
parents: 8
diff changeset
def search(self,solrQuery,facetFields=[],start=0,rows=10,facetSearch=None,sortFacets=True,orSearch=None,facetFields_limit=None,trunc=False,sorting=None):
    # Build a Solr query via self.prepareSearch and execute it.
    #
    # (Documented with comments instead of a docstring on purpose: the
    # method has no docstring in the original, and Zope only publishes
    # objects that have one.)
    #
    # solrQuery, facetFields, start, rows, facetSearch, sortFacets, orSearch
    # and sorting are handed to self.prepareSearch unchanged (see there).
    # facetFields_limit -- optional mapping field name -> limit; each entry is
    #                      sent as the Solr option "f.<field>.facet.limit".
    # trunc             -- if true, "*" is appended to solrQuery unless it
    #                      already ends with "*" (truncated search).
    #
    # Returns {"result": response.result, "hl": response.highlighting,
    #          "facetFields": facetedFields}; facetFields is {} when no
    # facetFields were requested.
    #
    # facetFields keeps its list default for interface compatibility; it is
    # never mutated in this method.
    if trunc and not solrQuery.endswith("*"):
        solrQuery += "*"

    res = self.prepareSearch(solrQuery, facetFields, start, rows, facetSearch, sortFacets, orSearch=orSearch, sorting=sorting)

    # NOTE(review): prepareSearch appears to return a plain [] when the
    # connection to Solr cannot be established. A list has no options(), so
    # answer with an empty result instead of failing with AttributeError.
    if isinstance(res, list):
        return {"result": [], "hl": {}, "facetFields": {}}

    # Execute through the raw option dict instead of res.execute() so the
    # per-field facet limits can be added to the request.
    opt = res.options()
    if facetFields_limit:
        for field, limit in facetFields_limit.items():
            opt["f.%s.facet.limit" % field] = limit

    response = self._v_solr.search(**opt)

    # collect the faceted fields, sorted/filtered on request
    if len(facetFields) > 0:
        facetedFields = response.facet_counts.facet_fields
        if sortFacets:
            facetedFields = self.sortFacetedFields(facetedFields)
    else:
        facetedFields = {}

    return {"result": response.result, "hl": response.highlighting, "facetFields": facetedFields}
834706423ac1 initial
dwinter
parents:
diff changeset
511
834706423ac1 initial
dwinter
parents:
diff changeset
512
834706423ac1 initial
dwinter
parents:
diff changeset
513
10
74a9661e4e9f sorting added
dwinter
parents: 8
diff changeset
514 #hilfsmethode zum sortierten iterieren über die ranges
0
834706423ac1 initial
dwinter
parents:
diff changeset
def sortRanges(self,ranges):
    # Helper for walking over ranges in order: returns the items of ranges
    # as a new, ascending sorted list; ranges itself is left untouched.
    # (Comment instead of docstring on purpose: the original has none, and
    # Zope only publishes objects that have a docstring.)
    return sorted(ranges)
834706423ac1 initial
dwinter
parents:
diff changeset
519
834706423ac1 initial
dwinter
parents:
diff changeset
520 #sortiert die Werte der FacetedFields
834706423ac1 initial
dwinter
parents:
diff changeset
521 #(facetedFields ist ein hast mit feldname -> liste der (wert für das feld, anzahl der treffer für den wert)
834706423ac1 initial
dwinter
parents:
diff changeset
522 #ausserdem werden die werte gemäß des angegebenen Filter gefiltert.
834706423ac1 initial
dwinter
parents:
diff changeset
523
834706423ac1 initial
dwinter
parents:
diff changeset
def sortFacetedFields(self, facetedFields,filter=STOPLIST):
    # Sort and filter the values of the faceted fields.
    #
    # (Comment instead of docstring on purpose: the original has none, and
    # Zope only publishes objects that have a docstring.)
    #
    # facetedFields -- mapping field name -> list of (value, hit count)
    #                  pairs. Each list is sorted in place by value.
    # filter        -- mapping field name -> collection of values to hide
    #                  for that field (defaults to the module's STOPLIST);
    #                  values are UTF-8 encoded before the lookup.
    #
    # Returns a new dict field name -> sorted list of pairs, without the
    # filtered values and without pairs whose hit count is 0.
    result = {}

    for field, entries in facetedFields.items():
        # in-place sort by the value part of each pair
        entries.sort(key=lambda pair: pair[0])

        hidden = filter.get(field, None)
        if hidden:
            entries = [pair for pair in entries
                       if pair[0].encode('utf-8') not in hidden]

        # drop values that have no hits at all
        result[field] = [pair for pair in entries if pair[1] != 0]

    return result
834706423ac1 initial
dwinter
parents:
diff changeset
546
834706423ac1 initial
dwinter
parents:
diff changeset
def changeMain(self,solrURL=None,title=None,REQUEST=None,RESPONSE=None):
    """Change the main settings (Solr URL and title) of this object.

    Without a solrURL the form zpt/ChangeZopeSolr.zpt is rendered and
    returned. With a solrURL, solrURL and title are stored on the object
    (title exactly as given, including None), a new sunburnt.SolrInterface
    for that URL is placed in self._v_solr, and the browser is redirected
    to manage_main when RESPONSE is given. REQUEST is not used.
    """
    if not solrURL:
        # nothing submitted yet: show the settings form
        form = zptFile(self, 'zpt/ChangeZopeSolr.zpt')
        return form()

    self.solrURL = solrURL
    self.title = title
    self._v_solr = sunburnt.SolrInterface(url=solrURL)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
834706423ac1 initial
dwinter
parents:
diff changeset
561
834706423ac1 initial
dwinter
parents:
diff changeset
562 #sucht die je nach einsteillung in solrconfig.xml Werte mit den häufigsten Treffern oder alphabetisch sortiert zu einem
834706423ac1 initial
dwinter
parents:
diff changeset
563 #bestimmten eintrag aus solr
834706423ac1 initial
dwinter
parents:
diff changeset
564 #gedacht ist die methoden für die Anwendung nach dem Harvesten einer Website, es wird daher davon ausgegangen, dass sie hinter der
834706423ac1 initial
dwinter
parents:
diff changeset
565 #idfield eine url steht. Wir es z.b. bei Nutch passiert. index.htm/index_html als Teil der url wird dabei unterdrückt (analog zu den harbest einstellunge für
834706423ac1 initial
dwinter
parents:
diff changeset
566 # nutch für zope webseiten.
834706423ac1 initial
dwinter
parents:
diff changeset
567 #@param @idfield is hierbei der Name des Feldes, das in solrschema als id defniert wurde
834706423ac1 initial
dwinter
parents:
diff changeset
568 #field der Feldname von dem die Treffer gesurcht wernde sollen
834706423ac1 initial
dwinter
parents:
diff changeset
569 #url die url des textes
834706423ac1 initial
dwinter
parents:
diff changeset
570
834706423ac1 initial
dwinter
parents:
diff changeset
def getTermsAsJSON(self,idfield,field,url):
    """Return the frequent terms of one indexed document as a JSON array.

    Intended for documents harvested from a website (e.g. with Nutch),
    where the id field of the Solr schema holds the document URL.

    idfield -- name of the field defined as id in the Solr schema
    field   -- name of the field whose terms are wanted
    url     -- URL of the document; "/index.html", "/index_html" and a
               trailing "/" are stripped before the lookup

    The term vectors are fetched from self.solrURL + SOLRSERVER and every
    term with a term frequency (tf) greater than 2 is returned as
    {"text":<term>,"size":<tf>} inside a JSON array string.
    """
    url = url.replace("/index.html","").replace("/index_html","")
    # endswith() instead of url[-1]: an empty url must not raise IndexError
    if url.endswith("/"):
        url = url[:-1]

    # ":" and "/" are backslash-escaped for the Solr query; the unescaped
    # url is still needed below to address the result node.
    urlq = url.replace(":", "\\:").replace("/", "\\/")

    # SOLRSERVER is a module-level format string defined elsewhere;
    # presumably a term-vector select request -- TODO confirm.
    q = self.solrURL + SOLRSERVER % (idfield, urlq, field)

    resp, content = httplib2.Http().request(q)
    root = ET.fromstring(content)

    xpstr = ".//lst[@name='termVectors']/lst[@name='%s']/lst[@name='%s']/lst" % (url, field)

    ret = []
    for tv in root.findall(xpstr):
        wd = tv.attrib['name']
        for f in tv.findall("./int[@name='tf']"):
            fre = int(f.text)
            if fre > 2:
                # json.dumps quotes and escapes the term, so quotes or
                # backslashes inside a term can no longer break the JSON
                ret.append('{"text":%s,"size":%s}' % (json.dumps(wd), fre))

    return "[" + ",".join(ret) + "]"
834706423ac1 initial
dwinter
parents:
diff changeset
609
834706423ac1 initial
dwinter
parents:
diff changeset
610 #tauscht im request die in neewparams angegeben parameter aus.
834706423ac1 initial
dwinter
parents:
diff changeset
611 def replaceParam(self, newparams):
834706423ac1 initial
dwinter
parents:
diff changeset
612 x = self.REQUEST.form.copy()
834706423ac1 initial
dwinter
parents:
diff changeset
613
834706423ac1 initial
dwinter
parents:
diff changeset
614 for key,value in newparams.items():
834706423ac1 initial
dwinter
parents:
diff changeset
615 x[key]=value
834706423ac1 initial
dwinter
parents:
diff changeset
616
834706423ac1 initial
dwinter
parents:
diff changeset
617
834706423ac1 initial
dwinter
parents:
diff changeset
618 retls=[]
834706423ac1 initial
dwinter
parents:
diff changeset
619 for k,v in x.items():
834706423ac1 initial
dwinter
parents:
diff changeset
620 if not isinstance(v,basestring):
834706423ac1 initial
dwinter
parents:
diff changeset
621 for y in v:
834706423ac1 initial
dwinter
parents:
diff changeset
622 retls.append((k,y))
834706423ac1 initial
dwinter
parents:
diff changeset
623 else:
834706423ac1 initial
dwinter
parents:
diff changeset
624 retls.append((k,v))
834706423ac1 initial
dwinter
parents:
diff changeset
625
834706423ac1 initial
dwinter
parents:
diff changeset
626 return "?"+"&".join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in retls])
834706423ac1 initial
dwinter
parents:
diff changeset
627
834706423ac1 initial
dwinter
parents:
diff changeset
628
834706423ac1 initial
dwinter
parents:
diff changeset
629
834706423ac1 initial
dwinter
parents:
diff changeset
630
834706423ac1 initial
dwinter
parents:
diff changeset
631 #ruft @set ranges aus, gibt das ergebnis als json zurück
834706423ac1 initial
dwinter
parents:
diff changeset
def getRangesAsJSON(self,field,begin,end,increment):
    """Call self.ranges for field and return its result serialised as JSON.

    begin, end and increment are converted with int() before the call, so
    they may be passed as strings.
    """
    lower, upper, step = int(begin), int(end), int(increment)
    return json.dumps(self.ranges(field, lower, upper, step))
834706423ac1 initial
dwinter
parents:
diff changeset
636
834706423ac1 initial
dwinter
parents:
diff changeset
637
834706423ac1 initial
dwinter
parents:
diff changeset
638 #return only the values of resultList which start with startLetter or, if startLetterNonAscii is 1, all values which
834706423ac1 initial
dwinter
parents:
diff changeset
639 #do not start with an ASCII letter (leading brackets and quotes are not counted as non-ASCII either)
834706423ac1 initial
dwinter
parents:
diff changeset
def filter (self,resultList,startLetter=None,startLetterNonAscii=0):
    # Select entries of resultList by the first character of entry[0].
    #
    # (Comment instead of docstring on purpose: the original has none, and
    # Zope only publishes objects that have a docstring.)
    #
    # resultList          -- sequence of entries whose first item is a string
    # startLetter         -- keep entries whose string starts with this text
    #                        (case-insensitive); leading brackets and quotes
    #                        ([ ] ' ") in the entry are skipped
    # startLetterNonAscii -- if 1, keep instead the entries that do NOT start
    #                        with an ASCII letter, bracket or quote; this
    #                        takes precedence over startLetter
    #
    # Returns a new list; it is empty when neither option is given.
    ls = []

    if startLetter:
        # startLetter is literal text, not a pattern: escape it so that
        # characters like "(", "*" or "." neither raise re.error nor match
        # arbitrary characters.
        pattern = r"""[\[\]'"]*""" + re.escape(startLetter.lower())
        ls = [x for x in resultList if re.match(pattern, x[0].lower())]

    if startLetterNonAscii == 1:
        ls = [x for x in resultList if not re.match(r"""[\[\]'"a-zA-Z].*""", x[0])]

    return ls
834706423ac1 initial
dwinter
parents:
diff changeset
652
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
653
10733d367831 date ranges
dwinter
parents: 3
diff changeset
654
0
834706423ac1 initial
dwinter
parents:
diff changeset
def manage_addZopeSolrForm(self):
    """Render and return the add form zpt/AddZopeSolr.zpt."""
    return zptFile(self, 'zpt/AddZopeSolr.zpt')()
834706423ac1 initial
dwinter
parents:
diff changeset
659
834706423ac1 initial
dwinter
parents:
diff changeset
660
834706423ac1 initial
dwinter
parents:
diff changeset
def manage_addZopeSolr(self,id,title,solrURL,RESPONSE=None):
    """Create a ZopeSolr object and add it to this container under id.

    The object is built from id, title and solrURL and stored with
    self._setObject. When RESPONSE is given, the browser is redirected to
    manage_main afterwards.
    """
    solr_obj = ZopeSolr(id, title, solrURL)
    self._setObject(id, solr_obj)

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
4
10733d367831 date ranges
dwinter
parents: 3
diff changeset
671