annotate zopeSolr.py @ 0:834706423ac1

initial
author dwinter
date Tue, 26 Feb 2013 15:22:07 +0100
parents
children 9c356845613a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834706423ac1 initial
dwinter
parents:
diff changeset
1 # -*- coding: utf-8 -*-
834706423ac1 initial
dwinter
parents:
diff changeset
2
834706423ac1 initial
dwinter
parents:
diff changeset
3 #Verbindet Zope mit solr. Voraussetzung ist das Paket sunburnt, @see http://opensource.timetric.com/sunburnt/
834706423ac1 initial
dwinter
parents:
diff changeset
4
834706423ac1 initial
dwinter
parents:
diff changeset
5
834706423ac1 initial
dwinter
parents:
diff changeset
6 from OFS.SimpleItem import SimpleItem
834706423ac1 initial
dwinter
parents:
diff changeset
7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
834706423ac1 initial
dwinter
parents:
diff changeset
8 import pysolr
834706423ac1 initial
dwinter
parents:
diff changeset
9 import os.path
834706423ac1 initial
dwinter
parents:
diff changeset
10 import sunburnt
834706423ac1 initial
dwinter
parents:
diff changeset
11 from Globals import package_home
834706423ac1 initial
dwinter
parents:
diff changeset
12 import httplib2
834706423ac1 initial
dwinter
parents:
diff changeset
13 import urllib
834706423ac1 initial
dwinter
parents:
diff changeset
14 import re
834706423ac1 initial
dwinter
parents:
diff changeset
15 import xml.etree.ElementTree as ET
834706423ac1 initial
dwinter
parents:
diff changeset
16 import json
834706423ac1 initial
dwinter
parents:
diff changeset
17
834706423ac1 initial
dwinter
parents:
diff changeset
18 #Worte die nicht in der Termliste angezeigt werden sollen #TODO: make this configurable
834706423ac1 initial
dwinter
parents:
diff changeset
19
834706423ac1 initial
dwinter
parents:
diff changeset
# Words that must not be shown in the term list, one list per field name.
# TODO: make this configurable
STOPLIST = {
    'main_content': [
        'forward', 'drucken', 'history', 'science', 'part', 'publications',
        'projects', 'project', 'new', 'geschichte', 'institute', 'related',
        'boltzmannstraße', '14195',
    ],
    'title': [
        'bd', '10', '11', '12', '18', '19', '20', 'abt', 'ad', 'di',
        'history', 'geschichte', 'science',
    ],
}
834706423ac1 initial
dwinter
parents:
diff changeset
25
834706423ac1 initial
dwinter
parents:
diff changeset
26
834706423ac1 initial
dwinter
parents:
diff changeset
27
834706423ac1 initial
dwinter
parents:
diff changeset
def zptFile(self, path, orphaned=False):
    """Return a page template file that ships with this product.

    *path* is joined onto the product's package directory.  With
    ``orphaned=True`` (the unusual case) the bare template is returned;
    otherwise the template is returned wrapped via ``__of__(self)``.
    """
    template = PageTemplateFile(os.path.join(package_home(globals()), path))
    if orphaned:
        # unusual case: hand out the template without wrapping it in *self*
        return template
    return template.__of__(self)
834706423ac1 initial
dwinter
parents:
diff changeset
37
834706423ac1 initial
dwinter
parents:
diff changeset
38
834706423ac1 initial
dwinter
parents:
diff changeset
39
834706423ac1 initial
dwinter
parents:
diff changeset
40 class ZopeSolr(SimpleItem):
834706423ac1 initial
dwinter
parents:
diff changeset
41
834706423ac1 initial
dwinter
parents:
diff changeset
42
834706423ac1 initial
dwinter
parents:
diff changeset
43 meta_type="ZopeSolr"
834706423ac1 initial
dwinter
parents:
diff changeset
44
834706423ac1 initial
dwinter
parents:
diff changeset
45 manage_options= ({'label':'Main Config','action': 'changeMain'},) + SimpleItem.manage_options
834706423ac1 initial
dwinter
parents:
diff changeset
46
834706423ac1 initial
dwinter
parents:
diff changeset
def __init__(self, id, title, solrURL):
    # Store the object id, its title and the Solr URL on the new instance.
    # solrURL is the URL of a Solr instance, or of one collection if the
    # default collection should not be used.
    self.solrURL = solrURL
    self.title = title
    self.id = id
834706423ac1 initial
dwinter
parents:
diff changeset
51
834706423ac1 initial
dwinter
parents:
diff changeset
52
834706423ac1 initial
dwinter
parents:
diff changeset
53 #Verbinde mit der solr Instance
834706423ac1 initial
dwinter
parents:
diff changeset
54
834706423ac1 initial
dwinter
parents:
diff changeset
def connect(self):
    # Connect to the Solr instance: create a sunburnt interface for
    # self.solrURL and keep it on the instance as self._v_solr.
    # NOTE(review): documented with comments instead of a docstring, like
    # the original; Zope 2 only publishes methods that have a docstring.
    interface = sunburnt.SolrInterface(url=self.solrURL)
    self._v_solr = interface
834706423ac1 initial
dwinter
parents:
diff changeset
57
834706423ac1 initial
dwinter
parents:
diff changeset
58 #erzeuge einen Link, insbesonder für faceted suche
834706423ac1 initial
dwinter
parents:
diff changeset
59 #@param search: ist suchparameter, wird einfach als search=%s weitergereicht
834706423ac1 initial
dwinter
parents:
diff changeset
60 #@param facetSearch: bekommt einen hash (feldname, liste der suchworte)
834706423ac1 initial
dwinter
parents:
diff changeset
61
834706423ac1 initial
dwinter
parents:
diff changeset
def generateLink(self, search, facetSearch=None, ranges=None):
    # Build a query string ("?search=...&<field>_fc=...&<field>_rg=..."),
    # mainly for links of the faceted search.
    #
    # @param search: search parameter, passed on unchanged as search=%s
    # @param facetSearch: dict field name -> search word or list of search
    #        words; every word is emitted as <field>_fc=<word>
    # @param ranges: dict field name -> range or list of ranges; every range
    #        is emitted as <field>_rg=<range>
    # @return the query string, starting with "?"
    #
    # Values are inserted as they are, no URL quoting is applied here.
    # NOTE(review): documented with comments instead of a docstring, like
    # the original; Zope 2 only publishes methods that have a docstring.
    link = "?search=%s" % search

    # None replaces the former mutable default arguments ({}).
    for facet, terms in (facetSearch or {}).items():
        if isinstance(terms, basestring):
            terms = [terms]

        for term in terms:
            try:
                link += "&%s_fc=%s" % (facet, term.encode('utf-8'))
            except (AttributeError, UnicodeError):
                # Not a string, or encoding / mixing byte and unicode strings
                # failed: fall back to the unencoded value.
                link += "&%s_fc=%s" % (facet, term)

    for key, values in (ranges or {}).items():
        if isinstance(values, basestring):
            values = [values]

        for value in values:
            link += "&%s_rg=%s" % (key, value)

    return link
834706423ac1 initial
dwinter
parents:
diff changeset
88
834706423ac1 initial
dwinter
parents:
diff changeset
89 #hilfsmethode erzeuget immer eine liste von einträgen
834706423ac1 initial
dwinter
parents:
diff changeset
def getList(self, param):
    # Helper that always yields a list of entries: a single string is
    # wrapped in a one-element list, anything else is returned unchanged.
    return [param] if isinstance(param, basestring) else param
834706423ac1 initial
dwinter
parents:
diff changeset
95
834706423ac1 initial
dwinter
parents:
diff changeset
96 #erzeugt analog zu den Ranges in Velocity für ein numerisches Feld die Suche in ranges
834706423ac1 initial
dwinter
parents:
diff changeset
97 #@param field:ist der name des Feldes in dem in Ranges gesucht werden soll
834706423ac1 initial
dwinter
parents:
diff changeset
98 #@param begin anfang der ranges
834706423ac1 initial
dwinter
parents:
diff changeset
99 #@param end ende der Ranges
834706423ac1 initial
dwinter
parents:
diff changeset
100 #@param increment größe eines ranges
834706423ac1 initial
dwinter
parents:
diff changeset
101 # für alle andere paramter @see prepareSearch
834706423ac1 initial
dwinter
parents:
diff changeset
102 #@return gibt ein hash: mit range -> anzahl der Treffer in dem Range. e.g 1921-1950 -> 21
834706423ac1 initial
dwinter
parents:
diff changeset
def ranges(self, field, begin, end, increment, solrQuery="", facetFields=None, start=0, rows=10, facetSearch=None, sortFacets=True):
    # Count the hits of a search per range of a numeric field.
    #
    # @param field: name of the field that is searched in ranges
    # @param begin: start of the ranges
    # @param end: end of the ranges
    # @param increment: size of one range
    # For all other parameters @see prepareSearch.
    # @return dict "x-(x+increment)" -> number of hits, e.g. "1921-1950" -> 21,
    #         for every x in range(begin, end, increment).  One Solr query
    #         is executed per range.
    #
    # NOTE(review): documented with comments instead of a docstring, like
    # the original; Zope 2 only publishes methods that have a docstring.
    if facetFields is None:
        # replaces the former mutable default argument ([])
        facetFields = []

    base_query = self.prepareSearch(solrQuery, facetFields, start, rows, facetSearch, sortFacets)

    # connect() stores the interface as _v_solr; the former check looked for
    # '_v_solr_', never found it and therefore reconnected on every call.
    if not getattr(self, '_v_solr', None):
        self.connect()

    counts = {}
    for lower in range(begin, end, increment):
        upper = lower + increment
        bounds = {
            # TODO: __gt does not seem to work (it is turned into gte),
            # hence gte with the lower bound + 1.
            "%s__gte" % field: int(lower) + 1,
            "%s__lte" % field: upper,
        }
        response = base_query.query(**bounds).execute()
        counts["%s-%s" % (lower, upper)] = response.result.numFound

    return counts
834706423ac1 initial
dwinter
parents:
diff changeset
126
834706423ac1 initial
dwinter
parents:
diff changeset
127
834706423ac1 initial
dwinter
parents:
diff changeset
128 #prepareSearch erzeugt die Suchabfrage
834706423ac1 initial
dwinter
parents:
diff changeset
129
834706423ac1 initial
dwinter
parents:
diff changeset
130 #solrQuery sucht im in schema.xml bzw. solrconfig.xml festgelegt generischen Feld, hierbei werden mit blanks getrennte eintrage in "AND" zerlegt.
834706423ac1 initial
dwinter
parents:
diff changeset
131 #TODO: erlaube auch suche nach phrasen mit ""
834706423ac1 initial
dwinter
parents:
diff changeset
132 #facetFields:Liste der Felder, nach denen facitiert werden sollen, wirdn in facet_by in solrburn uebersetzt.
834706423ac1 initial
dwinter
parents:
diff changeset
133 #start: Erste Eintrag für paginierung, anzahl der Treffer
834706423ac1 initial
dwinter
parents:
diff changeset
134 #rows: anzahl der Treffer
834706423ac1 initial
dwinter
parents:
diff changeset
135 #facetSearch: Hash mit Feldnamen: suchwort oder Feldname: liste von Suchworten, hast wird direkt an query von solrburnt weitergegeben,
834706423ac1 initial
dwinter
parents:
diff changeset
136 #sortfacets: if true, dann werden die Ergebnisse der facetierten suche alphabetisch sortiert, ACHTUNG: das ist nicht gleich der Funktion in solr die Liste
834706423ac1 initial
dwinter
parents:
diff changeset
137 #direkt sortiert zurückzubekommen, hier werden die haufigsten Werte genommen (einstellt in solrconfig.xml) und dann nur diese sortiert!
834706423ac1 initial
dwinter
parents:
diff changeset
138 #ausserdem werden beim sortieren, die stopworte gefiltert!
834706423ac1 initial
dwinter
parents:
diff changeset
139 #
834706423ac1 initial
dwinter
parents:
diff changeset
140 #neben den direkten parameter koennen auch parameter fuer die facetierte Suche über FORM im REQUEST übergeben werden, diese Felder müssen dann
834706423ac1 initial
dwinter
parents:
diff changeset
141 #mit "_fc" enden.
834706423ac1 initial
dwinter
parents:
diff changeset
142 # gibt als ergebnis den folgenden Hash, so wie in http://opensource.timetric.com/sunburnt/queryingsolr.html#executing-queries-and-interpreting-the-response,
834706423ac1 initial
dwinter
parents:
diff changeset
143 # http://opensource.timetric.com/sunburnt/queryingsolr.html#highlighting
834706423ac1 initial
dwinter
parents:
diff changeset
144 # und http://opensource.timetric.com/sunburnt/queryingsolr.html#faceting
834706423ac1 initial
dwinter
parents:
diff changeset
145 #dokumentiert. Highlighting selbst wird in sorlconfig.xml konfiguriert.
834706423ac1 initial
dwinter
parents:
diff changeset
146 #return ein Queryobjet, zur eigentlichen Suche muss darauf noch execute ausgeführt werden.
834706423ac1 initial
dwinter
parents:
diff changeset
147
834706423ac1 initial
dwinter
parents:
diff changeset
def prepareSearch(self, solrQuery, facetFields=None, start=0, rows=10, facetSearch=None, sortFacets=True):
    """Build the sunburnt query for a search without executing it.

    The blank-separated words of *solrQuery* are searched in the generic
    field configured in Solr and combined with AND.
    TODO: also allow phrase searches with "".

    Besides the explicit arguments, constraints are read from
    ``self.REQUEST.form`` (if a REQUEST is available): fields ending in
    ``_fc`` are facet constraints (field name without the suffix -> value
    or list of values), fields ending in ``_rg`` are numeric ranges given
    as ``a-b``.  A range ``a-b`` is queried as ``a+1 <= field <= b``.

    @param solrQuery: the search words, as UTF-8 byte string or unicode
    @param facetFields: list of fields to facet on (passed to facet_by)
    @param start: index of the first hit (pagination)
    @param rows: number of hits
    @param facetSearch: dict field name -> search word or list of search
        words, passed to sunburnt's query(); keys ending in ``_rg`` hold
        ``a-b`` ranges.  The word ``*`` is dropped.  The dict itself is
        not modified.
    @param sortFacets: not used here; accepted so that the parameters are
        the same as those of search()
    @return: a query object; execute() has to be called on it to actually
        run the search.
    @raise ValueError: if the lower bound of an ``a-b`` range is not an
        integer
    """

    def to_unicode(value):
        # Decode UTF-8 byte strings only; decoding text that is already
        # unicode would fail as soon as it contains non-ASCII characters.
        if isinstance(value, bytes):
            return value.decode('utf-8')
        return value

    def as_list(value):
        # A single string counts as a one-element list (iterating it
        # directly would yield its characters).
        if isinstance(value, basestring):
            return [value]
        return list(value)

    def add_range(target, name, values):
        # Translate the "a-b" ranges of field *name* into range keywords
        # for sunburnt; values that are not of the form a-b are ignored.
        for value in as_list(values):
            bounds = to_unicode(value).split("-")
            if len(bounds) == 2:
                # TODO: __gt does not seem to work (it is turned into gte),
                # hence gte with the lower bound + 1.
                target[name + "__gte"] = int(bounds[0]) + 1
                target[name + "__lte"] = bounds[1]

    # Work on a copy so that the caller's dict is left untouched.
    facets = dict(facetSearch) if facetSearch else {}
    request_ranges = {}

    # deal with a form
    request = getattr(self, 'REQUEST', None)
    if request:
        form = request.form
        for field in form.keys():
            if field.endswith("_fc"):
                # facets
                values = form[field]
                if isinstance(values, basestring):
                    values = to_unicode(values)
                else:
                    values = [to_unicode(x) for x in values]
                facets[field[:-3]] = values
            elif field.endswith("_rg"):
                # ranges of the form a-b; the range is the field's VALUE
                # (the former code split the literal "_" and never matched)
                add_range(request_ranges, field[:-3], form[field])

    # Make sure there is a connection to solr.  connect() stores it as
    # _v_solr (the former check looked for '_v_solr_' and always reconnected).
    if not getattr(self, '_v_solr', None):
        self.connect()

    # Split the search at " "; the words are combined into an AND search.
    # TODO: should be more flexible, especially for phrases.
    terms = to_unicode(solrQuery).split(" ")
    query = self._v_solr.query(terms)

    if request_ranges:
        # must be passed as keywords; a positional dict is not a constraint
        query = query.query(**request_ranges)

    # Pass all further fields on to the search.
    if facets:
        constraints = {}
        for key, values in facets.items():
            if key.endswith("_rg"):
                add_range(constraints, key[:-3], values)
            else:
                constraints[key] = [x for x in as_list(values) if x != "*"]
        query = query.query(**constraints)

    # If there are facet fields, switch on faceting.
    if facetFields:
        query = query.facet_by(facetFields)

    return query.paginate(start=start, rows=rows).highlight()
834706423ac1 initial
dwinter
parents:
diff changeset
256
834706423ac1 initial
dwinter
parents:
diff changeset
257
834706423ac1 initial
dwinter
parents:
diff changeset
258 #für die parameter @see prepareSearch
834706423ac1 initial
dwinter
parents:
diff changeset
259 #erzeugt eine Suchabfrage und führt diese aus.
834706423ac1 initial
dwinter
parents:
diff changeset
260 #return {"result":response.result, "hl":response.highlighting,"facetFields":facetedFields}
834706423ac1 initial
dwinter
parents:
diff changeset
261
834706423ac1 initial
dwinter
parents:
diff changeset
def search(self, solrQuery, facetFields=None, start=0, rows=10, facetSearch=None, sortFacets=True):
    # Build a search query and execute it; for the parameters @see prepareSearch.
    #
    # @return {"result": response.result, "hl": response.highlighting,
    #          "facetFields": facetedFields}
    #         facetedFields is {} when no facetFields were requested; with
    #         sortFacets it is passed through sortFacetedFields first.
    #
    # NOTE(review): documented with comments instead of a docstring, like
    # the original; Zope 2 only publishes methods that have a docstring.
    if facetFields is None:
        # replaces the former mutable default argument ([])
        facetFields = []

    query = self.prepareSearch(solrQuery, facetFields, start, rows, facetSearch, sortFacets)
    response = query.execute()

    # keep the faceted fields
    facetedFields = {}
    if len(facetFields) > 0:
        facetedFields = response.facet_counts.facet_fields
        if sortFacets:
            facetedFields = self.sortFacetedFields(facetedFields)

    return {"result": response.result, "hl": response.highlighting, "facetFields": facetedFields}
834706423ac1 initial
dwinter
parents:
diff changeset
281
834706423ac1 initial
dwinter
parents:
diff changeset
282
834706423ac1 initial
dwinter
parents:
diff changeset
283
834706423ac1 initial
dwinter
parents:
diff changeset
284 #hilfsmethode zum sortieren über der ranges
834706423ac1 initial
dwinter
parents:
diff changeset
def sortRanges(self, ranges):
    # Helper for sorting ranges: returns the items of *ranges* (for a dict:
    # its keys) as a new list in ascending order.
    return sorted(ranges)
834706423ac1 initial
dwinter
parents:
diff changeset
289
834706423ac1 initial
dwinter
parents:
diff changeset
290 #sortiert die Werte der FacetedFields
834706423ac1 initial
dwinter
parents:
diff changeset
291 #(facetedFields ist ein hast mit feldname -> liste der (wert für das feld, anzahl der treffer für den wert)
834706423ac1 initial
dwinter
parents:
diff changeset
292 #ausserdem werden die werte gemäß des angegebenen Filter gefiltert.
834706423ac1 initial
dwinter
parents:
diff changeset
293
834706423ac1 initial
dwinter
parents:
diff changeset
def sortFacetedFields(self, facetedFields, filter=STOPLIST):
    # Sort the values of the faceted fields.
    #
    # @param facetedFields: dict field name -> list of
    #        (value of the field, number of hits for that value)
    # @param filter: dict field name -> list of words to leave out
    # @return new dict field name -> list sorted by value, without entries
    #         that have 0 hits and without the words listed in *filter*
    #
    # The lists in facetedFields are no longer sorted in place; sorted()
    # with key= replaces the former cmp function.
    # NOTE(review): documented with comments instead of a docstring, like
    # the original; Zope 2 only publishes methods that have a docstring.
    result = {}

    for key, entries in facetedFields.items():
        stopwords = filter.get(key, None)
        kept = []
        for entry in sorted(entries, key=lambda item: item[0]):
            # The stop words are compared against the UTF-8 encoded value.
            if stopwords and entry[0].encode('utf-8') in stopwords:
                continue
            if entry[1] != 0:
                kept.append(entry)
        result[key] = kept

    return result
834706423ac1 initial
dwinter
parents:
diff changeset
316
834706423ac1 initial
dwinter
parents:
diff changeset
def changeMain(self, solrURL=None, title=None, REQUEST=None, RESPONSE=None):
    """Change the main settings, or show the form for changing them.

    Without *solrURL* the page template ``zpt/ChangeZopeSolr.zpt`` is
    rendered and returned.  With *solrURL* the URL is stored, a new
    sunburnt interface for it is created and, if *RESPONSE* is given, the
    browser is redirected to ``manage_main``.  The title is only replaced
    when *title* is passed, so changing just the URL no longer resets it
    to None.
    """
    if not solrURL:
        form = zptFile(self, 'zpt/ChangeZopeSolr.zpt')
        return form()

    self.solrURL = solrURL
    if title is not None:
        self.title = title
    self._v_solr = sunburnt.SolrInterface(url=solrURL)

    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
834706423ac1 initial
dwinter
parents:
diff changeset
331
834706423ac1 initial
dwinter
parents:
diff changeset
332 #sucht die je nach einsteillung in solrconfig.xml Werte mit den häufigsten Treffern oder alphabetisch sortiert zu einem
834706423ac1 initial
dwinter
parents:
diff changeset
333 #bestimmten eintrag aus solr
834706423ac1 initial
dwinter
parents:
diff changeset
334 #gedacht ist die methoden für die Anwendung nach dem Harvesten einer Website, es wird daher davon ausgegangen, dass sie hinter der
834706423ac1 initial
dwinter
parents:
diff changeset
335 #idfield eine url steht. Wir es z.b. bei Nutch passiert. index.htm/index_html als Teil der url wird dabei unterdrückt (analog zu den harbest einstellunge für
834706423ac1 initial
dwinter
parents:
diff changeset
336 # nutch für zope webseiten.
834706423ac1 initial
dwinter
parents:
diff changeset
337 #@param @idfield is hierbei der Name des Feldes, das in solrschema als id defniert wurde
834706423ac1 initial
dwinter
parents:
diff changeset
338 #field der Feldname von dem die Treffer gesurcht wernde sollen
834706423ac1 initial
dwinter
parents:
diff changeset
339 #url die url des textes
834706423ac1 initial
dwinter
parents:
diff changeset
340
834706423ac1 initial
dwinter
parents:
diff changeset
def getTermsAsJSON(self, idfield, field, url):
    """Return the terms of one document as a JSON array string.

    The term vectors of the document whose *idfield* equals *url* are
    requested from Solr; every term of *field* with a term frequency
    greater than 2 becomes an entry ``{"text": <term>, "size": <tf>}``.
    ``/index.html`` and ``/index_html`` as well as a trailing ``/`` are
    removed from *url* first.

    @param idfield: name of the field defined as id in the Solr schema
    @param field: name of the field whose terms are wanted
    @param url: URL of the text
    @return: JSON text, ``[]`` if nothing was found
    """
    url = url.replace("/index.html", "").replace("/index_html", "")
    if url.endswith("/"):  # also safe for an empty url
        url = url[:-1]

    # ':' and '/' are escaped with a backslash for the query.
    escaped = url.replace(":", "\\:").replace("/", "\\/")

    # NOTE(review): the Solr address is hard-coded here and self.solrURL is
    # ignored; the values are also put into the URL without quoting.
    # Confirm whether this is intended.
    query_url = "http://localhost:8983/solr/mpiwgweb/select?q=%s:%s&wt=xml&tv=on&qt=tvrh&fl=%s&tv.tf=true" % (idfield, escaped, field)

    _headers, content = httplib2.Http().request(query_url)
    root = ET.fromstring(content)

    xpath = ".//lst[@name='termVectors']/lst[@name='%s']/lst[@name='%s']/lst" % (url, field)

    items = []
    for term in root.findall(xpath):
        word = term.attrib['name']
        for tf in term.findall("./int[@name='tf']"):
            frequency = int(tf.text)
            if frequency > 2:
                # json.dumps escapes quotes and backslashes inside the term
                items.append('{"text":%s,"size":%s}' % (json.dumps(word, ensure_ascii=False), frequency))

    return "[" + ",".join(items) + "]"
834706423ac1 initial
dwinter
parents:
diff changeset
378
834706423ac1 initial
dwinter
parents:
diff changeset
379 #tauscht im request die in neewparams angegeben parameter aus.
834706423ac1 initial
dwinter
parents:
diff changeset
def replaceParam(self, newparams):
    # Return the parameters of the current request as a query string in
    # which the parameters given in newparams are replaced or added.
    #
    # @param newparams: dict parameter name -> value or list of values
    # @return "?name=value&..." with the values quoted by
    #         urllib.quote_plus ('/' is kept); a list of values yields one
    #         name=value pair per entry.  REQUEST.form is not modified.
    #
    # NOTE(review): documented with comments instead of a docstring, like
    # the original; Zope 2 only publishes methods that have a docstring.

    def quoted(value):
        # quote_plus needs a byte string: unicode is encoded as UTF-8
        # (non-ASCII unicode would fail otherwise), other values such as
        # numbers are converted with str().
        if isinstance(value, unicode):
            value = value.encode('utf-8')
        elif not isinstance(value, basestring):
            value = str(value)
        return urllib.quote_plus(value, '/')

    params = self.REQUEST.form.copy()
    for key, value in newparams.items():
        params[key] = value

    pairs = []
    for name, value in params.items():
        if isinstance(value, basestring):
            values = [value]
        else:
            try:
                values = list(value)
            except TypeError:
                # a single value that is not a string, e.g. a number
                values = [value]
        for item in values:
            pairs.append("%s=%s" % (name, quoted(item)))

    return "?" + "&".join(pairs)
834706423ac1 initial
dwinter
parents:
diff changeset
396
834706423ac1 initial
dwinter
parents:
diff changeset
397
834706423ac1 initial
dwinter
parents:
diff changeset
398
834706423ac1 initial
dwinter
parents:
diff changeset
399
834706423ac1 initial
dwinter
parents:
diff changeset
# Calls ranges() (see there) and returns its result as JSON.
def getRangesAsJSON(self, field, begin, end, increment):
    """Return the result of ``self.ranges`` for ``field`` as a JSON string.

    ``begin``, ``end`` and ``increment`` are converted with ``int`` before
    being passed on, so they may be given as strings.
    """
    bounds = [int(raw) for raw in (begin, end, increment)]
    return json.dumps(self.ranges(field, *bounds))
834706423ac1 initial
dwinter
parents:
diff changeset
405
834706423ac1 initial
dwinter
parents:
diff changeset
406
834706423ac1 initial
dwinter
parents:
diff changeset
# Return only the entries of resultList that start with startLetter or, if
# startLetterNonAscii is set, all entries that do not start with an ASCII letter.
def filter(self, resultList, startLetter=None, startLetterNonAscii=0):
    """Filter ``resultList`` by the first character of each entry's first item.

    Each entry is matched on ``x[0]`` (presumably the term of a
    ``(term, count)`` pair -- confirm against the callers).  Leading
    brackets and quotes (``[``, ``]``, ``'``, ``"``) are skipped before
    ``startLetter`` is compared.

    resultList          -- sequence of entries whose first item is a string
    startLetter         -- keep entries starting with this text; it is
                           inserted into a regular expression unescaped, so
                           regex syntax in it is interpreted as such
    startLetterNonAscii -- when ``1``, keep instead the entries that do NOT
                           start with an ASCII letter, bracket or quote
                           (this includes digits); takes precedence over
                           ``startLetter``

    Returns a new list.  When no filter is requested, all entries are
    returned unfiltered (previously this case raised ``UnboundLocalError``).
    """
    if startLetterNonAscii == 1:
        # As before, this filter wins when both arguments are given.
        asciiStart = r"[\[\]'\"a-zA-Z].*"
        return [x for x in resultList if not re.match(asciiStart, x[0])]

    if startLetter:
        matchStr = r"[\[\]'\"]*" + startLetter
        return [x for x in resultList if re.match(matchStr, x[0])]

    # No filter requested: hand back everything instead of failing.
    return list(resultList)
834706423ac1 initial
dwinter
parents:
diff changeset
419
834706423ac1 initial
dwinter
parents:
diff changeset
def manage_addZopeSolrForm(self):
    """Render the add form for a ZopeSolr object (zpt/AddZopeSolr.zpt)."""
    return zptFile(self, 'zpt/AddZopeSolr.zpt')()
834706423ac1 initial
dwinter
parents:
diff changeset
424
834706423ac1 initial
dwinter
parents:
diff changeset
425
834706423ac1 initial
dwinter
parents:
diff changeset
def manage_addZopeSolr(self, id, title, solrURL, RESPONSE=None):
    """Create a ZopeSolr object and store it in this container under ``id``.

    id, title, solrURL -- passed unchanged to the ``ZopeSolr`` constructor
    RESPONSE           -- if given, it is redirected to ``manage_main``
                          after the object has been added
    """
    self._setObject(id, ZopeSolr(id, title, solrURL))

    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
834706423ac1 initial
dwinter
parents:
diff changeset
436