comparison MpdlXmlTextServer.py @ 509:9d05befdd462 elementtree

Try to get characterNormalization working in search results.
author casties
date Mon, 27 Feb 2012 21:57:18 +0100
parents d5a47f82e755
children 4fb35343d2e7
comparison
equal deleted inserted replaced
508:d5a47f82e755 509:9d05befdd462
268 return serialize(pagediv) 268 return serialize(pagediv)
269 269
270 return None 270 return None
271 271
272 272
273 def getSearchResults(self, mode, query=None, docinfo=None): 273 def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
274 """loads list of search results and stores XML in docinfo""" 274 """loads list of search results and stores XML in docinfo"""
275 logging.debug("getSearchResults mode=%s query=%s"%(mode, query)) 275 logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
276 if mode == "none": 276 if mode == "none":
277 return docinfo 277 return docinfo
278 278
279 if 'resultSize_%s_%s'%(mode,query) in docinfo: 279 if 'resultSize_%s_%s'%(mode,query) in docinfo:
280 # cached result 280 # cached result
281 return docinfo 281 return docinfo
282 282
283 docpath = docinfo['textURLPath'] 283 docpath = docinfo['textURLPath']
284 # we need to set a result set size 284
285 pagesize = 1000
286 pn = 1
287 # fetch full results 285 # fetch full results
288 params = {'document': docpath, 286 params = {'document': docpath,
289 'mode': 'text', 287 'mode': 'text',
290 'queryType': mode, 288 'queryType': mode,
291 'query': query, 289 'query': query,
292 'queryResultPageSize': 1000, 290 'queryResultPageSize': 1000,
293 'queryResultPN': 1} 291 'queryResultPN': 1,
292 'characterNormalization': pageinfo.get('characterNormalization', 'reg')}
294 pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params)) 293 pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params))
295 #pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&s=%s&viewMode=%s&characterNormalization=%s&highlightElementPos=%s&highlightElement=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, s, viewMode,characterNormalization, highlightElementPos, highlightElement, urllib.quote(highlightQuery))) 294 #pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&s=%s&viewMode=%s&characterNormalization=%s&highlightElementPos=%s&highlightElement=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, s, viewMode,characterNormalization, highlightElementPos, highlightElement, urllib.quote(highlightQuery)))
296 dom = ET.fromstring(pagexml) 295 dom = ET.fromstring(pagexml)
297 # page content is in <div class="queryResultPage"> 296 # page content is in <div class="queryResultPage">
298 pagediv = None 297 pagediv = None
319 """returns single page from the table of contents""" 318 """returns single page from the table of contents"""
320 logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn)) 319 logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
321 # check for cached TOC 320 # check for cached TOC
322 #TODO: cache only one search 321 #TODO: cache only one search
323 if not docinfo.has_key('resultXML_%s_%s'%(mode,query)): 322 if not docinfo.has_key('resultXML_%s_%s'%(mode,query)):
324 self.getSearchResults(mode=mode, query=query, docinfo=docinfo) 323 self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)
325 324
326 resultxml = docinfo.get('resultXML_%s_%s'%(mode,query), None) 325 resultxml = docinfo.get('resultXML_%s_%s'%(mode,query), None)
327 if not resultxml: 326 if not resultxml:
328 logging.error("getResultPage: unable to find resultXML") 327 logging.error("getResultPage: unable to find resultXML")
329 return "Error: no result!" 328 return "Error: no result!"