# HG changeset patch # User casties # Date 1330449719 -3600 # Node ID 551ca1641a5ecd5e2c16ac14ff7051cb081ada95 # Parent 4fb35343d2e7c23c758e88382ced6c784ac4b607 more cleanup. search really works now. diff -r 4fb35343d2e7 -r 551ca1641a5e .cvsignore --- a/.cvsignore Tue Feb 28 10:39:21 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -.project -.cvsignore -.pydevproject diff -r 4fb35343d2e7 -r 551ca1641a5e MpdlXmlTextServer.py --- a/MpdlXmlTextServer.py Tue Feb 28 10:39:21 2012 +0100 +++ b/MpdlXmlTextServer.py Tue Feb 28 18:21:59 2012 +0100 @@ -6,6 +6,7 @@ import re import logging import urllib +import urlparse import base64 from SrvTxtUtils import getInt, getText, getHttpData @@ -139,9 +140,11 @@ logging.warning("getTextPage: current!=pn!") # stuff for constructing full urls - characterNormalization = pageinfo.get('characterNormalization', None) - moreTextParam = '' selfurl = docinfo['viewerUrl'] + textParams = {'document': docpath, + 'pn': pn} + if 'characterNormalization' in pageinfo: + textParams['characterNormalization'] = pageinfo['characterNormalization'] if not mode: # default is dict @@ -156,11 +159,12 @@ if 'search' in modes: # add highlighting highlightQuery = pageinfo.get('highlightQuery', None) - sn = pageinfo.get('sn', None) - if highlightQuery and sn: - moreTextParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) + if highlightQuery: + textParams['highlightQuery'] = highlightQuery + textParams['highlightElement'] = pageinfo.get('highlightElement', '') + textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '') - # remove mode + # ignore mode in the following modes.remove('search') # other modes don't combine @@ -174,11 +178,10 @@ # just take first mode textmode = modes[0] - textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) - textParam += moreTextParam + textParams['mode'] = textmode # fetch the page - pagexml = self.getServerData("page-fragment.xql",textParam) + pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams)) dom = ET.fromstring(pagexml) # extract additional info self.processPageInfo(dom, docinfo, pageinfo) @@ -220,19 +223,22 @@ if href: # is link with href - if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): + linkurl = urlparse.urlparse(href) + #logging.debug("getTextPage: linkurl=%s"%repr(linkurl)) + if linkurl.path.endswith('GetDictionaryEntries'): + #TODO: replace wordInfo page # is dictionary link - change href (keeping parameters) - l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl)) + #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl)) # add target to open new page l.set('target', '_blank') # TODO: is this needed? - if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): - selfurl = self.absolute_url() - l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) - l.set('target', '_blank') - l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") - l.set('ondblclick', 'popupWin.focus();') +# if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): +# selfurl = self.absolute_url() +# l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) +# l.set('target', '_blank') +# l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") +# l.set('ondblclick', 'popupWin.focus();') if href.startswith('#note-'): # note link @@ -272,17 +278,28 @@ def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None): """loads list of search results and stores XML in docinfo""" + logging.debug("getSearchResults mode=%s query=%s"%(mode, query)) if mode == "none": return docinfo - if 'resultSize_%s_%s'%(mode,query) in docinfo: - # cached result - return docinfo + cachedQuery = docinfo.get('cachedQuery', None) + if cachedQuery is not None: + # cached search result + if cachedQuery == '%s_%s'%(mode,query): + # same query + return docinfo + + else: + # different query + del docinfo['resultSize'] + del docinfo['resultXML'] - docpath = docinfo['textURLPath'] + # cache query + docinfo['cachedQuery'] = '%s_%s'%(mode,query) # fetch full results + docpath = docinfo['textURLPath'] params = {'document': docpath, 'mode': 'text', 'queryType': mode, @@ -304,11 +321,11 @@ pagediv = div elif dc == 'queryResultHits': - docinfo['resultSize_%s_%s'%(mode,query)] = getInt(div.text) + docinfo['resultSize'] = getInt(div.text) if pagediv is not None: # store XML in docinfo - docinfo['resultXML_%s_%s'%(mode,query)] = ET.tostring(pagediv, 'UTF-8') + docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8') return docinfo @@ -316,18 +333,17 @@ def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None): """returns single page from the table of contents""" logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn)) - # check for cached TOC - #TODO: cache only one search - if not docinfo.has_key('resultXML_%s_%s'%(mode,query)): + # check for cached result + if not 'resultXML' in docinfo: self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo) - resultxml = docinfo.get('resultXML_%s_%s'%(mode,query), None) + resultxml = docinfo.get('resultXML', None) if not resultxml: logging.error("getResultPage: unable to find resultXML") return "Error: no result!" if size is None: - size = pageinfo.get('searchResultPageSize', 20) + size = pageinfo.get('resultPageSize', 10) if start is None: start = (pn - 1) * size @@ -336,7 +352,7 @@ if fullresult is not None: # paginate - first = start + first = start-1 len = size del fullresult[:first] del fullresult[len:] @@ -347,24 +363,18 @@ for l in links: href = l.get('href') if href: - # take pn from href - m = re.match(r'page-fragment\.xql.*pn=(\d+)', href) - if m is not None: - # and create new url (assuming parent is documentViewer) - #TODO: add highlighting params - url = self.getLink('pn', m.group(1)) - l.set('href', url) - else: - logging.warning("getResultPage: Problem with link=%s"%href) + # assume all links go to pages + linkUrl = urlparse.urlparse(href) + linkParams = urlparse.parse_qs(linkUrl.query) + # take some parameters + params = {'pn': linkParams['pn'], + 'highlightQuery': linkParams.get('highlightQuery',''), + 'highlightElement': linkParams.get('highlightElement',''), + 'highlightElementPos': linkParams.get('highlightElementPos','') + } + url = self.getLink(params=params) + l.set('href', url) - # fix two-divs-per-row with containing div -# newtoc = ET.Element('div', {'class':'queryResultPage'}) -# for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]): -# e = ET.Element('div',{'class':'tocline'}) -# e.append(d1) -# e.append(d2) -# newtoc.append(e) - return serialize(tocdivs) return "ERROR: no results!" diff -r 4fb35343d2e7 -r 551ca1641a5e css/docuviewer.css --- a/css/docuviewer.css Tue Feb 28 10:39:21 2012 +0100 +++ b/css/docuviewer.css Tue Feb 28 18:21:59 2012 +0100 @@ -7,15 +7,22 @@ vertical-align: top; } -div.toc-text, div.toc-figures { +div.toc-text, +div.toc-figures { max-width: 20em; } -div.toc-text .toc, div.toc-figures .toc { +div.col.results { + max-width: 20em; +} + +div.toc-text .toc, +div.toc-figures .toc { float:left; clear:right; } -div.toc-text .toc.float.right, div.toc-figures .toc.float.right { +div.toc-text .toc.float.right, +div.toc-figures .toc.float.right { float:right; } @@ -29,3 +36,7 @@ div.toc-thumbs .thumbcap { color: black; } + +span.hit.highlight { + background-color: lightgreen; +} \ No newline at end of file diff -r 4fb35343d2e7 -r 551ca1641a5e documentViewer.py --- a/documentViewer.py Tue Feb 28 10:39:21 2012 +0100 +++ b/documentViewer.py Tue Feb 28 18:21:59 2012 +0100 @@ -123,6 +123,7 @@ toc_figures = PageTemplateFile('zpt/toc_figures', globals()) toc_none = PageTemplateFile('zpt/toc_none', globals()) common_template = PageTemplateFile('zpt/common_template', globals()) + search_template = PageTemplateFile('zpt/search_template', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) docuviewer_css = ImageFile('css/docuviewer.css',globals()) # make ImageFile better for development @@ -710,7 +711,7 @@ start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) # int(current / grpsize) * grpsize +1)) pageinfo['start'] = start - + # get number of pages np = int(docinfo.get('numPages', 0)) if np == 0: # numPages unknown - maybe we can get it from text page @@ -719,6 +720,8 @@ pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo) np = int(docinfo.get('numPages', 0)) + # cache table of contents + pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 @@ -730,24 +733,24 @@ pageinfo['pageZero'] = pageZero pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np) - # TODO: do we need this here? pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') - pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) - pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10)) - pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1')) - pageinfo['resultPN'] = getInt(self.REQUEST.get('resultPN','1')) - # limit tocPN TODO: do we need this? - if 'tocSize_%s'%tocMode in docinfo: - tocSize = docinfo['tocSize_%s'%tocMode] - tocPageSize = pageinfo['tocPageSize'] - # cached toc - if tocSize%tocPageSize>0: - tocPages=tocSize/tocPageSize+1 - else: - tocPages=tocSize/tocPageSize - - pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN']) + # cache search results + pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10)) + query = self.REQUEST.get('query',None) + pageinfo['query'] = query + if query: + queryType = self.REQUEST.get('queryType', 'fulltextMorph') + pageinfo['queryType'] = queryType + pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1')) + self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo) + + # highlighting + highlightQuery = self.REQUEST.get('highlightQuery', None) + if highlightQuery: + pageinfo['highlightQuery'] = highlightQuery + pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '') + pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '') return pageinfo diff -r 4fb35343d2e7 -r 551ca1641a5e zpt/common_template.zpt --- a/zpt/common_template.zpt Tue Feb 28 10:39:21 2012 +0100 +++ b/zpt/common_template.zpt Tue Feb 28 18:21:59 2012 +0100 @@ -54,15 +54,15 @@ -
- + < < -
diff -r 4fb35343d2e7 -r 551ca1641a5e zpt/search_template.zpt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/search_template.zpt Tue Feb 28 18:21:59 2012 +0100 @@ -0,0 +1,21 @@ + + + + + + + +
+
+ +
+
+
+ + + + diff -r 4fb35343d2e7 -r 551ca1641a5e zpt/viewer_text.zpt --- a/zpt/viewer_text.zpt Tue Feb 28 10:39:21 2012 +0100 +++ b/zpt/viewer_text.zpt Tue Feb 28 18:21:59 2012 +0100 @@ -63,11 +63,11 @@ -