Mercurial > hg > documentViewer
changeset 511:551ca1641a5e elementtree
more cleanup.
search really works now.
author | casties |
---|---|
date | Tue, 28 Feb 2012 18:21:59 +0100 |
parents | 4fb35343d2e7 |
children | 92a6443a6f16 |
files | .cvsignore MpdlXmlTextServer.py css/docuviewer.css documentViewer.py zpt/common_template.zpt zpt/search_template.zpt zpt/viewer_text.zpt zpt/viewer_xml.zpt |
diffstat | 8 files changed, 145 insertions(+), 110 deletions(-) [+] |
line wrap: on
line diff
--- a/.cvsignore Tue Feb 28 10:39:21 2012 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,3 +0,0 @@ -.project -.cvsignore -.pydevproject
--- a/MpdlXmlTextServer.py Tue Feb 28 10:39:21 2012 +0100 +++ b/MpdlXmlTextServer.py Tue Feb 28 18:21:59 2012 +0100 @@ -6,6 +6,7 @@ import re import logging import urllib +import urlparse import base64 from SrvTxtUtils import getInt, getText, getHttpData @@ -139,9 +140,11 @@ logging.warning("getTextPage: current!=pn!") # stuff for constructing full urls - characterNormalization = pageinfo.get('characterNormalization', None) - moreTextParam = '' selfurl = docinfo['viewerUrl'] + textParams = {'document': docpath, + 'pn': pn} + if 'characterNormalization' in pageinfo: + textParams['characterNormalization'] = pageinfo['characterNormalization'] if not mode: # default is dict @@ -156,11 +159,12 @@ if 'search' in modes: # add highlighting highlightQuery = pageinfo.get('highlightQuery', None) - sn = pageinfo.get('sn', None) - if highlightQuery and sn: - moreTextParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) + if highlightQuery: + textParams['highlightQuery'] = highlightQuery + textParams['highlightElement'] = pageinfo.get('highlightElement', '') + textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '') - # remove mode + # ignore mode in the following modes.remove('search') # other modes don't combine @@ -174,11 +178,10 @@ # just take first mode textmode = modes[0] - textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) - textParam += moreTextParam + textParams['mode'] = textmode # fetch the page - pagexml = self.getServerData("page-fragment.xql",textParam) + pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams)) dom = ET.fromstring(pagexml) # extract additional info self.processPageInfo(dom, docinfo, pageinfo) @@ -220,19 +223,22 @@ if href: # is link with href - if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): + linkurl = urlparse.urlparse(href) + #logging.debug("getTextPage: linkurl=%s"%repr(linkurl)) + if 
linkurl.path.endswith('GetDictionaryEntries'): + #TODO: replace wordInfo page # is dictionary link - change href (keeping parameters) - l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl)) + #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl)) # add target to open new page l.set('target', '_blank') # TODO: is this needed? - if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): - selfurl = self.absolute_url() - l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) - l.set('target', '_blank') - l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") - l.set('ondblclick', 'popupWin.focus();') +# if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): +# selfurl = self.absolute_url() +# l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) +# l.set('target', '_blank') +# l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") +# l.set('ondblclick', 'popupWin.focus();') if href.startswith('#note-'): # note link @@ -272,17 +278,28 @@ def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None): """loads list of search results and stores XML in docinfo""" + logging.debug("getSearchResults mode=%s query=%s"%(mode, query)) if mode == "none": return docinfo - if 'resultSize_%s_%s'%(mode,query) in docinfo: - # cached result - return docinfo + cachedQuery = docinfo.get('cachedQuery', None) + if cachedQuery is not None: + # cached search result + if cachedQuery == '%s_%s'%(mode,query): + # same 
query + return docinfo + + else: + # different query + del docinfo['resultSize'] + del docinfo['resultXML'] - docpath = docinfo['textURLPath'] + # cache query + docinfo['cachedQuery'] = '%s_%s'%(mode,query) # fetch full results + docpath = docinfo['textURLPath'] params = {'document': docpath, 'mode': 'text', 'queryType': mode, @@ -304,11 +321,11 @@ pagediv = div elif dc == 'queryResultHits': - docinfo['resultSize_%s_%s'%(mode,query)] = getInt(div.text) + docinfo['resultSize'] = getInt(div.text) if pagediv is not None: # store XML in docinfo - docinfo['resultXML_%s_%s'%(mode,query)] = ET.tostring(pagediv, 'UTF-8') + docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8') return docinfo @@ -316,18 +333,17 @@ def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None): """returns single page from the table of contents""" logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn)) - # check for cached TOC - #TODO: cache only one search - if not docinfo.has_key('resultXML_%s_%s'%(mode,query)): + # check for cached result + if not 'resultXML' in docinfo: self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo) - resultxml = docinfo.get('resultXML_%s_%s'%(mode,query), None) + resultxml = docinfo.get('resultXML', None) if not resultxml: logging.error("getResultPage: unable to find resultXML") return "Error: no result!" 
if size is None: - size = pageinfo.get('searchResultPageSize', 20) + size = pageinfo.get('resultPageSize', 10) if start is None: start = (pn - 1) * size @@ -336,7 +352,7 @@ if fullresult is not None: # paginate - first = start + first = start-1 len = size del fullresult[:first] del fullresult[len:] @@ -347,24 +363,18 @@ for l in links: href = l.get('href') if href: - # take pn from href - m = re.match(r'page-fragment\.xql.*pn=(\d+)', href) - if m is not None: - # and create new url (assuming parent is documentViewer) - #TODO: add highlighting params - url = self.getLink('pn', m.group(1)) - l.set('href', url) - else: - logging.warning("getResultPage: Problem with link=%s"%href) + # assume all links go to pages + linkUrl = urlparse.urlparse(href) + linkParams = urlparse.parse_qs(linkUrl.query) + # take some parameters + params = {'pn': linkParams['pn'], + 'highlightQuery': linkParams.get('highlightQuery',''), + 'highlightElement': linkParams.get('highlightElement',''), + 'highlightElementPos': linkParams.get('highlightElementPos','') + } + url = self.getLink(params=params) + l.set('href', url) - # fix two-divs-per-row with containing div -# newtoc = ET.Element('div', {'class':'queryResultPage'}) -# for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]): -# e = ET.Element('div',{'class':'tocline'}) -# e.append(d1) -# e.append(d2) -# newtoc.append(e) - return serialize(tocdivs) return "ERROR: no results!"
--- a/css/docuviewer.css Tue Feb 28 10:39:21 2012 +0100 +++ b/css/docuviewer.css Tue Feb 28 18:21:59 2012 +0100 @@ -7,15 +7,22 @@ vertical-align: top; } -div.toc-text, div.toc-figures { +div.toc-text, +div.toc-figures { max-width: 20em; } -div.toc-text .toc, div.toc-figures .toc { +div.col.results { + max-width: 20em; +} + +div.toc-text .toc, +div.toc-figures .toc { float:left; clear:right; } -div.toc-text .toc.float.right, div.toc-figures .toc.float.right { +div.toc-text .toc.float.right, +div.toc-figures .toc.float.right { float:right; } @@ -29,3 +36,7 @@ div.toc-thumbs .thumbcap { color: black; } + +span.hit.highlight { + background-color: lightgreen; +} \ No newline at end of file
--- a/documentViewer.py Tue Feb 28 10:39:21 2012 +0100 +++ b/documentViewer.py Tue Feb 28 18:21:59 2012 +0100 @@ -123,6 +123,7 @@ toc_figures = PageTemplateFile('zpt/toc_figures', globals()) toc_none = PageTemplateFile('zpt/toc_none', globals()) common_template = PageTemplateFile('zpt/common_template', globals()) + search_template = PageTemplateFile('zpt/search_template', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) docuviewer_css = ImageFile('css/docuviewer.css',globals()) # make ImageFile better for development @@ -710,7 +711,7 @@ start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) # int(current / grpsize) * grpsize +1)) pageinfo['start'] = start - + # get number of pages np = int(docinfo.get('numPages', 0)) if np == 0: # numPages unknown - maybe we can get it from text page @@ -719,6 +720,8 @@ pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo) np = int(docinfo.get('numPages', 0)) + # cache table of contents + pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 @@ -730,24 +733,24 @@ pageinfo['pageZero'] = pageZero pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np) - # TODO: do we need this here? pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') - pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) - pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10)) - pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1')) - pageinfo['resultPN'] = getInt(self.REQUEST.get('resultPN','1')) - # limit tocPN TODO: do we need this? 
- if 'tocSize_%s'%tocMode in docinfo: - tocSize = docinfo['tocSize_%s'%tocMode] - tocPageSize = pageinfo['tocPageSize'] - # cached toc - if tocSize%tocPageSize>0: - tocPages=tocSize/tocPageSize+1 - else: - tocPages=tocSize/tocPageSize - - pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN']) + # cache search results + pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10)) + query = self.REQUEST.get('query',None) + pageinfo['query'] = query + if query: + queryType = self.REQUEST.get('queryType', 'fulltextMorph') + pageinfo['queryType'] = queryType + pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1')) + self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo) + + # highlighting + highlightQuery = self.REQUEST.get('highlightQuery', None) + if highlightQuery: + pageinfo['highlightQuery'] = highlightQuery + pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '') + pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '') return pageinfo
--- a/zpt/common_template.zpt Tue Feb 28 10:39:21 2012 +0100 +++ b/zpt/common_template.zpt Tue Feb 28 18:21:59 2012 +0100 @@ -54,15 +54,15 @@ <!-- toc ruler with previous/next toc page buttons --> <metal:block metal:define-macro="toc_ruler"> - <form class="autosubmit" tal:attributes="action viewerUrl"> - <input type="hidden" tal:define="params python:here.getParams('start', None)" tal:repeat="param params" + <form class="autosubmit" tal:attributes="action viewerUrl" tal:define="startParam startParam | string:start"> + <input type="hidden" tal:define="params python:here.getParams(startParam, None)" tal:repeat="param params" tal:attributes="name param; value python:params[param]" /> <a tal:condition="batch/prevStart" - tal:attributes="href python:here.getLink('start',batch['prevStart'])">&lt;</a> <span tal:condition="not:batch/prevStart">&lt;</span> - <select class="autosubmit" name="start"> + tal:attributes="href python:here.getLink(startParam,batch['prevStart'])">&lt;</a> <span tal:condition="not:batch/prevStart">&lt;</span> + <select class="autosubmit" tal:attributes="name startParam"> <option tal:repeat="grp batch/batches" tal:attributes="selected python:(start==grp['start']); value grp/start" tal:content="string:${grp/start} - ${grp/end}" /> </select> <input type="submit" value="Go" /> <a tal:condition="batch/nextStart" - tal:attributes="href python:here.getLink('start',batch['nextStart'])">&gt;</a> <span tal:condition="not:batch/nextStart">&gt;</span> + tal:attributes="href python:here.getLink(startParam,batch['nextStart'])">&gt;</a> <span tal:condition="not:batch/nextStart">&gt;</span> </form> </metal:block>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/search_template.zpt Tue Feb 28 18:21:59 2012 +0100 @@ -0,0 +1,21 @@ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml"> +<head> +<meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> +</head> +<body> + <!-- block used for main content area --> + <div metal:define-macro="results_div" + tal:define="start pageinfo/resultStart; resultsize python:docinfo.get('resultSize',0); grpsize pageinfo/resultPageSize; + batch python:here.getBatch(start=start,size=grpsize,end=resultsize);"> + <div class="ruler" tal:define="startParam string:resultStart"> + <metal:block metal:use-macro="here/template/common_template/macros/toc_ruler"/> + </div> + <div class="content" + tal:content="structure python:here.getResultsPage(mode=queryType,query=query,start=start,size=grpsize,pageinfo=pageinfo,docinfo=docinfo)" /> + </div> +<!-- tal:content="structure python:here.getResultsPage(mode=queryType,query=query,pn=1,pageinfo=pageinfo,docinfo=docinfo)" --> +<!-- structure python:here.getTocPage(mode='text',start=start,pageinfo=pageinfo,docinfo=docinfo) --> +</body> +</html>
--- a/zpt/viewer_text.zpt Tue Feb 28 10:39:21 2012 +0100 +++ b/zpt/viewer_text.zpt Tue Feb 28 18:21:59 2012 +0100 @@ -63,11 +63,11 @@ <!-- end of col-main --> <!-- right-side search results --> - <div class="col search" tal:condition="query"> + <div class="col results" tal:condition="query"> <!--"BEGIN SEARCH RESULTS" --> <div class="options"> <h4>Search results</h4> - <div tal:content="structure python:here.getResultsPage(mode=queryType,query=query,pn=1,pageinfo=pageinfo,docinfo=docinfo)"/> + <div metal:use-macro="here/template/search_template/macros/results_div"/> </div> </div> @@ -91,7 +91,7 @@ value="dict" tal:attributes="checked python:'dict' in viewLayers" /> Dictionary </li> - <li tal:condition="query"> + <li tal:condition="python:query"> <input type="checkbox" class="autosubmit" name="viewLayer" value="search" tal:attributes="checked python:'search' in viewLayers" /> Search hits @@ -128,6 +128,7 @@ <!-- query text --> <input type="text" name="query" tal:attributes="value query"/> <input type="submit" value="Search"/> + <a tal:attributes="href python:here.getLink('query',None)">Clear</a> <ul> <li> <input type="radio" name="queryType" value="fulltext"
--- a/zpt/viewer_xml.zpt Tue Feb 28 10:39:21 2012 +0100 +++ b/zpt/viewer_xml.zpt Tue Feb 28 18:21:59 2012 +0100 @@ -35,13 +35,13 @@ </div> <div class="page-body" tal:condition="python:here.isAccessible(docinfo)"> <!--table of contents--> - <div class="col-left"> + <div class="col toc"> <metal:block metal:use-macro="python:path('here/template/toc_%s/macros/main'%tocMode)" /> </div> <!-- text page --> - <div class="col-main"> + <div class="col main"> <div class="ruler"> <metal:block metal:use-macro="here/template/common_template/macros/page_ruler" /> </div> @@ -60,7 +60,7 @@ <!-- col-main --> <!-- right-side options --> - <div class="col-right"> + <div class="col buttons"> <!--"BEGIN TEXT DISPLAY" --> <div class="options"> <h4>Text display</h4> @@ -68,11 +68,17 @@ <input type="hidden" tal:define="params python:here.getParams(params={'viewMode':None})" tal:repeat="param params" - tal:attributes="name param; value python:params[param]" /> - <input class="autosubmit" type="radio" name="viewMode" value="text" - tal:attributes="checked python:viewMode=='text'" /> Text<br /> - <input type="radio" class="autosubmit" name="viewMode" value="text" - tal:attributes="checked python:viewMode=='xml'" /> XML<br /> + tal:attributes="name param; value python:params[param]" /> + <ul> + <li> + <input class="autosubmit" type="radio" name="viewMode" value="text" + tal:attributes="checked python:viewMode=='text'" /> Text + </li> + <li> + <input type="radio" class="autosubmit" name="viewMode" value="text" + tal:attributes="checked python:viewMode=='xml'" /> XML + </li> + </ul> <input type="submit" value="Go!" 
/> </form> </div> @@ -81,34 +87,20 @@ <!--"BEGIN TEXT SIZE"--> <div class="options"> <h4>Text size</h4> - <div class="fsizer"> - <a href="javascript:fontSize(12);" class="fs_sml">S</a> <a - href="javascript:fontSize(14);" class="fs_med">M</a> <a - href="javascript:fontSize(16);" class="fs_lrg">L</a> - </div> + <ul class="fsizer"> + <li> + <a href="javascript:fontSize(12);" class="fs_sml">S</a> + </li> + <li> + <a href="javascript:fontSize(14);" class="fs_med">M</a> + </li> + <li> + <a href="javascript:fontSize(16);" class="fs_lrg">L</a> + </li> + </ul> </div> <!--"END TEXT SIZE"--> - <!--"BEGIN TEXT NORMALIZATION"--> - <div class="options" tal:condition="python:viewMode!='xml'"> - <h4>Text normalization</h4> - <form tal:attributes="action viewerUrl" class="autosubmit" - tal:define="norm python:pageinfo.get('characterNormalization','regPlusNorm');"> - <input type="hidden" - tal:define="params python:here.getParams('characterNormalization',None)" - tal:repeat="param params" - tal:attributes="name param; value python:params[param]" /> <input - type="radio" class="autosubmit" name="characterNormalization" value="orig" - tal:attributes="checked python:norm=='orig'" /> Original<br /> <input - type="radio" class="autosubmit" name="characterNormalization" value="reg" - tal:attributes="checked python:norm=='reg'" /> Regularized<br /> <input - type="radio" class="autosubmit" name="characterNormalization" - value="regPlusNorm" tal:attributes="checked python:norm=='regPlusNorm'" /> - Normalized<br /> <input type="submit" value="Go!" /> - </form> - </div> - <!--"END TEXT NORMALIZATION"--> - </div> <!-- /col-right -->