# HG changeset patch
# User casties
# Date 1330449719 -3600
# Node ID 551ca1641a5ecd5e2c16ac14ff7051cb081ada95
# Parent 4fb35343d2e7c23c758e88382ced6c784ac4b607
More cleanup.
Search really works now.
diff -r 4fb35343d2e7 -r 551ca1641a5e .cvsignore
--- a/.cvsignore Tue Feb 28 10:39:21 2012 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-.project
-.cvsignore
-.pydevproject
diff -r 4fb35343d2e7 -r 551ca1641a5e MpdlXmlTextServer.py
--- a/MpdlXmlTextServer.py Tue Feb 28 10:39:21 2012 +0100
+++ b/MpdlXmlTextServer.py Tue Feb 28 18:21:59 2012 +0100
@@ -6,6 +6,7 @@
import re
import logging
import urllib
+import urlparse
import base64
from SrvTxtUtils import getInt, getText, getHttpData
@@ -139,9 +140,11 @@
logging.warning("getTextPage: current!=pn!")
# stuff for constructing full urls
- characterNormalization = pageinfo.get('characterNormalization', None)
- moreTextParam = ''
selfurl = docinfo['viewerUrl']
+ textParams = {'document': docpath,
+ 'pn': pn}
+ if 'characterNormalization' in pageinfo:
+ textParams['characterNormalization'] = pageinfo['characterNormalization']
if not mode:
# default is dict
@@ -156,11 +159,12 @@
if 'search' in modes:
# add highlighting
highlightQuery = pageinfo.get('highlightQuery', None)
- sn = pageinfo.get('sn', None)
- if highlightQuery and sn:
- moreTextParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)
+ if highlightQuery:
+ textParams['highlightQuery'] = highlightQuery
+ textParams['highlightElement'] = pageinfo.get('highlightElement', '')
+ textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '')
- # remove mode
+ # ignore mode in the following
modes.remove('search')
# other modes don't combine
@@ -174,11 +178,10 @@
# just take first mode
textmode = modes[0]
- textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
- textParam += moreTextParam
+ textParams['mode'] = textmode
# fetch the page
- pagexml = self.getServerData("page-fragment.xql",textParam)
+ pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams))
dom = ET.fromstring(pagexml)
# extract additional info
self.processPageInfo(dom, docinfo, pageinfo)
@@ -220,19 +223,22 @@
if href:
# is link with href
- if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
+ linkurl = urlparse.urlparse(href)
+ #logging.debug("getTextPage: linkurl=%s"%repr(linkurl))
+ if linkurl.path.endswith('GetDictionaryEntries'):
+ #TODO: replace wordInfo page
# is dictionary link - change href (keeping parameters)
- l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))
+ #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))
# add target to open new page
l.set('target', '_blank')
# TODO: is this needed?
- if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
- selfurl = self.absolute_url()
- l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
- l.set('target', '_blank')
- l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
- l.set('ondblclick', 'popupWin.focus();')
+# if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
+# selfurl = self.absolute_url()
+# l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
+# l.set('target', '_blank')
+# l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
+# l.set('ondblclick', 'popupWin.focus();')
if href.startswith('#note-'):
# note link
@@ -272,17 +278,28 @@
def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
"""loads list of search results and stores XML in docinfo"""
+
logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
if mode == "none":
return docinfo
- if 'resultSize_%s_%s'%(mode,query) in docinfo:
- # cached result
- return docinfo
+ cachedQuery = docinfo.get('cachedQuery', None)
+ if cachedQuery is not None:
+ # cached search result
+ if cachedQuery == '%s_%s'%(mode,query):
+ # same query
+ return docinfo
+
+ else:
+ # different query
+ del docinfo['resultSize']
+ del docinfo['resultXML']
- docpath = docinfo['textURLPath']
+ # cache query
+ docinfo['cachedQuery'] = '%s_%s'%(mode,query)
# fetch full results
+ docpath = docinfo['textURLPath']
params = {'document': docpath,
'mode': 'text',
'queryType': mode,
@@ -304,11 +321,11 @@
pagediv = div
elif dc == 'queryResultHits':
- docinfo['resultSize_%s_%s'%(mode,query)] = getInt(div.text)
+ docinfo['resultSize'] = getInt(div.text)
if pagediv is not None:
# store XML in docinfo
- docinfo['resultXML_%s_%s'%(mode,query)] = ET.tostring(pagediv, 'UTF-8')
+ docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8')
return docinfo
@@ -316,18 +333,17 @@
def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
"""returns single page from the table of contents"""
logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
- # check for cached TOC
- #TODO: cache only one search
- if not docinfo.has_key('resultXML_%s_%s'%(mode,query)):
+ # check for cached result
+ if not 'resultXML' in docinfo:
self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)
- resultxml = docinfo.get('resultXML_%s_%s'%(mode,query), None)
+ resultxml = docinfo.get('resultXML', None)
if not resultxml:
logging.error("getResultPage: unable to find resultXML")
return "Error: no result!"
if size is None:
- size = pageinfo.get('searchResultPageSize', 20)
+ size = pageinfo.get('resultPageSize', 10)
if start is None:
start = (pn - 1) * size
@@ -336,7 +352,7 @@
if fullresult is not None:
# paginate
- first = start
+ first = start-1
len = size
del fullresult[:first]
del fullresult[len:]
@@ -347,24 +363,18 @@
for l in links:
href = l.get('href')
if href:
- # take pn from href
- m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
- if m is not None:
- # and create new url (assuming parent is documentViewer)
- #TODO: add highlighting params
- url = self.getLink('pn', m.group(1))
- l.set('href', url)
- else:
- logging.warning("getResultPage: Problem with link=%s"%href)
+ # assume all links go to pages
+ linkUrl = urlparse.urlparse(href)
+ linkParams = urlparse.parse_qs(linkUrl.query)
+ # take some parameters
+ params = {'pn': linkParams['pn'],
+ 'highlightQuery': linkParams.get('highlightQuery',''),
+ 'highlightElement': linkParams.get('highlightElement',''),
+ 'highlightElementPos': linkParams.get('highlightElementPos','')
+ }
+ url = self.getLink(params=params)
+ l.set('href', url)
- # fix two-divs-per-row with containing div
-# newtoc = ET.Element('div', {'class':'queryResultPage'})
-# for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]):
-# e = ET.Element('div',{'class':'tocline'})
-# e.append(d1)
-# e.append(d2)
-# newtoc.append(e)
-
return serialize(tocdivs)
return "ERROR: no results!"
diff -r 4fb35343d2e7 -r 551ca1641a5e css/docuviewer.css
--- a/css/docuviewer.css Tue Feb 28 10:39:21 2012 +0100
+++ b/css/docuviewer.css Tue Feb 28 18:21:59 2012 +0100
@@ -7,15 +7,22 @@
vertical-align: top;
}
-div.toc-text, div.toc-figures {
+div.toc-text,
+div.toc-figures {
max-width: 20em;
}
-div.toc-text .toc, div.toc-figures .toc {
+div.col.results {
+ max-width: 20em;
+}
+
+div.toc-text .toc,
+div.toc-figures .toc {
float:left;
clear:right;
}
-div.toc-text .toc.float.right, div.toc-figures .toc.float.right {
+div.toc-text .toc.float.right,
+div.toc-figures .toc.float.right {
float:right;
}
@@ -29,3 +36,7 @@
div.toc-thumbs .thumbcap {
color: black;
}
+
+span.hit.highlight {
+ background-color: lightgreen;
+}
\ No newline at end of file
diff -r 4fb35343d2e7 -r 551ca1641a5e documentViewer.py
--- a/documentViewer.py Tue Feb 28 10:39:21 2012 +0100
+++ b/documentViewer.py Tue Feb 28 18:21:59 2012 +0100
@@ -123,6 +123,7 @@
toc_figures = PageTemplateFile('zpt/toc_figures', globals())
toc_none = PageTemplateFile('zpt/toc_none', globals())
common_template = PageTemplateFile('zpt/common_template', globals())
+ search_template = PageTemplateFile('zpt/search_template', globals())
info_xml = PageTemplateFile('zpt/info_xml', globals())
docuviewer_css = ImageFile('css/docuviewer.css',globals())
# make ImageFile better for development
@@ -710,7 +711,7 @@
start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
# int(current / grpsize) * grpsize +1))
pageinfo['start'] = start
-
+ # get number of pages
np = int(docinfo.get('numPages', 0))
if np == 0:
# numPages unknown - maybe we can get it from text page
@@ -719,6 +720,8 @@
pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
np = int(docinfo.get('numPages', 0))
+ # cache table of contents
+ pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
pageinfo['numgroups'] = int(np / grpsize)
if np % grpsize > 0:
pageinfo['numgroups'] += 1
@@ -730,24 +733,24 @@
pageinfo['pageZero'] = pageZero
pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
- # TODO: do we need this here?
pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
- pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
- pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
- pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
- pageinfo['resultPN'] = getInt(self.REQUEST.get('resultPN','1'))
- # limit tocPN TODO: do we need this?
- if 'tocSize_%s'%tocMode in docinfo:
- tocSize = docinfo['tocSize_%s'%tocMode]
- tocPageSize = pageinfo['tocPageSize']
- # cached toc
- if tocSize%tocPageSize>0:
- tocPages=tocSize/tocPageSize+1
- else:
- tocPages=tocSize/tocPageSize
-
- pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])
+ # cache search results
+ pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
+ query = self.REQUEST.get('query',None)
+ pageinfo['query'] = query
+ if query:
+ queryType = self.REQUEST.get('queryType', 'fulltextMorph')
+ pageinfo['queryType'] = queryType
+ pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
+ self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)
+
+ # highlighting
+ highlightQuery = self.REQUEST.get('highlightQuery', None)
+ if highlightQuery:
+ pageinfo['highlightQuery'] = highlightQuery
+ pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
+ pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')
return pageinfo
diff -r 4fb35343d2e7 -r 551ca1641a5e zpt/common_template.zpt
--- a/zpt/common_template.zpt Tue Feb 28 10:39:21 2012 +0100
+++ b/zpt/common_template.zpt Tue Feb 28 18:21:59 2012 +0100
@@ -54,15 +54,15 @@