changeset 511:551ca1641a5e elementtree

more cleanup. search really works now.
author casties
date Tue, 28 Feb 2012 18:21:59 +0100
parents 4fb35343d2e7
children 92a6443a6f16
files .cvsignore MpdlXmlTextServer.py css/docuviewer.css documentViewer.py zpt/common_template.zpt zpt/search_template.zpt zpt/viewer_text.zpt zpt/viewer_xml.zpt
diffstat 8 files changed, 145 insertions(+), 110 deletions(-) [+]
line wrap: on
line diff
--- a/.cvsignore	Tue Feb 28 10:39:21 2012 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-.project
-.cvsignore
-.pydevproject
--- a/MpdlXmlTextServer.py	Tue Feb 28 10:39:21 2012 +0100
+++ b/MpdlXmlTextServer.py	Tue Feb 28 18:21:59 2012 +0100
@@ -6,6 +6,7 @@
 import re
 import logging
 import urllib
+import urlparse
 import base64
 
 from SrvTxtUtils import getInt, getText, getHttpData
@@ -139,9 +140,11 @@
             logging.warning("getTextPage: current!=pn!")
             
         # stuff for constructing full urls
-        characterNormalization = pageinfo.get('characterNormalization', None)
-        moreTextParam = ''
         selfurl = docinfo['viewerUrl']
+        textParams = {'document': docpath,
+                      'pn': pn}
+        if 'characterNormalization' in pageinfo:
+            textParams['characterNormalization'] = pageinfo['characterNormalization']
         
         if not mode:
             # default is dict
@@ -156,11 +159,12 @@
         if 'search' in modes:
             # add highlighting
             highlightQuery = pageinfo.get('highlightQuery', None)
-            sn = pageinfo.get('sn', None)
-            if highlightQuery and sn:
-                moreTextParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)
+            if highlightQuery:
+                textParams['highlightQuery'] = highlightQuery
+                textParams['highlightElement'] = pageinfo.get('highlightElement', '')
+                textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '')
                 
-            # remove mode
+            # ignore mode in the following
             modes.remove('search')
                             
         # other modes don't combine
@@ -174,11 +178,10 @@
             # just take first mode
             textmode = modes[0]
         
-        textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
-        textParam += moreTextParam
+        textParams['mode'] = textmode
         
         # fetch the page
-        pagexml = self.getServerData("page-fragment.xql",textParam)
+        pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams))
         dom = ET.fromstring(pagexml)
         # extract additional info
         self.processPageInfo(dom, docinfo, pageinfo)
@@ -220,19 +223,22 @@
                     
                     if href:
                         # is link with href
-                        if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
+                        linkurl = urlparse.urlparse(href)
+                        #logging.debug("getTextPage: linkurl=%s"%repr(linkurl))
+                        if linkurl.path.endswith('GetDictionaryEntries'):
+                            #TODO: replace wordInfo page
                             # is dictionary link - change href (keeping parameters)
-                            l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))
+                            #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl))
                             # add target to open new page
                             l.set('target', '_blank')
                                                           
                         # TODO: is this needed?
-                        if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
-                            selfurl = self.absolute_url()
-                            l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
-                            l.set('target', '_blank')
-                            l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
-                            l.set('ondblclick', 'popupWin.focus();')   
+#                        if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
+#                            selfurl = self.absolute_url()
+#                            l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
+#                            l.set('target', '_blank')
+#                            l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
+#                            l.set('ondblclick', 'popupWin.focus();')   
                     
                         if href.startswith('#note-'):
                             # note link
@@ -272,17 +278,28 @@
 
     def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None):
         """loads list of search results and stores XML in docinfo"""
+        
         logging.debug("getSearchResults mode=%s query=%s"%(mode, query))
         if mode == "none":
             return docinfo
               
-        if 'resultSize_%s_%s'%(mode,query) in docinfo:
-            # cached result
-            return docinfo
+        cachedQuery = docinfo.get('cachedQuery', None)
+        if cachedQuery is not None:
+            # cached search result
+            if cachedQuery == '%s_%s'%(mode,query):
+                # same query
+                return docinfo
+            
+            else:
+                # different query - drop stale results (keys may be missing if the last search stored nothing)
+                docinfo.pop('resultSize', None)
+                docinfo.pop('resultXML', None)
         
-        docpath = docinfo['textURLPath']
+        # cache query
+        docinfo['cachedQuery'] = '%s_%s'%(mode,query)
         
         # fetch full results
+        docpath = docinfo['textURLPath']
         params = {'document': docpath,
                   'mode': 'text',
                   'queryType': mode,
@@ -304,11 +321,11 @@
                 pagediv = div
                 
             elif dc == 'queryResultHits':
-                docinfo['resultSize_%s_%s'%(mode,query)] = getInt(div.text)
+                docinfo['resultSize'] = getInt(div.text)
 
         if pagediv is not None:
             # store XML in docinfo
-            docinfo['resultXML_%s_%s'%(mode,query)] = ET.tostring(pagediv, 'UTF-8')
+            docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8')
 
         return docinfo
     
@@ -316,18 +333,17 @@
     def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None):
         """returns single page from the table of contents"""
         logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn))
-        # check for cached TOC
-        #TODO: cache only one search
-        if not docinfo.has_key('resultXML_%s_%s'%(mode,query)):
+        # check for cached result
+        if not 'resultXML' in docinfo:
             self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo)
             
-        resultxml = docinfo.get('resultXML_%s_%s'%(mode,query), None)
+        resultxml = docinfo.get('resultXML', None)
         if not resultxml:
             logging.error("getResultPage: unable to find resultXML")
             return "Error: no result!"
         
         if size is None:
-            size = pageinfo.get('searchResultPageSize', 20)
+            size = pageinfo.get('resultPageSize', 10)
             
         if start is None:
             start = (pn - 1) * size
@@ -336,7 +352,7 @@
         
         if fullresult is not None:
             # paginate
-            first = start
+            first = max(start-1, 0)  # clamp: start can be 0 (e.g. computed from pn=1), a negative slice index would drop almost all hits
             len = size
             del fullresult[:first]
             del fullresult[len:]
@@ -347,24 +363,18 @@
             for l in links:
                 href = l.get('href')
                 if href:
-                    # take pn from href
-                    m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
-                    if m is not None:
-                        # and create new url (assuming parent is documentViewer)
-                        #TODO: add highlighting params
-                        url = self.getLink('pn', m.group(1))
-                        l.set('href', url)
-                    else:
-                        logging.warning("getResultPage: Problem with link=%s"%href)
+                    linkUrl = urlparse.urlparse(href)
+                    linkParams = urlparse.parse_qs(linkUrl.query)
+                    if 'pn' not in linkParams:
+                        # not a page link - leave it untouched instead of raising KeyError
+                        logging.warning("getResultPage: Problem with link=%s"%href)
+                        continue
+                    # take some parameters (parse_qs values are lists - use the first value)
+                    params = {}
+                    for p in ('pn', 'highlightQuery', 'highlightElement', 'highlightElementPos'):
+                        params[p] = linkParams.get(p, [''])[0]
+                    l.set('href', self.getLink(params=params))
                         
-            # fix two-divs-per-row with containing div
-#            newtoc = ET.Element('div', {'class':'queryResultPage'})
-#            for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]):
-#                e = ET.Element('div',{'class':'tocline'})
-#                e.append(d1)
-#                e.append(d2)
-#                newtoc.append(e)
-                
             return serialize(tocdivs)
         
         return "ERROR: no results!"
--- a/css/docuviewer.css	Tue Feb 28 10:39:21 2012 +0100
+++ b/css/docuviewer.css	Tue Feb 28 18:21:59 2012 +0100
@@ -7,15 +7,22 @@
     vertical-align: top;
 }
 
-div.toc-text, div.toc-figures {
+div.toc-text, 
+div.toc-figures {
     max-width: 20em;
 }
 
-div.toc-text .toc, div.toc-figures .toc {
+div.col.results {
+	max-width: 20em;
+}
+
+div.toc-text .toc, 
+div.toc-figures .toc {
     float:left;
     clear:right; 
 }
-div.toc-text .toc.float.right, div.toc-figures .toc.float.right  {
+div.toc-text .toc.float.right, 
+div.toc-figures .toc.float.right  {
     float:right;
 }
 
@@ -29,3 +36,7 @@
 div.toc-thumbs .thumbcap {
     color: black;
 }
+
+span.hit.highlight {
+	background-color: lightgreen;
+}
\ No newline at end of file
--- a/documentViewer.py	Tue Feb 28 10:39:21 2012 +0100
+++ b/documentViewer.py	Tue Feb 28 18:21:59 2012 +0100
@@ -123,6 +123,7 @@
     toc_figures = PageTemplateFile('zpt/toc_figures', globals())
     toc_none = PageTemplateFile('zpt/toc_none', globals())
     common_template = PageTemplateFile('zpt/common_template', globals())
+    search_template = PageTemplateFile('zpt/search_template', globals())
     info_xml = PageTemplateFile('zpt/info_xml', globals())
     docuviewer_css = ImageFile('css/docuviewer.css',globals())
     # make ImageFile better for development
@@ -710,7 +711,7 @@
         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
         # int(current / grpsize) * grpsize +1))
         pageinfo['start'] = start
-        
+        # get number of pages
         np = int(docinfo.get('numPages', 0))
         if np == 0:
             # numPages unknown - maybe we can get it from text page
@@ -719,6 +720,8 @@
                 pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo)
                 np = int(docinfo.get('numPages', 0))
                 
+        # cache table of contents
+        pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
         pageinfo['numgroups'] = int(np / grpsize)
         if np % grpsize > 0:
             pageinfo['numgroups'] += 1
@@ -730,24 +733,24 @@
         pageinfo['pageZero'] = pageZero
         pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np)
                 
-        # TODO: do we need this here?
         pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg')
-        pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30))
-        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
-        pageinfo['tocPN'] = getInt(self.REQUEST.get('tocPN', '1'))
-        pageinfo['resultPN'] = getInt(self.REQUEST.get('resultPN','1'))
         
-        # limit tocPN TODO: do we need this?
-        if 'tocSize_%s'%tocMode in docinfo:
-            tocSize = docinfo['tocSize_%s'%tocMode]
-            tocPageSize = pageinfo['tocPageSize']
-            # cached toc           
-            if tocSize%tocPageSize>0:
-                tocPages=tocSize/tocPageSize+1
-            else:
-                tocPages=tocSize/tocPageSize
-                
-            pageinfo['tocPN'] = min(tocPages,pageinfo['tocPN'])
+        # cache search results
+        pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10))
+        query = self.REQUEST.get('query',None)
+        pageinfo['query'] = query
+        if query:
+            queryType = self.REQUEST.get('queryType', 'fulltextMorph')
+            pageinfo['queryType'] = queryType
+            pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1'))
+            self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo)
+            
+            # highlighting
+            highlightQuery = self.REQUEST.get('highlightQuery', None)
+            if highlightQuery:
+                pageinfo['highlightQuery'] = highlightQuery
+                pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '')
+                pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '')
             
         return pageinfo
 
--- a/zpt/common_template.zpt	Tue Feb 28 10:39:21 2012 +0100
+++ b/zpt/common_template.zpt	Tue Feb 28 18:21:59 2012 +0100
@@ -54,15 +54,15 @@
 
   <!-- toc ruler with previous/next toc page buttons -->
   <metal:block metal:define-macro="toc_ruler">
-    <form class="autosubmit" tal:attributes="action viewerUrl">
-      <input type="hidden" tal:define="params python:here.getParams('start', None)" tal:repeat="param params"
+    <form class="autosubmit" tal:attributes="action viewerUrl" tal:define="startParam startParam | string:start">
+      <input type="hidden" tal:define="params python:here.getParams(startParam, None)" tal:repeat="param params"
         tal:attributes="name param; value python:params[param]" /> <a tal:condition="batch/prevStart"
-        tal:attributes="href python:here.getLink('start',batch['prevStart'])">&lt;</a> <span tal:condition="not:batch/prevStart">&lt;</span>
-      <select class="autosubmit" name="start">
+        tal:attributes="href python:here.getLink(startParam,batch['prevStart'])">&lt;</a> <span tal:condition="not:batch/prevStart">&lt;</span>
+      <select class="autosubmit" tal:attributes="name startParam">
         <option tal:repeat="grp batch/batches" tal:attributes="selected python:(start==grp['start']); value grp/start"
           tal:content="string:${grp/start} - ${grp/end}" />
       </select> <input type="submit" value="Go" /> <a tal:condition="batch/nextStart"
-        tal:attributes="href python:here.getLink('start',batch['nextStart'])">&gt;</a> <span tal:condition="not:batch/nextStart">&gt;</span>
+        tal:attributes="href python:here.getLink(startParam,batch['nextStart'])">&gt;</a> <span tal:condition="not:batch/nextStart">&gt;</span>
     </form>
   </metal:block>
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/zpt/search_template.zpt	Tue Feb 28 18:21:59 2012 +0100
@@ -0,0 +1,21 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
+          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+</head>
+<body>
+  <!-- block used for main content area -->
+  <div metal:define-macro="results_div"
+    tal:define="start pageinfo/resultStart; resultsize python:docinfo.get('resultSize',0); grpsize pageinfo/resultPageSize;
+                batch python:here.getBatch(start=start,size=grpsize,end=resultsize);">
+    <div class="ruler" tal:define="startParam string:resultStart">
+        <metal:block metal:use-macro="here/template/common_template/macros/toc_ruler"/>
+    </div>
+    <div class="content"
+      tal:content="structure python:here.getResultsPage(mode=queryType,query=query,start=start,size=grpsize,pageinfo=pageinfo,docinfo=docinfo)" />
+  </div>
+<!--  tal:content="structure python:here.getResultsPage(mode=queryType,query=query,pn=1,pageinfo=pageinfo,docinfo=docinfo)" -->
+<!--  structure python:here.getTocPage(mode='text',start=start,pageinfo=pageinfo,docinfo=docinfo) -->
+</body>
+</html>
--- a/zpt/viewer_text.zpt	Tue Feb 28 10:39:21 2012 +0100
+++ b/zpt/viewer_text.zpt	Tue Feb 28 18:21:59 2012 +0100
@@ -63,11 +63,11 @@
       <!-- end of col-main -->
 
       <!-- right-side search results -->
-      <div class="col search" tal:condition="query">
+      <div class="col results" tal:condition="query">
         <!--"BEGIN SEARCH RESULTS"  -->
         <div class="options">
           <h4>Search results</h4>
-          <div tal:content="structure python:here.getResultsPage(mode=queryType,query=query,pn=1,pageinfo=pageinfo,docinfo=docinfo)"/>
+          <div metal:use-macro="here/template/search_template/macros/results_div"/>
         </div>
       </div>
 
@@ -91,7 +91,7 @@
                       value="dict" tal:attributes="checked python:'dict' in viewLayers" />
                     Dictionary
                   </li>
-                  <li tal:condition="query">
+                  <li tal:condition="python:query">
                     <input type="checkbox" class="autosubmit" name="viewLayer"
                       value="search"
                       tal:attributes="checked python:'search' in viewLayers" /> Search hits
@@ -128,6 +128,7 @@
             <!-- query text -->
             <input type="text" name="query" tal:attributes="value query"/>
             <input type="submit" value="Search"/>
+            <a tal:attributes="href python:here.getLink('query',None)">Clear</a>
             <ul>
                 <li>
                   <input type="radio" name="queryType" value="fulltext" 
--- a/zpt/viewer_xml.zpt	Tue Feb 28 10:39:21 2012 +0100
+++ b/zpt/viewer_xml.zpt	Tue Feb 28 18:21:59 2012 +0100
@@ -35,13 +35,13 @@
     </div>
     <div class="page-body" tal:condition="python:here.isAccessible(docinfo)">
       <!--table of contents-->
-      <div class="col-left">
+      <div class="col toc">
         <metal:block
           metal:use-macro="python:path('here/template/toc_%s/macros/main'%tocMode)" />
       </div>
 
       <!-- text page -->
-      <div class="col-main">
+      <div class="col main">
         <div class="ruler">
           <metal:block metal:use-macro="here/template/common_template/macros/page_ruler" />
         </div>
@@ -60,7 +60,7 @@
       <!-- col-main -->
 
       <!-- right-side options -->
-      <div class="col-right">
+      <div class="col buttons">
         <!--"BEGIN TEXT DISPLAY"  -->
         <div class="options">
           <h4>Text display</h4>
@@ -68,11 +68,17 @@
             <input type="hidden"
               tal:define="params python:here.getParams(params={'viewMode':None})"
               tal:repeat="param params"
-              tal:attributes="name param; value python:params[param]" /> 
-            <input class="autosubmit" type="radio" name="viewMode" value="text"
-              tal:attributes="checked python:viewMode=='text'" /> Text<br /> 
-            <input type="radio" class="autosubmit" name="viewMode" value="text"
-              tal:attributes="checked python:viewMode=='xml'" /> XML<br />
+              tal:attributes="name param; value python:params[param]" />
+            <ul>
+              <li>
+                <input class="autosubmit" type="radio" name="viewMode" value="text"
+                  tal:attributes="checked python:viewMode=='text'" /> Text
+              </li>
+              <li>
+                <input type="radio" class="autosubmit" name="viewMode" value="xml"
+                  tal:attributes="checked python:viewMode=='xml'" /> XML
+              </li>
+            </ul>
             <input type="submit" value="Go!" />
           </form>
         </div>
@@ -81,34 +87,20 @@
         <!--"BEGIN TEXT SIZE"-->
         <div class="options">
           <h4>Text size</h4>
-          <div class="fsizer">
-            &nbsp; <a href="javascript:fontSize(12);" class="fs_sml">S</a> <a
-              href="javascript:fontSize(14);" class="fs_med">M</a> <a
-              href="javascript:fontSize(16);" class="fs_lrg">L</a>
-          </div>
+          <ul class="fsizer">
+            <li>
+              <a href="javascript:fontSize(12);" class="fs_sml">S</a>
+            </li>
+            <li>
+              <a href="javascript:fontSize(14);" class="fs_med">M</a>
+            </li>
+            <li>
+              <a href="javascript:fontSize(16);" class="fs_lrg">L</a>
+            </li>
+          </ul>
         </div>
         <!--"END TEXT SIZE"-->
 
-        <!--"BEGIN TEXT NORMALIZATION"-->
-        <div class="options" tal:condition="python:viewMode!='xml'">
-          <h4>Text normalization</h4>
-          <form tal:attributes="action viewerUrl" class="autosubmit"
-            tal:define="norm python:pageinfo.get('characterNormalization','regPlusNorm');">
-            <input type="hidden"
-              tal:define="params python:here.getParams('characterNormalization',None)"
-              tal:repeat="param params"
-              tal:attributes="name param; value python:params[param]" /> <input
-              type="radio" class="autosubmit" name="characterNormalization" value="orig"
-              tal:attributes="checked python:norm=='orig'" /> Original<br /> <input
-              type="radio" class="autosubmit" name="characterNormalization" value="reg"
-              tal:attributes="checked python:norm=='reg'" /> Regularized<br /> <input
-              type="radio" class="autosubmit" name="characterNormalization"
-              value="regPlusNorm" tal:attributes="checked python:norm=='regPlusNorm'" />
-            Normalized<br /> <input type="submit" value="Go!" />
-          </form>
-        </div>
-        <!--"END TEXT NORMALIZATION"-->
-
       </div>
       <!-- /col-right -->