Mercurial > hg > documentViewer

--- a/MpdlXmlTextServer.py	Fri Feb 17 10:46:03 2012 +0100
+++ b/MpdlXmlTextServer.py	Tue Feb 21 19:16:28 2012 +0100
@@ -1,16 +1,12 @@
 from OFS.SimpleItem import SimpleItem
 from Products.PageTemplates.PageTemplateFile import PageTemplateFile

-from Ft.Xml import EMPTY_NAMESPACE, Parse
-from Ft.Xml.Domlette import NonvalidatingReader
-import Ft.Xml.Domlette
-import cStringIO
-
 import xml.etree.ElementTree as ET

 import re
 import logging
 import urllib
+import base64

 from SrvTxtUtils import getInt, getText, getHttpData

@@ -25,33 +21,6 @@
     return s


-def getTextFromNode(node):
-    """get the cdata content of a node"""
-    if node is None:
-        return ""
-
-    # 4Suite:
-    nodelist=node.childNodes
-    text = ""
-    for n in nodelist:
-        if n.nodeType == node.TEXT_NODE:
-           text = text + n.data
-
-    return text
-
-def serializeNode(node, encoding="utf-8"):
-    """returns a string containing node as XML"""
-    #s = ET.tostring(node)
-
-    # 4Suite:
-    stream = cStringIO.StringIO()
-    Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
-    s = stream.getvalue()
-    stream.close()
-
-    return s
-
-
 class MpdlXmlTextServer(SimpleItem):
     """TextServer implementation for MPDL-XML eXist server"""
     meta_type="MPDL-XML TextServer"
@@ -81,136 +50,25 @@
         url = self.serverUrl+method
         return getHttpData(url,data,timeout=self.timeout)

-    # WTF: what does this really do? can it be integrated in getPage?
-    def getSearch(self, pageinfo=None,  docinfo=None):
-        """get search list"""
-        logging.debug("getSearch()")
-        docpath = docinfo['textURLPath']
-        url = docinfo['url']
-        pagesize = pageinfo['queryPageSize']
-        pn = pageinfo.get('searchPN',1)
-        sn = pageinfo.get('sn',None) #TODO: is this s now?
-        highlightQuery = pageinfo['highlightQuery']
-        query =pageinfo['query']
-        queryType =pageinfo['queryType']
-        viewMode=  pageinfo['viewMode']
-        tocMode = pageinfo['tocMode']
-        characterNormalization = pageinfo['characterNormalization']
-        #optionToggle = pageinfo['optionToggle']
-        tocPN = pageinfo['tocPN']
-        selfurl = self.absolute_url()
-        data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery)))
-        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
-        pagedom = Parse(pagexml)
-
-        """
-        pagedivs = pagedom.xpath("//div[@class='queryResultHits']")
-        if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")):
-            if len(pagedivs)>0:
-                docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0]))
-                s = getTextFromNode(pagedivs[0])
-                s1 = int(s)/10+1
-                try:
-                    docinfo['queryResultHits'] = int(s1)
-                    logging.debug("SEARCH ENTRIES: %s"%(s1))
-                except:
-                    docinfo['queryResultHits'] = 0
-        """
-        if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):
-            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
-            if len(pagedivs)>0:
-                pagenode=pagedivs[0]
-                links=pagenode.xpath("//a")
-                for l in links:
-                    hrefNode = l.getAttributeNodeNS(None, u"href")
-                    if hrefNode:
-                        href = hrefNode.nodeValue
-                        if href.startswith('page-fragment.xql'):
-                            selfurl = self.absolute_url()
-                            pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization))
-                            hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
-                #logging.debug("PUREXML :%s"%(serializeNode(pagenode)))
-                return serializeNode(pagenode)
-        if (queryType=="fulltextMorph"):
-            pagedivs = pagedom.xpath("//div[@class='queryResult']")
-            if len(pagedivs)>0:
-                pagenode=pagedivs[0]
-                links=pagenode.xpath("//a")
-                for l in links:
-                    hrefNode = l.getAttributeNodeNS(None, u"href")
-                    if hrefNode:
-                        href = hrefNode.nodeValue
-                        if href.startswith('page-fragment.xql'):
-                            selfurl = self.absolute_url()
-                            pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization))
-                            hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
-                        if href.startswith('../lt/lemma.xql'):
-                            hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl))
-                            l.setAttributeNS(None, 'target', '_blank')
-                            l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
-                            l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
-                pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']")
-                return serializeNode(pagenode)
-        if (queryType=="ftIndex")or(queryType=="ftIndexMorph"):
-            pagedivs= pagedom.xpath("//div[@class='queryResultPage']")
-            if len(pagedivs)>0:
-                pagenode=pagedivs[0]
-                links=pagenode.xpath("//a")
-                for l in links:
-                    hrefNode = l.getAttributeNodeNS(None, u"href")
-                    if hrefNode:
-                        href = hrefNode.nodeValue
-                        hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization))
-                        if href.startswith('../lt/lex.xql'):
-                            hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl)
-                            l.setAttributeNS(None, 'target', '_blank')
-                            l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
-                            l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
-                        if href.startswith('../lt/lemma.xql'):
-                            hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl))
-                            l.setAttributeNS(None, 'target', '_blank')
-                            l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
-                            l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
-                return serializeNode(pagenode)
-        return "no text here"
-
-    def getGisPlaces(self, docinfo=None, pageinfo=None):
-        """ Show all Gis Places of whole Page"""
-        xpath='//place'
+
+    def getPlacesOnPage(self, docinfo=None, pn=None):
+        """Returns list of {'id','name'} dicts of the GIS places on page pn (None if docinfo has no textURLPath)"""
         docpath = docinfo.get('textURLPath',None)
         if not docpath:
             return None

-        pn = pageinfo['current']
-        hrefList=[]
-        myList= ""
-        text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn))
+        places=[]
+        text=self.getServerData("xpath.xql", "document=%s&xpath=//place&pn=%s"%(docpath,pn))
         dom = ET.fromstring(text)
-        result = dom.findall(".//result/resultPage/place")
+        result = dom.findall(".//resultPage/place")
         for l in result:
-            href = l.get("id")
-            hrefList.append(href)
-            # WTF: what does this do?
-            myList = ",".join(hrefList)
-        #logging.debug("getGisPlaces :%s"%(myList))
-        return myList
+            place_id = l.get("id")
+            # skip places without id attribute: the ids get joined into the map URL
+            if place_id is not None:
+                places.append({'id': place_id, 'name': l.text})
+
+        return places

-    def getAllGisPlaces (self, docinfo=None, pageinfo=None):
-        """Show all Gis Places of whole Book """
-        xpath ='//echo:place'
-        hrefList=[]
-        myList=""
-        text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath))
-        dom = ET.fromstring(text)
-        result = dom.findall(".//result/resultPage/place")
-
-        for l in result:
-            href = l.get("id")
-            hrefList.append(href)
-            # WTF: what does this do?
-            myList = ",".join(hrefList)
-            #logging.debug("getALLGisPlaces :%s"%(myList))
-        return myList

     def processPageInfo(self, dom, docinfo, pageinfo):
         """processes page info divs from dom and stores in docinfo and pageinfo"""
@@ -379,63 +237,24 @@

         # gis mode
         elif mode == "gis":
-            name = docinfo['name']
             if pagediv is not None:
                 # check all a-tags
                 links = pagediv.findall(".//a")
+                # add our URL as backlink
+                selfurl = self.getLink()
+                doc = base64.b64encode(selfurl)
                 for l in links:
                     href = l.get('href')
                     if href:
-                        if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
-                            l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name))
-                            l.set('target', '_blank')
+                        if href.startswith('http://mappit.mpiwg-berlin.mpg.de'):
+                            l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href))
+                            l.set('target', '_blank')

                 return serialize(pagediv)

         return None

-    # TODO: should be getWordInfo
-    def getWordInfo(self, word='', language='', display=''):
-        """show information (like dictionaries) about word"""
-        data = self.getServerData("lt/wordInfo.xql","language=%s&word=%s&display=%s&output=html"%(language,urllib.quote(word),urllib.quote(display)))
-        return data
-
-    # WTF: what does this do?
-    def getLemma(self, lemma=None, language=None):
-        """simular words lemma """
-        data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html")
-        return data
-
-    # WTF: what does this do?
-    def getLemmaQuery(self, query=None, language=None):
-        """simular words lemma """
-        data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html")
-        return data
-
-    # WTF: what does this do?
-    def getLex(self, query=None, language=None):
-        #simular words lemma
-        data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query))
-        return data

-    # WTF: what does this do?
-    def getQuery (self,  docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
-         #number of
-         docpath = docinfo['textURLPath']
-         pagesize = pageinfo['queryPageSize']
-         pn = pageinfo['searchPN']
-         query =pageinfo['query']
-         queryType =pageinfo['queryType']
-         tocSearch = 0
-         tocDiv = None
-
-         pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
-         pagedom = Parse(pagexml)
-         numdivs = pagedom.xpath("//div[@class='queryResultHits']")
-         tocSearch = int(getTextFromNode(numdivs[0]))
-         tc=int((tocSearch/10)+1)
-         return tc
-
     def getToc(self, mode="text", docinfo=None):
         """loads table of contents and stores XML in docinfo"""
         logging.debug("getToc mode=%s"%mode)
--- a/documentViewer.py	Fri Feb 17 10:46:03 2012 +0100
+++ b/documentViewer.py	Tue Feb 21 19:16:28 2012 +0100
@@ -182,47 +182,11 @@
         """returns one page of the table of contents"""
         return self.template.fulltextclient.getTocPage(**args)

-    #WTF?
-    def getQuery(self, **args):
-        """get query in search"""
-        return self.template.fulltextclient.getQuery(**args)
-
-    #WTF?
-    def getSearch(self, **args):
-        """get search"""
-        return self.template.fulltextclient.getSearch(**args)
-
-    #WTF?
-    def getGisPlaces(self, **args):
-        """get gis places"""
-        return self.template.fulltextclient.getGisPlaces(**args)
+    def getPlacesOnPage(self, **args):
+        """get list of gis places on one page (passes all args to template.fulltextclient.getPlacesOnPage)"""
+        return self.template.fulltextclient.getPlacesOnPage(**args)

     #WTF?
-    def getAllGisPlaces(self, **args):
-        """get all gis places """
-        return self.template.fulltextclient.getAllGisPlaces(**args)
-
-    #WTF?
-    def getWordInfo(self, **args):
-        """get translate"""
-        return self.template.fulltextclient.getWordInfo(**args)
-
-    #WTF?
-    def getLemma(self, **args):
-        """get lemma"""
-        return self.template.fulltextclient.getLemma(**args)
-
-    #WTF?
-    def getLemmaQuery(self, **args):
-        """get query"""
-        return self.template.fulltextclient.getLemmaQuery(**args)
-
-    #WTF?
-    def getLex(self, **args):
-        """get lex"""
-        return self.template.fulltextclient.getLex(**args)
-
-    #WTF?
     thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
     security.declareProtected('View','thumbs_rss')
     def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
@@ -315,6 +279,7 @@
         # and execute with parameters
         return pt(docinfo=docinfo, pageinfo=pageinfo)

+    #WTF?
     def generateMarks(self,mk):
         ret=""
         if mk is None:
--- a/zpt/viewer_text.zpt	Fri Feb 17 10:46:03 2012 +0100
+++ b/zpt/viewer_text.zpt	Tue Feb 21 19:16:28 2012 +0100
@@ -26,9 +26,9 @@
 <body tal:condition="numPages">
   <tal:block
     tal:define="docpath docinfo/textURLPath;
-                               pn pageinfo/pn;
-                               flowLtr python:pageinfo.get('pageFlow','ltr')!='rtl';
-                               textPage python:here.getTextPage(mode=viewLayer, pn=pn, docinfo=docinfo, pageinfo=pageinfo) or '[no text here]';">
+                pn pageinfo/pn;
+                flowLtr python:pageinfo.get('pageFlow','ltr')!='rtl';
+                textPage python:here.getTextPage(mode=viewLayer, pn=pn, docinfo=docinfo, pageinfo=pageinfo) or '[no text here]';">
     <!-- header -->
     <div class="page-head">
       <metal:block metal:use-macro="here/template/common_template/macros/head" />
@@ -68,20 +68,18 @@
             <input type="hidden"
               tal:define="params python:here.getParams(params={'viewLayer':None,'viewMode':None})"
               tal:repeat="param params"
-              tal:attributes="name param; value python:params[param]" />
-            <input class="autosubmit" type="radio" name="viewMode" value="text"
-              tal:attributes="checked python:viewMode=='text'" /> Text<br />
-            <span class="optionsText">
-              &nbsp;&nbsp;<input type="checkbox" class="autosubmit" name="viewLayer"
-              value="dict" tal:attributes="checked python:viewLayer=='dict'" /> Dictionary<br />
-            </span>
-            <span class="optionsText" tal:condition="python:docinfo.get('numPlaces',0)">
+              tal:attributes="name param; value python:params[param]" /> <input
+              class="autosubmit" type="radio" name="viewMode" value="text"
+              tal:attributes="checked python:viewMode=='text'" /> Text<br /> <span
+              class="optionsText"> &nbsp;&nbsp;<input type="checkbox"
+              class="autosubmit" name="viewLayer" value="dict"
+              tal:attributes="checked python:viewLayer=='dict'" /> Dictionary<br />
+            </span> <span class="optionsText" tal:condition="python:docinfo.get('numPlaces',0)">
               &nbsp;&nbsp;<input type="checkbox" class="autosubmit" name="viewLayer"
               value="gis" tal:attributes="checked python:viewLayer=='gis'" /> Places<br />
-            </span>
-            <input type="radio" class="autosubmit" name="viewMode" value="xml"
-              tal:attributes="checked python:viewMode=='xml'" /> XML<br />
-            <input type="submit" value="Go!" />
+            </span> <input type="radio" class="autosubmit" name="viewMode" value="xml"
+              tal:attributes="checked python:viewMode=='xml'" /> XML<br /> <input
+              type="submit" value="Go!" />
           </form>
         </div>
         <!--"END TEXT DISPLAY"-->
@@ -128,24 +126,22 @@

         <!--"BEGIN PLACES"-->
         <div class="options" tal:condition="python:viewLayer=='gis'">
-          <tal:block
-            tal:define="gisPlaces python:here.getGisPlaces(docinfo=docinfo, pageinfo=pageinfo); gisAllPlaces python:here.getAllGisPlaces(docinfo=docinfo, pageinfo=pageinfo);">
-            <span><b>Places</b></span>
-            <br />(Link to extern:)<br />
-            <span>Page</span>
-            <span style="float: right"> <a
-              tal:attributes="href python:'http://chinagis.mpiwg-berlin.mpg.de/chinagis/REST/db/mpdl/%s?id=%s&format=%s'%(name,gisPlaces,'gis')"
-              target="_blank"> <img src="images/arrow.png" alt="" />
-            </a>
-            </span>
-            <br />
-            <span>Book</span>
-            <span style="float: right"> <a
-              tal:attributes="href python:'http://chinagis.mpiwg-berlin.mpg.de/chinagis/REST/db/mpdl/%s?format=%s'%(name,'gis')"
-              target="_blank"> <img src="images/arrow.png" alt="" />
-            </a>
-            </span>
-            <br />
+          <tal:block tal:define="
+            name docinfo/documentName;
+            places python:here.getPlacesOnPage(docinfo=docinfo, pn=pn) or [];
+            pidlist python:','.join([p['id'] for p in places if p['id']]);">
+            <h4>Places</h4>
+            <ul>
+            <li><a
+              tal:attributes="href python:'http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?id=%s&format=gis'%(name,pidlist)"
+              target="_blank">on this page</a>
+            </li>
+            <li>
+              <a
+              tal:attributes="href python:'http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?format=gis'%(name)"
+              target="_blank">in whole document</a>
+            </li>
+            </ul>
           </tal:block>
         </div>
         <!--"END PLACES"-->