changeset 158:4f4fe4e56ffe

characterNormalization
author abukhman
date Tue, 24 Aug 2010 11:38:45 +0200
parents de82ae2e9850
children 29fc850d4a6f
files MpdlXmlTextServer.py documentViewer.py
diffstat 2 files changed, 18 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/MpdlXmlTextServer.py	Tue Aug 24 11:33:26 2010 +0200
+++ b/MpdlXmlTextServer.py	Tue Aug 24 11:38:45 2010 +0200
@@ -52,7 +52,7 @@
         queryType =pageinfo['queryType']
         viewMode=  pageinfo['viewMode']
         tocMode = pageinfo['tocMode']
-        #characterNormalization = pageinfo ['characterNormalization']
+        characterNormalization = pageinfo ['characterNormalization']
         tocPN = pageinfo['tocPN']
         selfurl = self.absolute_url()
         
@@ -72,9 +72,7 @@
                         href = hrefNode.nodeValue
                         if href.startswith('page-fragment.xql'):
                             selfurl = self.absolute_url()            
-                            #pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization))
-                            pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN))
-                            
+                            pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization))
                             hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)                                           
                 return serializeNode(pagenode)        
         if (queryType=="fulltextMorph"):
@@ -88,9 +86,7 @@
                         href = hrefNode.nodeValue
                         if href.startswith('page-fragment.xql'):
                             selfurl = self.absolute_url()       
-                            #pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization))
-                            pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN))
-                            
+                            pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN,characterNormalization))
                             hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)  
                         if href.startswith('../lt/lemma.xql'):
                             hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))        
@@ -108,8 +104,7 @@
                     hrefNode = l.getAttributeNodeNS(None, u"href")
                     if hrefNode:
                         href = hrefNode.nodeValue
-                        #hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization))
-                        hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn))             
+                        hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization))             
                         if href.startswith('../lt/lex.xql'):
                             hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)         
                             l.setAttributeNS(None, 'target', '_blank')
@@ -135,21 +130,21 @@
         docinfo['numPages'] = text.count("<pb ")
         return docinfo
        
-    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
+    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="reg"):
         """returns single page from fulltext"""
         docpath = docinfo['textURLPath']
         path = docinfo['textURLPath']
         url = docinfo['url']
         viewMode= pageinfo['viewMode']
         tocMode = pageinfo['tocMode']
-        #characterNormalization = pageinfo ['characterNormalization']
+        characterNormalization = pageinfo ['characterNormalization']
         tocPN = pageinfo['tocPN']
         selfurl = self.absolute_url()   
         if mode == "text_dict":
             textmode = "textPollux"
         else:
             textmode = mode
-        #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))
+        logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))
         textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
         if highlightQuery is not None:
             textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)           
@@ -170,7 +165,7 @@
                     if hrefNode:
                         href= hrefNode.nodeValue
                         if href.startswith('#note-'):
-                            hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn))
+                            hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&characterNormalization=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,characterNormalization,tocPN,pn))
                 return serializeNode(pagenode)
         if mode == "xml":
               # first div contains text
@@ -220,7 +215,7 @@
                             l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;")
                             l.setAttributeNS(None, 'onClick', 'popupWin.focus();')   
                         if href.startswith('#note-'):
-                            hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn))    
+                            hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&characterNormalization=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,characterNormalization,tocPN,pn))    
                 return serializeNode(pagenode)
         return "no text here"
 
@@ -307,17 +302,16 @@
         url = docinfo['url']
         selfurl = self.absolute_url()  
         viewMode=  pageinfo['viewMode']
-        #characterNormalization =pageinfo ['characterNormalization']
+        characterNormalization =pageinfo ['characterNormalization']
         tocMode = pageinfo['tocMode']
         tocPN = pageinfo['tocPN']  
         
         data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))  
 
-        #page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&characterNormalization=%s'%(selfurl,url, viewMode, tocMode, tocPN, characterNormalization))
-        page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
+        page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&characterNormalization=%s'%(selfurl,url, viewMode, tocMode, tocPN, characterNormalization))
         text = page.replace('mode=image','mode=texttool')
-        #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))
-        #logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))
+        logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))
+        logging.debug("documentViewer (characterNormalization) text: %s"%(text))
         return text
     
     def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
--- a/documentViewer.py	Tue Aug 24 11:33:26 2010 +0200
+++ b/documentViewer.py	Tue Aug 24 11:38:45 2010 +0200
@@ -678,7 +678,7 @@
         self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo
                
-    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
+    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None, characterNormalization=None):
         """returns pageinfo with the given parameters"""
         pageinfo = {}
         current = getInt(current)
@@ -701,7 +701,7 @@
                 pageinfo['numgroups'] += 1        
         pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode
-        #pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
+        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
         pageinfo['query'] = self.REQUEST.get('query',' ')
         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
@@ -712,7 +712,9 @@
         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
         toc = int (pageinfo['tocPN'])
         pageinfo['textPages'] =int (toc)
-              
+        
+       
+        
         if 'tocSize_%s'%tocMode in docinfo:
             tocSize = int(docinfo['tocSize_%s'%tocMode])
             tocPageSize = int(pageinfo['tocPageSize'])