changeset 167:7e2b97941a66

characterNormalization
author abukhman
date Tue, 24 Aug 2010 14:43:24 +0200
parents ffb5c62bd459
children 6f31a7c37914
files MpdlXmlTextServer.py documentViewer.py
diffstat 2 files changed, 35 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/MpdlXmlTextServer.py	Tue Aug 24 14:34:32 2010 +0200
+++ b/MpdlXmlTextServer.py	Tue Aug 24 14:43:24 2010 +0200
@@ -42,8 +42,8 @@
         """get search list"""
         docpath = docinfo['textURLPath'] 
         url = docinfo['url']
-        #logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
-        #logging.debug("documentViewer (gettoc) url: %s"%(url))
+        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
+        logging.debug("documentViewer (gettoc) url: %s"%(url))
         pagesize = pageinfo['queryPageSize']
         pn = pageinfo['searchPN']
         sn = pageinfo['sn']
@@ -253,12 +253,12 @@
          numdivs = pagedom.xpath("//div[@class='queryResultHits']")
          tocSearch = int(getTextFromNode(numdivs[0]))
          tc=int((tocSearch/10)+1)
-         #logging.debug("documentViewer (gettoc) tc: %s"%(tc))
+         logging.debug("documentViewer (gettoc) tc: %s"%(tc))
          return tc
 
     def getToc(self, mode="text", docinfo=None):
         """loads table of contents and stores in docinfo"""
-        #logging.debug("documentViewer (gettoc) mode: %s"%(mode))
+        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
         if mode == "none":
             return docinfo        
         if 'tocSize_%s'%mode in docinfo:
@@ -302,12 +302,12 @@
         url = docinfo['url']
         selfurl = self.absolute_url()  
         viewMode=  pageinfo['viewMode']
-        characterNormalization =pageinfo['characterNormalization']
+        characterNormalization =pageinfo ['characterNormalization']
         tocMode = pageinfo['tocMode']
         tocPN = pageinfo['tocPN']  
         
-        data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))  
-        page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&characterNormalization=%s'%(selfurl,url, viewMode, tocMode, tocPN,characterNormalization))
+        data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=%s"%(docpath,queryType, pagesize, pn,characterNormalization))  
+        page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
         text = page.replace('mode=image','mode=texttool')
         logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))
         #logging.debug("documentViewer (characterNormalization) text: %s"%(text))
--- a/documentViewer.py	Tue Aug 24 14:34:32 2010 +0200
+++ b/documentViewer.py	Tue Aug 24 14:43:24 2010 +0200
@@ -68,7 +68,7 @@
     errmsg = None
     for cnt in range(num_tries):
         try:
-            #logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
+            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
             if sys.version_info < (2, 6):
                 # set timeout on socket -- ugly :-(
                 import socket
@@ -79,12 +79,12 @@
             # check result?
             break
         except urllib2.HTTPError, e:
-            #logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
+            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
             errmsg = str(e)
             # stop trying
             break
         except urllib2.URLError, e:
-            #logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
+            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
             errmsg = str(e)
             # stop trying
             #break
@@ -203,8 +203,8 @@
         @param viewMode: if images display images, if text display text, default is images (text,images or auto)
         
         '''
-        #logging.debug("HHHHHHHHHHHHHH:load the rss")
-        #logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
+        logging.debug("HHHHHHHHHHHHHH:load the rss")
+        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
         
         if not hasattr(self, 'template'):
             # create template folder if it doesn't exist
@@ -237,7 +237,7 @@
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''
         
-        #logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
+        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
         
         if not hasattr(self, 'template'):
             # this won't work
@@ -323,7 +323,7 @@
                 params[param] = str(val)
                 
         # quote values and assemble into query string
-        #logging.debug("XYXXXXX: %s"%repr(params.items()))
+        logging.debug("XYXXXXX: %s"%repr(params.items()))
         ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
         url=self.REQUEST['URL1']+"?"+ps
         return url
@@ -342,21 +342,21 @@
     def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)
-        #logging.debug("documentViewer (accessOK) access type %s"%access)
+        logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':
-            #logging.debug("documentViewer (accessOK) access is free")
+            logging.debug("documentViewer (accessOK) access is free")
             return True
         elif access is None or access in self.authgroups:
             # only local access -- only logged in users
             user = getSecurityManager().getUser()
-            #logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
+            logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             if user is not None:
                 #print "user: ", user
                 return (user.getUserName() != "Anonymous User")
             else:
                 return False
         
-        #logging.error("documentViewer (accessOK) unknown access type %s"%access)
+        logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False
     
                 
@@ -371,7 +371,7 @@
        
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
     
-        #logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
+        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
         
         txt = getHttpData(infoUrl)
         if txt is None:
@@ -379,7 +379,7 @@
 
         dom = Parse(txt)
         sizes=dom.xpath("//dir/size")
-        #logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
+        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
         
         if sizes:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))
@@ -405,7 +405,7 @@
             if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"
                 
-        #logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
+        logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
         txt=getHttpData(metaUrl)
         if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))
@@ -435,7 +435,7 @@
         
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets authorization info from the index.meta file at path or given by dom"""
-        #logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
+        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
         
         access = None
         
@@ -459,7 +459,7 @@
         
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets bibliographical info from the index.meta file at path or given by dom"""
-        #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
+        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
         
         if docinfo is None:
             docinfo = {}
@@ -469,7 +469,7 @@
                 path=getParentDir(path)
             dom = self.getIndexMeta(path)
         
-        #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
+        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"
         bib = dom.xpath("//bib/*")
         if bib and len(bib)>0:
@@ -500,7 +500,7 @@
             try:
                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
             except: pass
-            #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
+            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
             try:
                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
             except:
@@ -511,7 +511,7 @@
     
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""
-        #logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
+        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
         if docinfo is None:
            docinfo = {}
         if docinfo.get('lang', None) is None:
@@ -538,7 +538,7 @@
                 archivePath += "/" + archiveName
         else:
             # try to get archive-path from url
-            #logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
+            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
             if (not url.startswith('http')):
                 archivePath = url.replace('index.meta', '')
                 
@@ -632,7 +632,7 @@
     
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path is the path to the images; it assumes that the index.meta file is one level higher."""
-        #logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
+        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
         if docinfo is None:
             docinfo = {}
         path=path.replace("/mpiwg/online","")
@@ -642,7 +642,7 @@
         pathorig=path
         for x in range(cut):       
                 path=getParentDir(path)
-        #logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
+        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl
         
@@ -654,13 +654,13 @@
     
     def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""
-        #logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
+        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
-                #logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
+                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                 return docinfo
         # new docinfo
         docinfo = {'mode': mode, 'url': url}
@@ -671,10 +671,10 @@
         elif mode=="filepath":
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:
-            #logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
+            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                         
-        #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
+        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo
                
@@ -701,7 +701,7 @@
                 pageinfo['numgroups'] += 1        
         pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode
-        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
+        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
         pageinfo['query'] = self.REQUEST.get('query',' ')
         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
@@ -767,7 +767,7 @@
     self._setObject(id, DocumentViewerTemplate(id))
     ob = getattr(self, id)
     txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
-    #logging.info("txt %s:"%txt)
+    logging.info("txt %s:"%txt)
     ob.pt_edit(txt,"text/html")
     if title:
         ob.pt_setTitle(title)