--- documentViewer/documentViewer.py	2010/06/16 18:46:29	1.69.2.9
+++ documentViewer/documentViewer.py	2010/08/24 12:34:32	1.94
@@ -7,6 +7,7 @@ from AccessControl import getSecurityMan
 from Globals import package_home
 
 from Ft.Xml import EMPTY_NAMESPACE, Parse
+import Ft.Xml.Domlette
 import os.path
 import sys
 import urllib
@@ -42,7 +43,7 @@ def getTextFromNode(nodename):
 def serializeNode(node, encoding='utf-8'):
     """returns a string containing node as XML"""
     buf = cStringIO.StringIO()
-    Print(node, stream=buf, encoding=encoding)
+    Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
     s = buf.getvalue()
     buf.close()
     return s
@@ -67,7 +68,7 @@ def getHttpData(url, data=None, num_trie
     errmsg = None
     for cnt in range(num_tries):
         try:
-            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
+            #logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
             if sys.version_info < (2, 6):
                 # set timeout on socket -- ugly :-(
                 import socket
@@ -78,12 +79,12 @@ def getHttpData(url, data=None, num_trie
             # check result?
             break
         except urllib2.HTTPError, e:
-            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
+            #logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
             errmsg = str(e)
             # stop trying
             break
         except urllib2.URLError, e:
-            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
+            #logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
             errmsg = str(e)
             # stop trying
             #break
@@ -118,6 +119,7 @@ class documentViewer(Folder):
     page_main_images = PageTemplateFile('zpt/page_main_images', globals())
     page_main_text = PageTemplateFile('zpt/page_main_text', globals())
     page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
+    page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals())
     page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
     head_main = PageTemplateFile('zpt/head_main', globals())
     docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
@@ -144,7 +146,7 @@ class documentViewer(Folder):
         self._setObject('template',templateFolder) # old style
         try:
             import MpdlXmlTextServer
-            textServer = MpdlXmlTextServer(id='fulltextclient')
+            textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName)
             #templateFolder['fulltextclient'] = xmlRpcClient
             templateFolder._setObject('fulltextclient',textServer)
         except Exception, e:
@@ -171,9 +173,9 @@ class documentViewer(Folder):
         """get search"""
         return self.template.fulltextclient.getSearch(**args)
 
-    def getNumPages(self, **args):
+    def getNumPages(self, docinfo):
         """get numpages"""
-        return self.template.fulltextclient.getNumPages(**args)
+        return self.template.fulltextclient.getNumPages(docinfo)
 
     def getTranslate(self, **args):
         """get translate"""
@@ -201,8 +203,8 @@ class documentViewer(Folder):
         @param viewMode: if images display images, if text display text, default is images (text,images or auto)
         
         '''
-        logging.debug("HHHHHHHHHHHHHH:load the rss")
-        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
+        #logging.debug("HHHHHHHHHHHHHH:load the rss")
+        #logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
         
         if not hasattr(self, 'template'):
             # create template folder if it doesn't exist
@@ -216,7 +218,7 @@ class documentViewer(Folder):
         pt = getattr(self.template, 'thumbs_main_rss')
         
         if viewMode=="auto": # automodus gewaehlt
-            if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert
+            if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): # textURL or textURLPath is present in docinfo
                 viewMode="text"
             else:
                 viewMode="images"
@@ -231,10 +233,11 @@ class documentViewer(Folder):
         @param url: url which contains display information
         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
+        @param characterNormalization: type of text display (reg, norm, none)
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''
         
-        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
+        #logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
         
         if not hasattr(self, 'template'):
             # this won't work
@@ -242,17 +245,16 @@ class documentViewer(Folder):
             return "ERROR: template folder missing!"
             
         if not getattr(self, 'digilibBaseUrl', None):
-            self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
+            self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary"
             
         docinfo = self.getDocinfo(mode=mode,url=url)
         
-        
         if tocMode != "thumbs":
             # get table of contents
             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
             
         if viewMode=="auto": # automodus gewaehlt
-            if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert
+            if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): # textURL or textURLPath is present in docinfo
                 viewMode="text_dict"
             else:
                 viewMode="images"
@@ -266,8 +268,8 @@ class documentViewer(Folder):
         ret=""
         if mk is None:
             return ""
-    	if type(mk) is not ListType:
-    		mk=[mk]
+        if not isinstance(mk, list):
+            mk=[mk]
         for m in mk:
             ret+="mk=%s"%m
         return ret
@@ -305,7 +307,8 @@ class documentViewer(Folder):
                 params["url"] = getParentDir(params["url"])
                 
         # quote values and assemble into query string
-        ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
+        #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
+        ps = urllib.urlencode(params)
         url=self.REQUEST['URL1']+"?"+ps
         return url
 
@@ -320,7 +323,7 @@ class documentViewer(Folder):
                 params[param] = str(val)
                 
         # quote values and assemble into query string
-        logging.debug("XYXXXXX: %s"%repr(params.items()))
+        #logging.debug("XYXXXXX: %s"%repr(params.items()))
         ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
         url=self.REQUEST['URL1']+"?"+ps
         return url
@@ -339,20 +342,21 @@ class documentViewer(Folder):
     def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)
-        logging.debug("documentViewer (accessOK) access type %s"%access)
+        #logging.debug("documentViewer (accessOK) access type %s"%access)
         if access is not None and access == 'free':
-            logging.debug("documentViewer (accessOK) access is free")
+            #logging.debug("documentViewer (accessOK) access is free")
             return True
         elif access is None or access in self.authgroups:
             # only local access -- only logged in users
             user = getSecurityManager().getUser()
+            #logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
             if user is not None:
                 #print "user: ", user
                 return (user.getUserName() != "Anonymous User")
             else:
                 return False
         
-        logging.debug("documentViewer (accessOK) unknown access type %s"%access)
+        #logging.error("documentViewer (accessOK) unknown access type %s"%access)
         return False
     
                 
@@ -367,7 +371,7 @@ class documentViewer(Folder):
        
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
     
-        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
+        #logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
         
         txt = getHttpData(infoUrl)
         if txt is None:
@@ -375,7 +379,7 @@ class documentViewer(Folder):
 
         dom = Parse(txt)
         sizes=dom.xpath("//dir/size")
-        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
+        #logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
         
         if sizes:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))
@@ -401,7 +405,7 @@ class documentViewer(Folder):
             if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"
                 
-        logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
+        #logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
         txt=getHttpData(metaUrl)
         if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))
@@ -431,7 +435,7 @@ class documentViewer(Folder):
         
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets authorization info from the index.meta file at path or given by dom"""
-        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
+        #logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
         
         access = None
         
@@ -455,7 +459,7 @@ class documentViewer(Folder):
         
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets bibliographical info from the index.meta file at path or given by dom"""
-        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
+        #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
         
         if docinfo is None:
             docinfo = {}
@@ -465,7 +469,7 @@ class documentViewer(Folder):
                 path=getParentDir(path)
             dom = self.getIndexMeta(path)
         
-        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
+        #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"
         bib = dom.xpath("//bib/*")
         if bib and len(bib)>0:
@@ -496,7 +500,7 @@ class documentViewer(Folder):
             try:
                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
             except: pass
-            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
+            #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
             try:
                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
             except:
@@ -507,7 +511,7 @@ class documentViewer(Folder):
     
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""
-        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
+        #logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
         if docinfo is None:
            docinfo = {}
         if docinfo.get('lang', None) is None:
@@ -534,7 +538,7 @@ class documentViewer(Folder):
                 archivePath += "/" + archiveName
         else:
             # try to get archive-path from url
-            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
+            #logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
             if (not url.startswith('http')):
                 archivePath = url.replace('index.meta', '')
                 
@@ -567,7 +571,8 @@ class documentViewer(Folder):
         if viewerUrls and (len(viewerUrls) > 0):
             viewerUrl = getTextFromNode(viewerUrls[0])
             docinfo['viewerURL'] = viewerUrl
-                   
+        
+        # old style text URL
         textUrls = dom.xpath("//texttool/text")
         if textUrls and (len(textUrls) > 0):
             textUrl = getTextFromNode(textUrls[0])
@@ -579,13 +584,14 @@ class documentViewer(Folder):
             
             docinfo['textURL'] = textUrl
     
+        # new style text-url-path
         textUrls = dom.xpath("//texttool/text-url-path")
         if textUrls and (len(textUrls) > 0):
             textUrl = getTextFromNode(textUrls[0])
             docinfo['textURLPath'] = textUrl
             if not docinfo['imagePath']:
                 # text-only, no page images
-                docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht    
+                docinfo = self.getNumPages(docinfo)
          
         presentationUrls = dom.xpath("//texttool/presentation")
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
@@ -626,7 +632,7 @@ class documentViewer(Folder):
     
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""
-        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
+        #logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
         if docinfo is None:
             docinfo = {}
         path=path.replace("/mpiwg/online","")
@@ -636,7 +642,7 @@ class documentViewer(Folder):
         pathorig=path
         for x in range(cut):       
                 path=getParentDir(path)
-        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
+        #logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl
         
@@ -648,13 +654,13 @@ class documentViewer(Folder):
     
     def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""
-        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
+        #logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
-                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
+                #logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                 return docinfo
         # new docinfo
         docinfo = {'mode': mode, 'url': url}
@@ -665,10 +671,10 @@ class documentViewer(Folder):
         elif mode=="filepath":
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:
-            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
+            #logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                         
-        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
+        #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo
                
@@ -695,6 +701,7 @@ class documentViewer(Folder):
                 pageinfo['numgroups'] += 1        
         pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode
+        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
         pageinfo['query'] = self.REQUEST.get('query',' ')
         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
@@ -760,7 +767,7 @@ def manage_addDocumentViewerTemplate(sel
     self._setObject(id, DocumentViewerTemplate(id))
     ob = getattr(self, id)
     txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
-    logging.info("txt %s:"%txt)
+    #logging.info("txt %s:"%txt)
     ob.pt_edit(txt,"text/html")
     if title:
         ob.pt_setTitle(title)