--- documentViewer/documentViewer.py	2010/08/24 12:34:32	1.94
+++ documentViewer/documentViewer.py	2010/10/08 13:49:44	1.112
@@ -16,6 +16,7 @@ import logging
 import math
 import urlparse 
 import cStringIO
+import re
 
 def logger(txt,method,txt2):
     """logging"""
@@ -68,7 +69,7 @@ def getHttpData(url, data=None, num_trie
     errmsg = None
     for cnt in range(num_tries):
         try:
-            #logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
+            logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
             if sys.version_info < (2, 6):
                 # set timeout on socket -- ugly :-(
                 import socket
@@ -79,12 +80,12 @@ def getHttpData(url, data=None, num_trie
             # check result?
             break
         except urllib2.HTTPError, e:
-            #logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
+            logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
             errmsg = str(e)
             # stop trying
             break
         except urllib2.URLError, e:
-            #logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
+            logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
             errmsg = str(e)
             # stop trying
             #break
@@ -176,7 +177,11 @@ class documentViewer(Folder):
     def getNumPages(self, docinfo):
         """get numpages"""
         return self.template.fulltextclient.getNumPages(docinfo)
-
+   
+    def getNumTextPages(self, docinfo):
+        """delegate to fulltextclient.getNumTextPages(docinfo) and return its result (the caller stores it back as docinfo)"""
+        return self.template.fulltextclient.getNumTextPages(docinfo)
+   
     def getTranslate(self, **args):
         """get translate"""
         return self.template.fulltextclient.getTranslate(**args)
@@ -203,8 +208,8 @@ class documentViewer(Folder):
         @param viewMode: if images display images, if text display text, default is images (text,images or auto)
         
         '''
-        #logging.debug("HHHHHHHHHHHHHH:load the rss")
-        #logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
+        logging.debug("HHHHHHHHHHHHHH:load the rss")
+        logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
         
         if not hasattr(self, 'template'):
             # create template folder if it doesn't exist
@@ -226,7 +231,7 @@ class documentViewer(Folder):
         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
   
     security.declareProtected('View','index_html')
-    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):
+    def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None, characterNormalization=""):
         '''
         view it
         @param mode: defines how to access the document behind url 
@@ -237,7 +242,7 @@ class documentViewer(Folder):
         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
         '''
         
-        #logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
+        logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
         
         if not hasattr(self, 'template'):
             # this won't work
@@ -323,7 +328,7 @@ class documentViewer(Folder):
                 params[param] = str(val)
                 
         # quote values and assemble into query string
-        #logging.debug("XYXXXXX: %s"%repr(params.items()))
+        logging.debug("XYXXXXX: %s"%repr(params.items()))
         ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
         url=self.REQUEST['URL1']+"?"+ps
         return url
@@ -342,21 +347,21 @@ class documentViewer(Folder):
     def isAccessible(self, docinfo):
         """returns if access to the resource is granted"""
         access = docinfo.get('accessType', None)
-        #logging.debug("documentViewer (accessOK) access type %s"%access)
+        logging.debug("documentViewer (accessOK) access type %s", access)
         if access is not None and access == 'free':
-            #logging.debug("documentViewer (accessOK) access is free")
+            logging.debug("documentViewer (accessOK) access is free")
             return True
         elif access is None or access in self.authgroups:
             # only local access -- only logged in users
             user = getSecurityManager().getUser()
-            #logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr()))
+            logging.debug("documentViewer (accessOK) user=%s ip=%s", user, self.REQUEST.getClientAddr())
             if user is not None:
                 #print "user: ", user
                 return (user.getUserName() != "Anonymous User")
             else:
                 return False
         
-        #logging.error("documentViewer (accessOK) unknown access type %s"%access)
+        logging.error("documentViewer (accessOK) unknown access type %s", access)
         return False
     
                 
@@ -371,7 +376,7 @@ class documentViewer(Folder):
        
         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
     
-        #logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
+        logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
         
         txt = getHttpData(infoUrl)
         if txt is None:
@@ -379,7 +384,7 @@ class documentViewer(Folder):
 
         dom = Parse(txt)
         sizes=dom.xpath("//dir/size")
-        #logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
+        logging.debug("documentViewer (getparamfromdigilib) dirInfo:size %s"%repr(sizes))
         
         if sizes:
             docinfo['numPages'] = int(getTextFromNode(sizes[0]))
@@ -390,10 +395,20 @@ class documentViewer(Folder):
                         
         return docinfo
     
-            
-    def getIndexMeta(self, url):
-        """returns dom of index.meta document at url"""
-        dom = None
+    def getIndexMetaPath(self,url):
+        """return only the path: /mpiwg/online/(experimental|permanent)/rest taken from url, or an empty string"""
+        regpath = re.match(r".*(experimental|permanent)/(.*)", url)
+        if regpath is None:
+            # url does not contain an experimental/ or permanent/ part
+            return ""
+        logging.debug("(getIndexMetaPath): URLXAXA: %s"%regpath.group(2))
+        return "/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)
+     
+    
+    
+    def getIndexMetaUrl(self,url):
+        """returns URL of index.meta document at url"""
+      
         metaUrl = None
         if url.startswith("http://"):
             # real URL
@@ -404,8 +419,15 @@ class documentViewer(Folder):
             metaUrl=server+url.replace("/mpiwg/online","")
             if not metaUrl.endswith("index.meta"):
                 metaUrl += "/index.meta"
+        
+        return metaUrl
+    
+    def getDomFromIndexMeta(self, url):
+        """get dom from index meta"""
+        dom = None
+        metaUrl = self.getIndexMetaUrl(url)
                 
-        #logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
+        logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl)
         txt=getHttpData(metaUrl)
         if txt is None:
             raise IOError("Unable to read index meta from %s"%(url))
@@ -435,7 +457,7 @@ class documentViewer(Folder):
         
     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets authorization info from the index.meta file at path or given by dom"""
-        #logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
+        logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
         
         access = None
         
@@ -445,7 +467,7 @@ class documentViewer(Folder):
         if dom is None:
             for x in range(cut):
                 path=getParentDir(path)
-            dom = self.getIndexMeta(path)
+            dom = self.getDomFromIndexMeta(path)
        
         acctype = dom.xpath("//access-conditions/access/@type")
         if acctype and (len(acctype)>0):
@@ -459,7 +481,7 @@ class documentViewer(Folder):
         
     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
         """gets bibliographical info from the index.meta file at path or given by dom"""
-        #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
+        logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
         
         if docinfo is None:
             docinfo = {}
@@ -467,9 +489,11 @@ class documentViewer(Folder):
         if dom is None:
             for x in range(cut):
                 path=getParentDir(path)
-            dom = self.getIndexMeta(path)
+            dom = self.getDomFromIndexMeta(path)
+        
+        docinfo['indexMetaPath']=self.getIndexMetaPath(path);
         
-        #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
+        logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
         # put in all raw bib fields as dict "bib"
         bib = dom.xpath("//bib/*")
         if bib and len(bib)>0:
@@ -489,6 +513,8 @@ class documentViewer(Folder):
         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
         docinfo['bib_type'] = bibtype
         bibmap=metaData.generateMappingForType(bibtype)
+        logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap))
+        logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype))
         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
             try:
@@ -500,24 +526,51 @@ class documentViewer(Folder):
             try:
                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
             except: pass
-            #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
+            logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
             try:
                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
             except:
                 docinfo['lang']=''
-
+            try:
+                docinfo['name']=getTextFromNode(dom.xpath("//%s"%bibmap['name'][0])[0])
+            except: pass
+            
         return docinfo
     
+     
+    def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
+        """gets name info from the index.meta file at path or given by dom
+        
+        @param path: location handed to getDomFromIndexMeta, only used when dom is None
+        @param docinfo: dict to update, a new dict is created when None
+        @param dom: already parsed index.meta, no document is fetched when given
+        @param cut: number of times getParentDir is applied to path before fetching
+        @return: docinfo, with 'name' set to the text of the first //name element if there is one
+        """
+        if docinfo is None:
+            docinfo = {}
+        
+        if dom is None:
+            for x in range(cut):
+                path=getParentDir(path)
+            dom = self.getDomFromIndexMeta(path)
+        
+        names = dom.xpath("//name")
+        if names:
+            # a missing //name element is not an error, docinfo just stays without 'name'
+            docinfo['name']=getTextFromNode(names[0])
+            logging.debug("documentViewer docinfo[name] %s"%docinfo['name'])
+        return docinfo
     
     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
         """parse texttool tag in index meta"""
-        #logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
+        logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
         if docinfo is None:
            docinfo = {}
         if docinfo.get('lang', None) is None:
             docinfo['lang'] = '' # default keine Sprache gesetzt
         if dom is None:
-            dom = self.getIndexMeta(url)
+            dom = self.getDomFromIndexMeta(url)
         
         archivePath = None
         archiveName = None
@@ -538,7 +591,7 @@ class documentViewer(Folder):
                 archivePath += "/" + archiveName
         else:
             # try to get archive-path from url
-            #logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
+            logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
             if (not url.startswith('http')):
                 archivePath = url.replace('index.meta', '')
                 
@@ -591,10 +644,11 @@ class documentViewer(Folder):
             docinfo['textURLPath'] = textUrl
             if not docinfo['imagePath']:
                 # text-only, no page images
-                docinfo = self.getNumPages(docinfo)
+                docinfo = self.getNumTextPages(docinfo)
          
         presentationUrls = dom.xpath("//texttool/presentation")
         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
+        #docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom)
         
         if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 
              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
@@ -628,11 +682,15 @@ class documentViewer(Folder):
             docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
         except:
             pass
+        try:
+            docinfo['name']=getTextFromNode(dom.xpath("//name")[0])
+        except:
+            pass
         return docinfo
     
     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
         """path ist the path to the images it assumes that the index.meta file is one level higher."""
-        #logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
+        logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
         if docinfo is None:
             docinfo = {}
         path=path.replace("/mpiwg/online","")
@@ -642,7 +700,7 @@ class documentViewer(Folder):
         pathorig=path
         for x in range(cut):       
                 path=getParentDir(path)
-        #logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
+        logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
         docinfo['imageURL'] = imageUrl
         
@@ -654,13 +712,13 @@ class documentViewer(Folder):
     
     def getDocinfo(self, mode, url):
         """returns docinfo depending on mode"""
-        #logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
+        logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
         # look for cached docinfo in session
         if self.REQUEST.SESSION.has_key('docinfo'):
             docinfo = self.REQUEST.SESSION['docinfo']
             # check if its still current
             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
-                #logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
+                logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
                 return docinfo
         # new docinfo
         docinfo = {'mode': mode, 'url': url}
@@ -671,14 +729,14 @@ class documentViewer(Folder):
         elif mode=="filepath":
             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
         else:
-            #logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
+            logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
                         
-        #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
+        logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
         self.REQUEST.SESSION['docinfo'] = docinfo
         return docinfo
                
-    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
+    def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None,characterNormalization=""):
         """returns pageinfo with the given parameters"""
         pageinfo = {}
         current = getInt(current)
@@ -701,7 +759,8 @@ class documentViewer(Folder):
                 pageinfo['numgroups'] += 1        
         pageinfo['viewMode'] = viewMode
         pageinfo['tocMode'] = tocMode
-        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','')
+        #pageinfo['characterNormalization'] =characterNormalization
+        pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization',' ')
         pageinfo['query'] = self.REQUEST.get('query',' ')
         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
@@ -767,7 +826,7 @@ def manage_addDocumentViewerTemplate(sel
     self._setObject(id, DocumentViewerTemplate(id))
     ob = getattr(self, id)
     txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
-    #logging.info("txt %s:"%txt)
+    logging.info("txt %s:"%txt)
     ob.pt_edit(txt,"text/html")
     if title:
         ob.pt_setTitle(title)