File:  [Repository] / documentViewer / documentViewer.py
Revision 1.69.2.6: download - view: text, annotated - select for diffs - revision graph
Wed Jun 16 18:40:18 2010 UTC (14 years ago) by casties
Branches: modularisierung
Diff to: branchpoint 1.69: preferred, unified
fixed oopsie

    1: 
    2: from OFS.Folder import Folder
    3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
    4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
    5: from AccessControl import ClassSecurityInfo
    6: from AccessControl import getSecurityManager
    7: from Globals import package_home
    8: 
    9: from Ft.Xml import EMPTY_NAMESPACE, Parse
   10: import os.path
   11: import sys
   12: import urllib
   13: import urllib2
   14: import logging
   15: 
   16: def logger(txt,method,txt2):
   17:     """logging"""
   18:     logging.info(txt+ txt2)
   19:     
   20:     
   21: def getInt(number, default=0):
   22:     """returns always an int (0 in case of problems)"""
   23:     try:
   24:         return int(number)
   25:     except:
   26:         return int(default)
   27: 
   28: def getTextFromNode(nodename):
   29:     """get the cdata content of a node"""
   30:     if nodename is None:
   31:         return ""
   32:     nodelist=nodename.childNodes
   33:     rc = ""
   34:     for node in nodelist:
   35:         if node.nodeType == node.TEXT_NODE:
   36:            rc = rc + node.data
   37:     return rc
   38: 
   39: def serializeNode(node, encoding='utf-8'):
   40:     """returns a string containing node as XML"""
   41:     buf = cStringIO.StringIO()
   42:     Print(node, stream=buf, encoding=encoding)
   43:     s = buf.getvalue()
   44:     buf.close()
   45:     return s
   46: 
   47:         
   48: def getParentDir(path):
   49:     """returns pathname shortened by one"""
   50:     return '/'.join(path.split('/')[0:-1])
   51:         
   52: 
   53: def getHttpData(url, data=None, num_tries=3, timeout=10):
   54:     """returns result from url+data HTTP request"""
   55:     # we do GET (by appending data to url)
   56:     if isinstance(data, str) or isinstance(data, unicode):
   57:         # if data is string then append
   58:         url = "%s?%s"%(url,data)
   59:     elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple):
   60:         # urlencode
   61:         url = "%s?%s"%(url,urllib.urlencode(data))
   62:     
   63:     response = None
   64:     errmsg = None
   65:     for cnt in range(num_tries):
   66:         try:
   67:             logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url))
   68:             if sys.version_info < (2, 6):
   69:                 # set timeout on socket -- ugly :-(
   70:                 import socket
   71:                 socket.setdefaulttimeout(timeout)
   72:                 response = urllib2.urlopen(url)
   73:             else:
   74:                 response = urllib2.urlopen(url,timeout=float(timeout))
   75:             # check result?
   76:             break
   77:         except urllib2.HTTPError, e:
   78:             logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e))
   79:             errmsg = str(e)
   80:             # stop trying
   81:             break
   82:         except urllib2.URLError, e:
   83:             logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e))
   84:             errmsg = str(e)
   85:             # stop trying
   86:             #break
   87: 
   88:     if response is not None:
   89:         data = response.read()
   90:         response.close()
   91:         return data
   92:     
   93:     raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg))
   94:     #return None
   95: 
   96: 
   97: 
   98: ##
   99: ## documentViewer class
  100: ##
class documentViewer(Folder):
    """Document viewer: a Zope Folder that renders documents through page templates."""
    # name under which this object type is registered
    meta_type="Document viewer"
    
    # collects the permission declarations made below for this class
    security=ClassSecurityInfo()
    # the Folder management tabs plus a 'main config' tab that opens
    # changeDocumentViewerForm
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms (loaded from files relative to this module)
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals())
    toc_text = PageTemplateFile('zpt/toc_text', globals())
    toc_figures = PageTemplateFile('zpt/toc_figures', globals())
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals())
    page_main_xml = PageTemplateFile('zpt/page_main_xml', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())
    
    
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')    
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
  127: 
  128:     
  129:     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"):
  130:         """init document viewer"""
  131:         self.id=id
  132:         self.title=title
  133:         self.thumbcols = thumbcols
  134:         self.thumbrows = thumbrows
  135:         # authgroups is list of authorized groups (delimited by ,)
  136:         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
  137:         # create template folder so we can always use template.something
  138:         
  139:         templateFolder = Folder('template')
  140:         #self['template'] = templateFolder # Zope-2.12 style
  141:         self._setObject('template',templateFolder) # old style
  142:         try:
  143:             import MpdlXmlTextServer
  144:             textServer = MpdlXmlTextServer(id='fulltextclient')
  145:             #templateFolder['fulltextclient'] = xmlRpcClient
  146:             templateFolder._setObject('fulltextclient',textServer)
  147:         except Exception, e:
  148:             logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e))
  149:         try:
  150:             from Products.zogiLib.zogiLib import zogiLib
  151:             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
  152:             #templateFolder['zogilib'] = zogilib
  153:             templateFolder._setObject('zogilib',zogilib)
  154:         except Exception, e:
  155:             logging.error("Unable to create zogiLib for zogilib: "+str(e))
  156:         
  157:         
  158:     # proxy text server methods to fulltextclient
  159:     def getTextPage(self, **args):
  160:         """get page"""
  161:         return self.template.fulltextclient.getTextPage(**args)
  162: 
  163:     def getQuery(self, **args):
  164:         """get query"""
  165:         return self.template.fulltextclient.getQuery(**args)
  166: 
  167:     def getSearch(self, **args):
  168:         """get search"""
  169:         return self.template.fulltextclient.getSearch(**args)
  170: 
  171:     def getNumPages(self, **args):
  172:         """get numpages"""
  173:         return self.template.fulltextclient.getNumPages(**args)
  174: 
  175:     def getTranslate(self, **args):
  176:         """get translate"""
  177:         return self.template.fulltextclient.getTranslate(**args)
  178: 
  179:     def getLemma(self, **args):
  180:         """get lemma"""
  181:         return self.template.fulltextclient.getLemma(**args)
  182: 
  183:     def getToc(self, **args):
  184:         """get toc"""
  185:         return self.template.fulltextclient.getToc(**args)
  186: 
  187:     def getTocPage(self, **args):
  188:         """get tocpage"""
  189:         return self.template.fulltextclient.getTocPage(**args)
  190: 
  191:     
  192:     security.declareProtected('View','thumbs_rss')
  193:     def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
  194:         '''
  195:         view it
  196:         @param mode: defines how to access the document behind url 
  197:         @param url: url which contains display information
  198:         @param viewMode: if images display images, if text display text, default is images (text,images or auto)
  199:         
  200:         '''
  201:         logging.debug("HHHHHHHHHHHHHH:load the rss")
  202:         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
  203:         
  204:         if not hasattr(self, 'template'):
  205:             # create template folder if it doesn't exist
  206:             self.manage_addFolder('template')
  207:             
  208:         if not self.digilibBaseUrl:
  209:             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
  210:             
  211:         docinfo = self.getDocinfo(mode=mode,url=url)
  212:         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
  213:         pt = getattr(self.template, 'thumbs_main_rss')
  214:         
  215:         if viewMode=="auto": # automodus gewaehlt
  216:             if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert
  217:                 viewMode="text"
  218:             else:
  219:                 viewMode="images"
  220:                
  221:         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
  222:   
  223:     security.declareProtected('View','index_html')
  224:     def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None):
  225:         '''
  226:         view it
  227:         @param mode: defines how to access the document behind url 
  228:         @param url: url which contains display information
  229:         @param viewMode: if images display images, if text display text, default is auto (text,images or auto)
  230:         @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none)
  231:         @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma)
  232:         '''
  233:         
  234:         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
  235:         
  236:         if not hasattr(self, 'template'):
  237:             # this won't work
  238:             logging.error("template folder missing!")
  239:             return "ERROR: template folder missing!"
  240:             
  241:         if not getattr(self, 'digilibBaseUrl', None):
  242:             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
  243:             
  244:         docinfo = self.getDocinfo(mode=mode,url=url)
  245:         
  246:         
  247:         if tocMode != "thumbs":
  248:             # get table of contents
  249:             docinfo = self.getToc(mode=tocMode, docinfo=docinfo)
  250:             
  251:         if viewMode=="auto": # automodus gewaehlt
  252:             if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert
  253:                 viewMode="text_dict"
  254:             else:
  255:                 viewMode="images"
  256:                 
  257:         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode)
  258:         
  259:         pt = getattr(self.template, 'viewer_main')               
  260:         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
  261:   
  262:     def generateMarks(self,mk):
  263:         ret=""
  264:         if mk is None:
  265:             return ""
  266:     	if type(mk) is not ListType:
  267:     		mk=[mk]
  268:         for m in mk:
  269:             ret+="mk=%s"%m
  270:         return ret
  271: 
  272: 
  273:     def findDigilibUrl(self):
  274:         """try to get the digilib URL from zogilib"""
  275:         url = self.template.zogilib.getDLBaseUrl()
  276:         return url
  277: 
  278:     def getDocumentViewerURL(self):
  279:         """returns the URL of this instance"""
  280:         return self.absolute_url()
  281:     
  282:     def getStyle(self, idx, selected, style=""):
  283:         """returns a string with the given style and append 'sel' if path == selected."""
  284:         #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
  285:         if idx == selected:
  286:             return style + 'sel'
  287:         else:
  288:             return style
  289:     
  290:     def getLink(self,param=None,val=None):
  291:         """link to documentviewer with parameter param set to val"""
  292:         params=self.REQUEST.form.copy()
  293:         if param is not None:
  294:             if val is None:
  295:                 if params.has_key(param):
  296:                     del params[param]
  297:             else:
  298:                 params[param] = str(val)
  299:                 
  300:         if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
  301:                 params["mode"] = "imagepath"
  302:                 params["url"] = getParentDir(params["url"])
  303:                 
  304:         # quote values and assemble into query string
  305:         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
  306:         url=self.REQUEST['URL1']+"?"+ps
  307:         return url
  308: 
  309:     def getLinkAmp(self,param=None,val=None):
  310:         """link to documentviewer with parameter param set to val"""
  311:         params=self.REQUEST.form.copy()
  312:         if param is not None:
  313:             if val is None:
  314:                 if params.has_key(param):
  315:                     del params[param]
  316:             else:
  317:                 params[param] = str(val)
  318:                 
  319:         # quote values and assemble into query string
  320:         logging.debug("XYXXXXX: %s"%repr(params.items()))
  321:         ps = "&amp;".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
  322:         url=self.REQUEST['URL1']+"?"+ps
  323:         return url
  324:     
  325:     def getInfo_xml(self,url,mode):
  326:         """returns info about the document as XML"""
  327: 
  328:         if not self.digilibBaseUrl:
  329:             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
  330:         
  331:         docinfo = self.getDocinfo(mode=mode,url=url)
  332:         pt = getattr(self.template, 'info_xml')
  333:         return pt(docinfo=docinfo)
  334: 
  335:     
  336:     def isAccessible(self, docinfo):
  337:         """returns if access to the resource is granted"""
  338:         access = docinfo.get('accessType', None)
  339:         logging.debug("documentViewer (accessOK) access type %s"%access)
  340:         if access is not None and access == 'free':
  341:             logging.debug("documentViewer (accessOK) access is free")
  342:             return True
  343:         elif access is None or access in self.authgroups:
  344:             # only local access -- only logged in users
  345:             user = getSecurityManager().getUser()
  346:             if user is not None:
  347:                 #print "user: ", user
  348:                 return (user.getUserName() != "Anonymous User")
  349:             else:
  350:                 return False
  351:         
  352:         logging.debug("documentViewer (accessOK) unknown access type %s"%access)
  353:         return False
  354:     
  355:                 
  356:     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
  357:         """gibt param von dlInfo aus"""
  358:         if docinfo is None:
  359:             docinfo = {}
  360:         
  361:         for x in range(cut):
  362:                
  363:                 path=getParentDir(path)
  364:        
  365:         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
  366:     
  367:         logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl))
  368:         
  369:         txt = getHttpData(infoUrl)
  370:         if txt is None:
  371:             raise IOError("Unable to get dir-info from %s"%(infoUrl))
  372: 
  373:         dom = Parse(txt)
  374:         sizes=dom.xpath("//dir/size")
  375:         logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)
  376:         
  377:         if sizes:
  378:             docinfo['numPages'] = int(getTextFromNode(sizes[0]))
  379:         else:
  380:             docinfo['numPages'] = 0
  381:             
  382:         # TODO: produce and keep list of image names and numbers
  383:                         
  384:         return docinfo
  385:     
  386:             
  387:     def getIndexMeta(self, url):
  388:         """returns dom of index.meta document at url"""
  389:         dom = None
  390:         metaUrl = None
  391:         if url.startswith("http://"):
  392:             # real URL
  393:             metaUrl = url
  394:         else:
  395:             # online path
  396:             server=self.digilibBaseUrl+"/servlet/Texter?fn="
  397:             metaUrl=server+url.replace("/mpiwg/online","")
  398:             if not metaUrl.endswith("index.meta"):
  399:                 metaUrl += "/index.meta"
  400:                 
  401:         logging.debug("(getIndexMeta): METAURL: %s"%metaUrl)
  402:         txt=getHttpData(metaUrl)
  403:         if txt is None:
  404:             raise IOError("Unable to read index meta from %s"%(url))
  405:         
  406:         dom = Parse(txt)
  407:         return dom
  408:     
  409:     def getPresentationInfoXML(self, url):
  410:         """returns dom of info.xml document at url"""
  411:         dom = None
  412:         metaUrl = None
  413:         if url.startswith("http://"):
  414:             # real URL
  415:             metaUrl = url
  416:         else:
  417:             # online path
  418:             server=self.digilibBaseUrl+"/servlet/Texter?fn="
  419:             metaUrl=server+url.replace("/mpiwg/online","")
  420:         
  421:         txt=getHttpData(metaUrl)
  422:         if txt is None:
  423:             raise IOError("Unable to read infoXMLfrom %s"%(url))
  424:             
  425:         dom = Parse(txt)
  426:         return dom
  427:                         
  428:         
  429:     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
  430:         """gets authorization info from the index.meta file at path or given by dom"""
  431:         logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path))
  432:         
  433:         access = None
  434:         
  435:         if docinfo is None:
  436:             docinfo = {}
  437:             
  438:         if dom is None:
  439:             for x in range(cut):
  440:                 path=getParentDir(path)
  441:             dom = self.getIndexMeta(path)
  442:        
  443:         acctype = dom.xpath("//access-conditions/access/@type")
  444:         if acctype and (len(acctype)>0):
  445:             access=acctype[0].value
  446:             if access in ['group', 'institution']:
  447:                 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()
  448:             
  449:         docinfo['accessType'] = access
  450:         return docinfo
  451:     
  452:         
  453:     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
  454:         """gets bibliographical info from the index.meta file at path or given by dom"""
  455:         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
  456:         
  457:         if docinfo is None:
  458:             docinfo = {}
  459:         
  460:         if dom is None:
  461:             for x in range(cut):
  462:                 path=getParentDir(path)
  463:             dom = self.getIndexMeta(path)
  464:         
  465:         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
  466:         # put in all raw bib fields as dict "bib"
  467:         bib = dom.xpath("//bib/*")
  468:         if bib and len(bib)>0:
  469:             bibinfo = {}
  470:             for e in bib:
  471:                 bibinfo[e.localName] = getTextFromNode(e)
  472:             docinfo['bib'] = bibinfo
  473:         
  474:         # extract some fields (author, title, year) according to their mapping
  475:         metaData=self.metadata.main.meta.bib
  476:         bibtype=dom.xpath("//bib/@type")
  477:         if bibtype and (len(bibtype)>0):
  478:             bibtype=bibtype[0].value
  479:         else:
  480:             bibtype="generic"
  481:             
  482:         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
  483:         docinfo['bib_type'] = bibtype
  484:         bibmap=metaData.generateMappingForType(bibtype)
  485:         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
  486:         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
  487:             try:
  488:                 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
  489:             except: pass
  490:             try:
  491:                 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
  492:             except: pass
  493:             try:
  494:                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
  495:             except: pass
  496:             logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
  497:             try:
  498:                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
  499:             except:
  500:                 docinfo['lang']=''
  501: 
  502:         return docinfo
  503:     
  504:     
  505:     def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
  506:         """parse texttool tag in index meta"""
  507:         logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url))
  508:         if docinfo is None:
  509:            docinfo = {}
  510:         if docinfo.get('lang', None) is None:
  511:             docinfo['lang'] = '' # default keine Sprache gesetzt
  512:         if dom is None:
  513:             dom = self.getIndexMeta(url)
  514:         
  515:         archivePath = None
  516:         archiveName = None
  517:     
  518:         archiveNames = dom.xpath("//resource/name")
  519:         if archiveNames and (len(archiveNames) > 0):
  520:             archiveName = getTextFromNode(archiveNames[0])
  521:         else:
  522:             logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url))
  523:         
  524:         archivePaths = dom.xpath("//resource/archive-path")
  525:         if archivePaths and (len(archivePaths) > 0):
  526:             archivePath = getTextFromNode(archivePaths[0])
  527:             # clean up archive path
  528:             if archivePath[0] != '/':
  529:                 archivePath = '/' + archivePath
  530:             if archiveName and (not archivePath.endswith(archiveName)):
  531:                 archivePath += "/" + archiveName
  532:         else:
  533:             # try to get archive-path from url
  534:             logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url))
  535:             if (not url.startswith('http')):
  536:                 archivePath = url.replace('index.meta', '')
  537:                 
  538:         if archivePath is None:
  539:             # we balk without archive-path
  540:             raise IOError("Missing archive-path (for text-tool) in %s" % (url))
  541:         
  542:         imageDirs = dom.xpath("//texttool/image")
  543:         if imageDirs and (len(imageDirs) > 0):
  544:             imageDir = getTextFromNode(imageDirs[0])
  545:             
  546:         else:
  547:             # we balk with no image tag / not necessary anymore because textmode is now standard
  548:             #raise IOError("No text-tool info in %s"%(url))
  549:             imageDir = ""
  550:             #xquery="//pb"  
  551:             docinfo['imagePath'] = "" # keine Bilder
  552:             docinfo['imageURL'] = ""
  553:             
  554:         if imageDir and archivePath:
  555:             #print "image: ", imageDir, " archivepath: ", archivePath
  556:             imageDir = os.path.join(archivePath, imageDir)
  557:             imageDir = imageDir.replace("/mpiwg/online", '')
  558:             docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
  559:             docinfo['imagePath'] = imageDir
  560:             
  561:             docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
  562:             
  563:         viewerUrls = dom.xpath("//texttool/digiliburlprefix")
  564:         if viewerUrls and (len(viewerUrls) > 0):
  565:             viewerUrl = getTextFromNode(viewerUrls[0])
  566:             docinfo['viewerURL'] = viewerUrl
  567:                    
  568:         textUrls = dom.xpath("//texttool/text")
  569:         if textUrls and (len(textUrls) > 0):
  570:             textUrl = getTextFromNode(textUrls[0])
  571:             if urlparse.urlparse(textUrl)[0] == "": #keine url
  572:                 textUrl = os.path.join(archivePath, textUrl) 
  573:             # fix URLs starting with /mpiwg/online
  574:             if textUrl.startswith("/mpiwg/online"):
  575:                 textUrl = textUrl.replace("/mpiwg/online", '', 1)
  576:             
  577:             docinfo['textURL'] = textUrl
  578:     
  579:         textUrls = dom.xpath("//texttool/text-url-path")
  580:         if textUrls and (len(textUrls) > 0):
  581:             textUrl = getTextFromNode(textUrls[0])
  582:             docinfo['textURLPath'] = textUrl
  583:             if not docinfo['imagePath']:
  584:                 # text-only, no page images
  585:                 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht    
  586:          
  587:         presentationUrls = dom.xpath("//texttool/presentation")
  588:         docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info von bib tag
  589:         
  590:         if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 
  591:              # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
  592:              # durch den relativen Pfad auf die presentation infos
  593:             presentationPath = getTextFromNode(presentationUrls[0])
  594:             if url.endswith("index.meta"): 
  595:                 presentationUrl = url.replace('index.meta', presentationPath)
  596:             else:
  597:                 presentationUrl = url + "/" + presentationPath
  598:                 
  599:             docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
  600:     
  601:         docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
  602:         
  603:         return docinfo
  604:    
  605:    
  606:     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
  607:         """gets the bibliographical information from the preseantion entry in texttools
  608:         """
  609:         dom=self.getPresentationInfoXML(url)
  610:         try:
  611:             docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
  612:         except:
  613:             pass
  614:         try:
  615:             docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
  616:         except:
  617:             pass
  618:         try:
  619:             docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
  620:         except:
  621:             pass
  622:         return docinfo
  623:     
  624:     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
  625:         """path ist the path to the images it assumes that the index.meta file is one level higher."""
  626:         logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path))
  627:         if docinfo is None:
  628:             docinfo = {}
  629:         path=path.replace("/mpiwg/online","")
  630:         docinfo['imagePath'] = path
  631:         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
  632:         
  633:         pathorig=path
  634:         for x in range(cut):       
  635:                 path=getParentDir(path)
  636:         logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path)
  637:         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
  638:         docinfo['imageURL'] = imageUrl
  639:         
  640:         #path ist the path to the images it assumes that the index.meta file is one level higher.
  641:         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
  642:         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
  643:         return docinfo
  644:     
  645:     
  646:     def getDocinfo(self, mode, url):
  647:         """returns docinfo depending on mode"""
  648:         logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url))
  649:         # look for cached docinfo in session
  650:         if self.REQUEST.SESSION.has_key('docinfo'):
  651:             docinfo = self.REQUEST.SESSION['docinfo']
  652:             # check if its still current
  653:             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
  654:                 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo)
  655:                 return docinfo
  656:         # new docinfo
  657:         docinfo = {'mode': mode, 'url': url}
  658:         if mode=="texttool": #index.meta with texttool information
  659:             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
  660:         elif mode=="imagepath":
  661:             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
  662:         elif mode=="filepath":
  663:             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
  664:         else:
  665:             logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode)
  666:             raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode))
  667:                         
  668:         logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo)
  669:         self.REQUEST.SESSION['docinfo'] = docinfo
  670:         return docinfo
  671:                
  672:     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None):
  673:         """returns pageinfo with the given parameters"""
  674:         pageinfo = {}
  675:         current = getInt(current)
  676:         pageinfo['current'] = current
  677:         rows = int(rows or self.thumbrows)
  678:         pageinfo['rows'] = rows
  679:         cols = int(cols or self.thumbcols)
  680:         pageinfo['cols'] = cols
  681:         grpsize = cols * rows
  682:         pageinfo['groupsize'] = grpsize
  683:         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
  684:         # int(current / grpsize) * grpsize +1))
  685:         pageinfo['start'] = start
  686:         pageinfo['end'] = start + grpsize
  687:         if (docinfo is not None) and ('numPages' in docinfo):
  688:             np = int(docinfo['numPages'])
  689:             pageinfo['end'] = min(pageinfo['end'], np)
  690:             pageinfo['numgroups'] = int(np / grpsize)
  691:             if np % grpsize > 0:
  692:                 pageinfo['numgroups'] += 1        
  693:         pageinfo['viewMode'] = viewMode
  694:         pageinfo['tocMode'] = tocMode
  695:         pageinfo['query'] = self.REQUEST.get('query',' ')
  696:         pageinfo['queryType'] = self.REQUEST.get('queryType',' ')
  697:         pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext')
  698:         pageinfo['textPN'] = self.REQUEST.get('textPN','1')
  699:         pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','')
  700:         pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30')
  701:         pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10')
  702:         pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1')
  703:         toc = int (pageinfo['tocPN'])
  704:         pageinfo['textPages'] =int (toc)
  705:         
  706:         if 'tocSize_%s'%tocMode in docinfo:
  707:             tocSize = int(docinfo['tocSize_%s'%tocMode])
  708:             tocPageSize = int(pageinfo['tocPageSize'])
  709:             # cached toc           
  710:             if tocSize%tocPageSize>0:
  711:                 tocPages=tocSize/tocPageSize+1
  712:             else:
  713:                 tocPages=tocSize/tocPageSize
  714:             pageinfo['tocPN'] = min (tocPages,toc)                    
  715:         pageinfo['searchPN'] =self.REQUEST.get('searchPN','1')
  716:         pageinfo['sn'] =self.REQUEST.get('sn','')
  717:         return pageinfo
  718:     
  719: def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None):
  720:         """init document viewer"""
  721:         self.title=title
  722:         self.digilibBaseUrl = digilibBaseUrl
  723:         self.thumbrows = thumbrows
  724:         self.thumbcols = thumbcols
  725:         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
  726:         if RESPONSE is not None:
  727:             RESPONSE.redirect('manage_main')
  728:         
  729: def manage_AddDocumentViewerForm(self):
  730:     """add the viewer form"""
  731:     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
  732:     return pt()
  733:   
  734: def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
  735:     """add the viewer"""
  736:     newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
  737:     self._setObject(id,newObj)
  738:     
  739:     if RESPONSE is not None:
  740:         RESPONSE.redirect('manage_main')
  741: 
  742: ## DocumentViewerTemplate class
  743: class DocumentViewerTemplate(ZopePageTemplate):
  744:     """Template for document viewer"""
  745:     meta_type="DocumentViewer Template"
  746: 
  747: 
  748: def manage_addDocumentViewerTemplateForm(self):
  749:     """Form for adding"""
  750:     pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
  751:     return pt()
  752: 
  753: def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
  754:                            REQUEST=None, submit=None):
  755:     "Add a Page Template with optional file content."
  756: 
  757:     self._setObject(id, DocumentViewerTemplate(id))
  758:     ob = getattr(self, id)
  759:     txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
  760:     logging.info("txt %s:"%txt)
  761:     ob.pt_edit(txt,"text/html")
  762:     if title:
  763:         ob.pt_setTitle(title)
  764:     try:
  765:         u = self.DestinationURL()
  766:     except AttributeError:
  767:         u = REQUEST['URL1']
  768:         
  769:     u = "%s/%s" % (u, urllib.quote(id))
  770:     REQUEST.RESPONSE.redirect(u+'/manage_main')
  771:     return ''
  772: 
  773: 
  774:     

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>