File:  [Repository] / documentViewer / documentViewer.py
Revision 1.43: download - view: text, annotated - select for diffs - revision graph
Fri Mar 19 11:42:40 2010 UTC (14 years, 2 months ago) by casties
Branches: MAIN
CVS tags: HEAD
first version with new full-text infrastructure and slightly changed templates

    1: 
    2: from OFS.Folder import Folder
    3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
    4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
    5: from AccessControl import ClassSecurityInfo
    6: from AccessControl import getSecurityManager
    7: from Globals import package_home
    8: 
    9: from Ft.Xml.Domlette import NonvalidatingReader
   10: from Ft.Xml.Domlette import PrettyPrint, Print
   11: from Ft.Xml import EMPTY_NAMESPACE, Parse
   12: 
   13: 
   14: import Ft.Xml.XPath
   15: import cStringIO
   16: import xmlrpclib
   17: import os.path
   18: import sys
   19: import cgi
   20: import urllib
   21: import logging
   22: import math
   23: 
   24: import urlparse 
   25: from types import *
   26: 
   27: def logger(txt,method,txt2):
   28:     """logging"""
   29:     logging.info(txt+ txt2)
   30:     
   31:     
   32: def getInt(number, default=0):
   33:     """returns always an int (0 in case of problems)"""
   34:     try:
   35:         return int(number)
   36:     except:
   37:         return int(default)
   38: 
   39: def getTextFromNode(nodename):
   40:     """get the cdata content of a node"""
   41:     if nodename is None:
   42:         return ""
   43:     nodelist=nodename.childNodes
   44:     rc = ""
   45:     for node in nodelist:
   46:         if node.nodeType == node.TEXT_NODE:
   47:            rc = rc + node.data
   48:     return rc
   49: 
   50: def serializeNode(node, encoding='utf-8'):
   51:     """returns a string containing node as XML"""
   52:     buf = cStringIO.StringIO()
   53:     Print(node, stream=buf, encoding=encoding)
   54:     s = buf.getvalue()
   55:     buf.close()
   56:     return s
   57: 
   58:         
   59: def getParentDir(path):
   60:     """returns pathname shortened by one"""
   61:     return '/'.join(path.split('/')[0:-1])
   62:         
   63: 
   64: import socket
   65: 
   66: def urlopen(url,timeout=2):
   67:         """urlopen mit timeout"""
   68:         socket.setdefaulttimeout(timeout)
   69:         ret=urllib.urlopen(url)
   70:         socket.setdefaulttimeout(5)
   71:         return ret
   72: 
   73: 
   74: ##
   75: ## documentViewer class
   76: ##
class documentViewer(Folder):
    """document viewer"""
    # NOTE(review): textViewerUrl exists only as this commented-out example,
    # while thumbs_rss still reads self.textViewerUrl -- confirm where it is
    # expected to come from.
    #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?"
    
    # type name of this object (presumably what Zope lists it under -- confirm)
    meta_type="Document viewer"
    
    # receives the declareProtected() declarations made in this class body
    security=ClassSecurityInfo()
    # the Folder management tabs plus one tab that opens the configuration form
    manage_options=Folder.manage_options+(
        {'label':'main config','action':'changeDocumentViewerForm'},
        )

    # templates and forms
    viewer_main = PageTemplateFile('zpt/viewer_main', globals())
    thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
    image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete!
    page_main_images = PageTemplateFile('zpt/page_main_images', globals())
    page_main_text = PageTemplateFile('zpt/page_main_text', globals())
    head_main = PageTemplateFile('zpt/head_main', globals())
    docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
    info_xml = PageTemplateFile('zpt/info_xml', globals())

    # template looked up by thumbs_rss
    thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
    # the configuration form requires the 'View management screens' permission
    security.declareProtected('View management screens','changeDocumentViewerForm')    
    changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
  101: 
  102:     
  103:     def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
  104:         """init document viewer"""
  105:         self.id=id
  106:         self.title=title
  107:         self.thumbcols = thumbcols
  108:         self.thumbrows = thumbrows
  109:         # authgroups is list of authorized groups (delimited by ,)
  110:         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
  111:         # create template folder so we can always use template.something
  112:         
  113:         templateFolder = Folder('template')
  114:         #self['template'] = templateFolder # Zope-2.12 style
  115:         self._setObject('template',templateFolder) # old style
  116:         try:
  117:             from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
  118:             xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
  119:             #templateFolder['fulltextclient'] = xmlRpcClient
  120:             templateFolder._setObject('fulltextclient',xmlRpcClient)
  121:         except Exception, e:
  122:             logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e))
  123:         try:
  124:             from Products.zogiLib.zogiLib import zogiLib
  125:             zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
  126:             #templateFolder['zogilib'] = zogilib
  127:             templateFolder._setObject('zogilib',zogilib)
  128:         except Exception, e:
  129:             logging.error("Unable to create zogiLib for zogilib: "+str(e))
  130:         
  131: 
  132:     security.declareProtected('View','thumbs_rss')
  133:     def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
  134:         '''
  135:         view it
  136:         @param mode: defines how to access the document behind url 
  137:         @param url: url which contains display information
  138:         @param viewMode: if images display images, if text display text, default is images (text,images or auto)
  139:         
  140:         '''
  141:         logging.debug("HHHHHHHHHHHHHH:load the rss")
  142:         logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
  143:         
  144:         if not hasattr(self, 'template'):
  145:             # create template folder if it doesn't exist
  146:             self.manage_addFolder('template')
  147:             
  148:         if not self.digilibBaseUrl:
  149:             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
  150:             
  151:         docinfo = self.getDocinfo(mode=mode,url=url)
  152:         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
  153:         pt = getattr(self.template, 'thumbs_main_rss')
  154:         
  155:         if viewMode=="auto": # automodus gewaehlt
  156:             if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert
  157:                 viewMode="text"
  158:             else:
  159:                 viewMode="images"
  160:                
  161:         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
  162:   
  163:     security.declareProtected('View','index_html')
  164:     def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None):
  165:         '''
  166:         view it
  167:         @param mode: defines how to access the document behind url 
  168:         @param url: url which contains display information
  169:         @param viewMode: if images display images, if text display text, default is images (text,images or auto)
  170:         
  171:         '''
  172:         
  173:         logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
  174:         
  175:         if not hasattr(self, 'template'):
  176:             # this won't work
  177:             logging.error("template folder missing!")
  178:             return "ERROR: template folder missing!"
  179:             
  180:         if not getattr(self, 'digilibBaseUrl', None):
  181:             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
  182:             
  183:         docinfo = self.getDocinfo(mode=mode,url=url)
  184:         pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
  185:         pt = getattr(self.template, 'viewer_main')
  186:         
  187:         if viewMode=="auto": # automodus gewaehlt
  188:             if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert
  189:                 viewMode="text"
  190:             else:
  191:                 viewMode="images"
  192:                
  193:         return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
  194:   
  195:     def generateMarks(self,mk):
  196:         ret=""
  197: 	if mk is None:
  198: 		return ""
  199: 	
  200: 	if type(mk) is not ListType:
  201: 		mk=[mk]
  202:         for m in mk:
  203:             ret+="mk=%s"%m
  204:         return ret
  205: 
  206:     def findDigilibUrl(self):
  207:         """try to get the digilib URL from zogilib"""
  208:         url = self.template.zogilib.getDLBaseUrl()
  209:         return url
  210:     
  211:     def getStyle(self, idx, selected, style=""):
  212:         """returns a string with the given style and append 'sel' if path == selected."""
  213:         #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
  214:         if idx == selected:
  215:             return style + 'sel'
  216:         else:
  217:             return style
  218:     
  219:     def getLink(self,param=None,val=None):
  220:         """link to documentviewer with parameter param set to val"""
  221:         params=self.REQUEST.form.copy()
  222:         if param is not None:
  223:             if val is None:
  224:                 if params.has_key(param):
  225:                     del params[param]
  226:             else:
  227:                 params[param] = str(val)
  228:                 
  229:         if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
  230:                 params["mode"] = "imagepath"
  231:                 params["url"] = getParentDir(params["url"])
  232:                 
  233:         # quote values and assemble into query string
  234:         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
  235:         url=self.REQUEST['URL1']+"?"+ps
  236:         return url
  237: 
  238:     def getLinkAmp(self,param=None,val=None):
  239:         """link to documentviewer with parameter param set to val"""
  240:         params=self.REQUEST.form.copy()
  241:         if param is not None:
  242:             if val is None:
  243:                 if params.has_key(param):
  244:                     del params[param]
  245:             else:
  246:                 params[param] = str(val)
  247:                 
  248:         # quote values and assemble into query string
  249:         logging.info("XYXXXXX: %s"%repr(params.items()))
  250:         ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
  251:         url=self.REQUEST['URL1']+"?"+ps
  252:         return url
  253:     
  254:     def getInfo_xml(self,url,mode):
  255:         """returns info about the document as XML"""
  256: 
  257:         if not self.digilibBaseUrl:
  258:             self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
  259:         
  260:         docinfo = self.getDocinfo(mode=mode,url=url)
  261:         pt = getattr(self.template, 'info_xml')
  262:         return pt(docinfo=docinfo)
  263: 
  264:     
  265:     def isAccessible(self, docinfo):
  266:         """returns if access to the resource is granted"""
  267:         access = docinfo.get('accessType', None)
  268:         logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)
  269:         if access is not None and access == 'free':
  270:             logger("documentViewer (accessOK)", logging.INFO, "access is free")
  271:             return True
  272:         elif access is None or access in self.authgroups:
  273:             # only local access -- only logged in users
  274:             user = getSecurityManager().getUser()
  275:             if user is not None:
  276:                 #print "user: ", user
  277:                 return (user.getUserName() != "Anonymous User")
  278:             else:
  279:                 return False
  280:         
  281:         logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access)
  282:         return False
  283:     
  284:                 
  285:     def getDirinfoFromDigilib(self,path,docinfo=None,cut=0):
  286:         """gibt param von dlInfo aus"""
  287:         num_retries = 3
  288:         if docinfo is None:
  289:             docinfo = {}
  290:         
  291:         for x in range(cut):
  292:                
  293:                 path=getParentDir(path)
  294:        
  295:         infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path
  296:     
  297:         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl))
  298:         
  299:         for cnt in range(num_retries):
  300:             try:
  301:                 # dom = NonvalidatingReader.parseUri(imageUrl)
  302:                 txt=urllib.urlopen(infoUrl).read()
  303:                 dom = Parse(txt)
  304:                 break
  305:             except:
  306:                 logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt))
  307:         else:
  308:             raise IOError("Unable to get dir-info from %s"%(infoUrl))
  309:         
  310:         sizes=dom.xpath("//dir/size")
  311:         logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes)
  312:         
  313:         if sizes:
  314:             docinfo['numPages'] = int(getTextFromNode(sizes[0]))
  315:         else:
  316:             docinfo['numPages'] = 0
  317:             
  318:         # TODO: produce and keep list of image names and numbers
  319:                         
  320:         return docinfo
  321:     
  322:             
  323:     def getIndexMeta(self, url):
  324:         """returns dom of index.meta document at url"""
  325:         num_retries = 3
  326:         dom = None
  327:         metaUrl = None
  328:         if url.startswith("http://"):
  329:             # real URL
  330:             metaUrl = url
  331:         else:
  332:             # online path
  333:             server=self.digilibBaseUrl+"/servlet/Texter?fn="
  334:             metaUrl=server+url.replace("/mpiwg/online","")
  335:             if not metaUrl.endswith("index.meta"):
  336:                 metaUrl += "/index.meta"
  337:         logging.debug("METAURL: %s"%metaUrl)
  338:         for cnt in range(num_retries):
  339:             try:
  340:                 # patch dirk encoding fehler treten dann nicht mehr auf
  341:                 # dom = NonvalidatingReader.parseUri(metaUrl)
  342:                 txt=urllib.urlopen(metaUrl).read()
  343:                 dom = Parse(txt)
  344:                 break
  345:             except:
  346:                 logger("ERROR documentViewer (getIndexMata)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
  347:                 
  348:         if dom is None:
  349:             raise IOError("Unable to read index meta from %s"%(url))
  350:                  
  351:         return dom
  352:     
  353:     def getPresentationInfoXML(self, url):
  354:         """returns dom of info.xml document at url"""
  355:         num_retries = 3
  356:         dom = None
  357:         metaUrl = None
  358:         if url.startswith("http://"):
  359:             # real URL
  360:             metaUrl = url
  361:         else:
  362:             # online path
  363:             server=self.digilibBaseUrl+"/servlet/Texter?fn="
  364:             metaUrl=server+url.replace("/mpiwg/online","")
  365:            
  366:         
  367:         for cnt in range(num_retries):
  368:             try:
  369:                 # patch dirk encoding fehler treten dann nicht mehr auf
  370:                 # dom = NonvalidatingReader.parseUri(metaUrl)
  371:                 txt=urllib.urlopen(metaUrl).read()
  372:                 dom = Parse(txt)
  373:                 break
  374:             except:
  375:                 logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2])
  376:                 
  377:         if dom is None:
  378:             raise IOError("Unable to read infoXMLfrom %s"%(url))
  379:                  
  380:         return dom
  381:                         
  382:         
  383:     def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
  384:         """gets authorization info from the index.meta file at path or given by dom"""
  385:         logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path))
  386:         
  387:         access = None
  388:         
  389:         if docinfo is None:
  390:             docinfo = {}
  391:             
  392:         if dom is None:
  393:             for x in range(cut):
  394:                 path=getParentDir(path)
  395:             dom = self.getIndexMeta(path)
  396:        
  397:         acctype = dom.xpath("//access-conditions/access/@type")
  398:         if acctype and (len(acctype)>0):
  399:             access=acctype[0].value
  400:             if access in ['group', 'institution']:
  401:                 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower()
  402:             
  403:         docinfo['accessType'] = access
  404:         return docinfo
  405:     
  406:         
  407:     def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0):
  408:         """gets bibliographical info from the index.meta file at path or given by dom"""
  409:         logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path))
  410:         
  411:         if docinfo is None:
  412:             docinfo = {}
  413:         
  414:         if dom is None:
  415:             for x in range(cut):
  416:                 path=getParentDir(path)
  417:             dom = self.getIndexMeta(path)
  418:         
  419:         logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path))
  420:         # put in all raw bib fields as dict "bib"
  421:         bib = dom.xpath("//bib/*")
  422:         if bib and len(bib)>0:
  423:             bibinfo = {}
  424:             for e in bib:
  425:                 bibinfo[e.localName] = getTextFromNode(e)
  426:             docinfo['bib'] = bibinfo
  427:         
  428:         # extract some fields (author, title, year) according to their mapping
  429:         metaData=self.metadata.main.meta.bib
  430:         bibtype=dom.xpath("//bib/@type")
  431:         if bibtype and (len(bibtype)>0):
  432:             bibtype=bibtype[0].value
  433:         else:
  434:             bibtype="generic"
  435:             
  436:         bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC)
  437:         docinfo['bib_type'] = bibtype
  438:         bibmap=metaData.generateMappingForType(bibtype)
  439:         # if there is no mapping bibmap is empty (mapping sometimes has empty fields)
  440:         if len(bibmap) > 0 and len(bibmap['author'][0]) > 0:
  441:             try:
  442:                 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])
  443:             except: pass
  444:             try:
  445:                 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])
  446:             except: pass
  447:             try:
  448:                 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])
  449:             except: pass
  450:             logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype)
  451:             try:
  452:                 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
  453:             except:
  454:                 docinfo['lang']=''
  455: 
  456:         return docinfo
  457:     
  458:     
    def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
        """Fill docinfo from the resource and texttool tags of an index.meta document.

        url     -- location of the index.meta document
        dom     -- already parsed index.meta; fetched with getIndexMeta if None
        docinfo -- dict to update; a new dict is created if None

        Depending on what the document contains this sets 'lang',
        'imagePath', 'imageURL', 'numPages', 'viewerURL', 'textURL' and
        'textURLPath', then adds the bibliographical info, optionally the
        presentation info, and the access info. Returns docinfo.
        Raises IOError if no archive path can be determined.
        """
        logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
        if docinfo is None:
           docinfo = {}
            
        if docinfo.get('lang', None) is None:
            docinfo['lang'] = '' # default: no language set
        if dom is None:
            dom = self.getIndexMeta(url)
        
        archivePath = None
        archiveName = None
    
        archiveNames = dom.xpath("//resource/name")
        if archiveNames and (len(archiveNames) > 0):
            archiveName = getTextFromNode(archiveNames[0])
        else:
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))
        
        archivePaths = dom.xpath("//resource/archive-path")
        if archivePaths and (len(archivePaths) > 0):
            archivePath = getTextFromNode(archivePaths[0])
            # clean up archive path
            # NOTE(review): an empty archive-path element would raise IndexError here
            if archivePath[0] != '/':
                archivePath = '/' + archivePath
            if archiveName and (not archivePath.endswith(archiveName)):
                archivePath += "/" + archiveName
        else:
            # try to get archive-path from url
            logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
            if (not url.startswith('http')):
                archivePath = url.replace('index.meta', '')
                
        if archivePath is None:
            # we balk without archive-path
            raise IOError("Missing archive-path (for text-tool) in %s" % (url))
        
        imageDirs = dom.xpath("//texttool/image")
        if imageDirs and (len(imageDirs) > 0):
            imageDir = getTextFromNode(imageDirs[0])
            
        else:
            # we balk with no image tag / not necessary anymore because textmode is now standard
            #raise IOError("No text-tool info in %s"%(url))
            imageDir = ""
            #xquery="//pb"  
            docinfo['imagePath'] = "" # no images
            docinfo['imageURL'] = ""
            
        if imageDir and archivePath:
            #print "image: ", imageDir, " archivepath: ", archivePath
            # image directory is relative to the archive path
            imageDir = os.path.join(archivePath, imageDir)
            imageDir = imageDir.replace("/mpiwg/online", '')
            # adds 'numPages' for the image directory
            docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
            docinfo['imagePath'] = imageDir
            
            docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
            
        viewerUrls = dom.xpath("//texttool/digiliburlprefix")
        if viewerUrls and (len(viewerUrls) > 0):
            viewerUrl = getTextFromNode(viewerUrls[0])
            docinfo['viewerURL'] = viewerUrl
                   
        textUrls = dom.xpath("//texttool/text")
        if textUrls and (len(textUrls) > 0):
            textUrl = getTextFromNode(textUrls[0])
            if urlparse.urlparse(textUrl)[0] == "": # no scheme, so not a URL but a relative path
                textUrl = os.path.join(archivePath, textUrl) 
            # fix URLs starting with /mpiwg/online
            if textUrl.startswith("/mpiwg/online"):
                textUrl = textUrl.replace("/mpiwg/online", '', 1)
            
            docinfo['textURL'] = textUrl
    
        textUrls = dom.xpath("//texttool/text-url-path")
        if textUrls and (len(textUrls) > 0):
            textUrl = getTextFromNode(textUrls[0])
            docinfo['textURLPath'] = textUrl   
         
        presentationUrls = dom.xpath("//texttool/presentation")
        docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get info from bib tag
        
        if presentationUrls and (len(presentationUrls) > 0): # override the bib info with the presentation information
            # the presentation URL is the metadata URL with index.meta replaced
            # by the relative path to the presentation info
            presentationPath = getTextFromNode(presentationUrls[0])
            if url.endswith("index.meta"): 
                presentationUrl = url.replace('index.meta', presentationPath)
            else:
                presentationUrl = url + "/" + presentationPath
            # getNumPages reads docinfo['textURLPath'] and overwrites 'numPages'.
            # The original (German) note here said the count is simply set to one
            # for now and that thumbnail navigation therefore does not work.
            docinfo = self.getNumPages(docinfo)
            docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
    
        docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom)   # get access info
        
        return docinfo
  556: 
  557: 
  558: 
  559:    
  560:    
  561:     def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
  562:         """gets the bibliographical information from the preseantion entry in texttools
  563:         """
  564:         dom=self.getPresentationInfoXML(url)
  565:         try:
  566:             docinfo['author']=getTextFromNode(dom.xpath("//author")[0])
  567:         except:
  568:             pass
  569:         try:
  570:             docinfo['title']=getTextFromNode(dom.xpath("//title")[0])
  571:         except:
  572:             pass
  573:         try:
  574:             docinfo['year']=getTextFromNode(dom.xpath("//date")[0])
  575:         except:
  576:             pass
  577:         return docinfo
  578:     
  579:     def getDocinfoFromImagePath(self,path,docinfo=None,cut=0):
  580:         """path ist the path to the images it assumes that the index.meta file is one level higher."""
  581:         logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path))
  582:         if docinfo is None:
  583:             docinfo = {}
  584:         path=path.replace("/mpiwg/online","")
  585:         docinfo['imagePath'] = path
  586:         docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut)
  587:         
  588:         pathorig=path
  589:         for x in range(cut):       
  590:                 path=getParentDir(path)
  591:         logging.error("PATH:"+path)
  592:         imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path
  593:         docinfo['imageURL'] = imageUrl
  594:         
  595:         #path ist the path to the images it assumes that the index.meta file is one level higher.
  596:         docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
  597:         docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1)
  598:         return docinfo
  599:     
  600:     
  601:     def getDocinfo(self, mode, url):
  602:         """returns docinfo depending on mode"""
  603:         logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url))
  604:         # look for cached docinfo in session
  605:         if self.REQUEST.SESSION.has_key('docinfo'):
  606:             docinfo = self.REQUEST.SESSION['docinfo']
  607:             # check if its still current
  608:             if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url:
  609:                 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo)
  610:                 return docinfo
  611:         # new docinfo
  612:         docinfo = {'mode': mode, 'url': url}
  613:         if mode=="texttool": #index.meta with texttool information
  614:             docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo)
  615:         elif mode=="imagepath":
  616:             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo)
  617:         elif mode=="filepath":
  618:             docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1)
  619:         else:
  620:             logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!")
  621:             raise ValueError("Unknown mode %s"%(mode))
  622:                         
  623:         logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo)
  624:         self.REQUEST.SESSION['docinfo'] = docinfo
  625:         return docinfo
  626:         
  627:         
  628:     def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None):
  629:         """returns pageinfo with the given parameters"""
  630:         pageinfo = {}
  631:         current = getInt(current)
  632:         pageinfo['current'] = current
  633:         rows = int(rows or self.thumbrows)
  634:         pageinfo['rows'] = rows
  635:         cols = int(cols or self.thumbcols)
  636:         pageinfo['cols'] = cols
  637:         grpsize = cols * rows
  638:         pageinfo['groupsize'] = grpsize
  639:         start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
  640:         # int(current / grpsize) * grpsize +1))
  641:         pageinfo['start'] = start
  642:         pageinfo['end'] = start + grpsize
  643:         if docinfo is not None:
  644:             np = int(docinfo['numPages'])
  645:             pageinfo['end'] = min(pageinfo['end'], np)
  646:             pageinfo['numgroups'] = int(np / grpsize)
  647:             if np % grpsize > 0:
  648:                 pageinfo['numgroups'] += 1
  649: 
  650:         return pageinfo
  651:                 
  652: 
  653: 
  654:     def getNumPages(self,docinfo=None):
  655:         """get list of pages from fulltext and put in docinfo"""
  656:         xquery = '//pb'
  657:         text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
  658:         # TODO: better processing of the page list. do we need the info somewhere else also?
  659:         docinfo['numPages'] = text.count("<pb ")
  660:         return docinfo
  661:        
  662:     def getTextPage(self, mode="text", pn=1, docinfo=None):
  663:         """returns single page from fulltext"""
  664:         pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False)
  665:         # post-processing downloaded xml
  666:         pagedom = Parse(pagexml)
  667:         # plain text mode
  668:         if mode == "text":
  669:             # first div contains text
  670:             pagedivs = pagedom.xpath("/div")
  671:             if len(pagedivs) > 0:
  672:                 pagenode = pagedivs[0]
  673:                 return serializeNode(pagenode)
  674: 
  675:         # text-with-links mode
  676:         if mode == "textPollux":
  677:             # first div contains text
  678:             pagedivs = pagedom.xpath("/div")
  679:             if len(pagedivs) > 0:
  680:                 pagenode = pagedivs[0]
  681:                 # check all a-tags
  682:                 links = pagenode.xpath("//a")
  683:                 for l in links:
  684:                     hrefNode = l.getAttributeNodeNS(None, u"href")
  685:                     if hrefNode:
  686:                         # is link with href
  687:                         href = hrefNode.nodeValue
  688:                         if href.startswith('lt/lex.xql'):
  689:                             # is pollux link
  690:                             selfurl = self.absolute_url()
  691:                             # change href
  692:                             hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl)
  693:                             # add target
  694:                             l.setAttributeNS(None, 'target', '_blank')
  695:                 return serializeNode(pagenode)
  696:         
  697:         return "no text here"
  698: 
  699:     
  700:     def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
  701:         """init document viewer"""
  702:         self.title=title
  703:         self.digilibBaseUrl = digilibBaseUrl
  704:         self.thumbrows = thumbrows
  705:         self.thumbcols = thumbcols
  706:         self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
  707:         if RESPONSE is not None:
  708:             RESPONSE.redirect('manage_main')
  709:     
  710:     
  711:         
  712: def manage_AddDocumentViewerForm(self):
  713:     """add the viewer form"""
  714:     pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
  715:     return pt()
  716:   
  717: def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
  718:     """add the viewer"""
  719:     newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
  720:     self._setObject(id,newObj)
  721:     
  722:     if RESPONSE is not None:
  723:         RESPONSE.redirect('manage_main')
  724: 
  725: 
  726: ##
  727: ## DocumentViewerTemplate class
  728: ##
class DocumentViewerTemplate(ZopePageTemplate):
    """Template for document viewer"""
    # a ZopePageTemplate subclass that only differs by its own type name;
    # instances are created by manage_addDocumentViewerTemplate
    meta_type="DocumentViewer Template"
  732: 
  733: 
  734: def manage_addDocumentViewerTemplateForm(self):
  735:     """Form for adding"""
  736:     pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)
  737:     return pt()
  738: 
  739: def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,
  740:                            REQUEST=None, submit=None):
  741:     "Add a Page Template with optional file content."
  742: 
  743:     self._setObject(id, DocumentViewerTemplate(id))
  744:     ob = getattr(self, id)
  745:     txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()
  746:     logging.info("txt %s:"%txt)
  747:     ob.pt_edit(txt,"text/html")
  748:     if title:
  749:         ob.pt_setTitle(title)
  750:     try:
  751:         u = self.DestinationURL()
  752:     except AttributeError:
  753:         u = REQUEST['URL1']
  754:         
  755:     u = "%s/%s" % (u, urllib.quote(id))
  756:     REQUEST.RESPONSE.redirect(u+'/manage_main')
  757:     return ''
  758: 
  759: 
  760:     

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>