# documentViewer/documentViewer.py
#
# File: [Repository] / documentViewer / documentViewer.py
# Revision 1.178 (branch MAIN, CVS tag HEAD)
# Committed Wed Jan 4 07:38:17 2012 UTC by dwinter
#
# Log message: hack for annalen

from OFS.Folder import Folder from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate from Products.PageTemplates.PageTemplateFile import PageTemplateFile from AccessControl import ClassSecurityInfo from AccessControl import getSecurityManager from Globals import package_home from Products.zogiLib.zogiLib import browserCheck from Ft.Xml import EMPTY_NAMESPACE, Parse import Ft.Xml.Domlette import os.path import sys import urllib import urllib2 import logging import math import urlparse import cStringIO import re import string def logger(txt,method,txt2): """logging""" logging.info(txt+ txt2) def getInt(number, default=0): """returns always an int (0 in case of problems)""" try: return int(number) except: return int(default) def getTextFromNode(nodename): """get the cdata content of a node""" if nodename is None: return "" nodelist=nodename.childNodes rc = "" for node in nodelist: if node.nodeType == node.TEXT_NODE: rc = rc + node.data return rc def serializeNode(node, encoding="utf-8"): """returns a string containing node as XML""" stream = cStringIO.StringIO() #logging.debug("BUF: %s"%(stream)) Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) s = stream.getvalue() #logging.debug("BUF: %s"%(s)) stream.close() return s def browserCheck(self): """check the browsers request to find out the browser type""" bt = {} ua = self.REQUEST.get_header("HTTP_USER_AGENT") bt['ua'] = ua bt['isIE'] = False bt['isN4'] = False bt['versFirefox']="" bt['versIE']="" bt['versSafariChrome']="" bt['versOpera']="" if string.find(ua, 'MSIE') > -1: bt['isIE'] = True else: bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) # Safari oder Chrome identification try: nav = ua[string.find(ua, '('):] nav1=ua[string.find(ua,')'):] nav2=nav1[string.find(nav1,'('):] nav3=nav2[string.find(nav2,')'):] ie = string.split(nav, "; ")[1] ie1 =string.split(nav1, " ")[2] ie2 =string.split(nav3, " ")[1] ie3 =string.split(nav3, " ")[2] if string.find(ie3, "Safari") >-1: 
bt['versSafariChrome']=string.split(ie2, "/")[1] except: pass # IE identification try: nav = ua[string.find(ua, '('):] ie = string.split(nav, "; ")[1] if string.find(ie, "MSIE") > -1: bt['versIE'] = string.split(ie, " ")[1] except:pass # Firefox identification try: nav = ua[string.find(ua, '('):] nav1=ua[string.find(ua,')'):] if string.find(ie1, "Firefox") >-1: nav5= string.split(ie1, "/")[1] logging.debug("FIREFOX: %s"%(nav5)) bt['versFirefox']=nav5[0:3] except:pass #Opera identification try: if string.find(ua,"Opera") >-1: nav = ua[string.find(ua, '('):] nav1=nav[string.find(nav,')'):] bt['versOpera']=string.split(nav1,"/")[2] except:pass bt['isMac'] = string.find(ua, 'Macintosh') > -1 bt['isWin'] = string.find(ua, 'Windows') > -1 bt['isIEWin'] = bt['isIE'] and bt['isWin'] bt['isIEMac'] = bt['isIE'] and bt['isMac'] bt['staticHTML'] = False return bt def getParentDir(path): """returns pathname shortened by one""" return '/'.join(path.split('/')[0:-1]) def getHttpData(url, data=None, num_tries=3, timeout=10): """returns result from url+data HTTP request""" # we do GET (by appending data to url) if isinstance(data, str) or isinstance(data, unicode): # if data is string then append url = "%s?%s"%(url,data) elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): # urlencode url = "%s?%s"%(url,urllib.urlencode(data)) response = None errmsg = None for cnt in range(num_tries): try: logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) if sys.version_info < (2, 6): # set timeout on socket -- ugly :-( import socket socket.setdefaulttimeout(float(timeout)) response = urllib2.urlopen(url) else: response = urllib2.urlopen(url,timeout=float(timeout)) # check result? 
break except urllib2.HTTPError, e: logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) errmsg = str(e) # stop trying break except urllib2.URLError, e: logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) errmsg = str(e) # stop trying #break if response is not None: data = response.read() response.close() return data raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) #return None ## ## documentViewer class ## class documentViewer(Folder): """document viewer""" meta_type="Document viewer" security=ClassSecurityInfo() manage_options=Folder.manage_options+( {'label':'main config','action':'changeDocumentViewerForm'}, ) # templates and forms viewer_main = PageTemplateFile('zpt/viewer_main', globals()) toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) toc_text = PageTemplateFile('zpt/toc_text', globals()) toc_figures = PageTemplateFile('zpt/toc_figures', globals()) page_main_images = PageTemplateFile('zpt/page_main_images', globals()) page_main_double = PageTemplateFile('zpt/page_main_double', globals()) page_main_text = PageTemplateFile('zpt/page_main_text', globals()) page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals()) page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals()) head_main = PageTemplateFile('zpt/head_main', globals()) docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) info_xml = PageTemplateFile('zpt/info_xml', globals()) thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) security.declareProtected('View management screens','changeDocumentViewerForm') changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): """init document viewer""" self.id=id 
self.title=title self.thumbcols = thumbcols self.thumbrows = thumbrows # authgroups is list of authorized groups (delimited by ,) self.authgroups = [s.strip().lower() for s in authgroups.split(',')] # create template folder so we can always use template.something templateFolder = Folder('template') #self['template'] = templateFolder # Zope-2.12 style self._setObject('template',templateFolder) # old style try: import MpdlXmlTextServer textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) #templateFolder['fulltextclient'] = xmlRpcClient templateFolder._setObject('fulltextclient',textServer) except Exception, e: logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) try: from Products.zogiLib.zogiLib import zogiLib zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") #templateFolder['zogilib'] = zogilib templateFolder._setObject('zogilib',zogilib) except Exception, e: logging.error("Unable to create zogiLib for zogilib: "+str(e)) # proxy text server methods to fulltextclient def getTextPage(self, **args): """get page""" return self.template.fulltextclient.getTextPage(**args) def getOrigPages(self, **args): """get page""" return self.template.fulltextclient.getOrigPages(**args) def getOrigPagesNorm(self, **args): """get page""" return self.template.fulltextclient.getOrigPagesNorm(**args) def getQuery(self, **args): """get query in search""" return self.template.fulltextclient.getQuery(**args) def getSearch(self, **args): """get search""" return self.template.fulltextclient.getSearch(**args) def getGisPlaces(self, **args): """get gis places""" return self.template.fulltextclient.getGisPlaces(**args) def getAllGisPlaces(self, **args): """get all gis places """ return self.template.fulltextclient.getAllGisPlaces(**args) def getTranslate(self, **args): """get translate""" return self.template.fulltextclient.getTranslate(**args) def getLemma(self, 
**args): """get lemma""" return self.template.fulltextclient.getLemma(**args) def getLemmaQuery(self, **args): """get query""" return self.template.fulltextclient.getLemmaQuery(**args) def getLex(self, **args): """get lex""" return self.template.fulltextclient.getLex(**args) def getToc(self, **args): """get toc""" return self.template.fulltextclient.getToc(**args) def getTocPage(self, **args): """get tocpage""" return self.template.fulltextclient.getTocPage(**args) security.declareProtected('View','thumbs_rss') def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): ''' view it @param mode: defines how to access the document behind url @param url: url which contains display information @param viewMode: if images display images, if text display text, default is images (text,images or auto) ''' logging.debug("HHHHHHHHHHHHHH:load the rss") logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) if not hasattr(self, 'template'): # create template folder if it doesn't exist self.manage_addFolder('template') if not self.digilibBaseUrl: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) #pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo) ''' ZDES ''' pt = getattr(self.template, 'thumbs_main_rss') if viewMode=="auto": # automodus gewaehlt if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert viewMode="text" else: viewMode="images" return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) security.declareProtected('View','index_html') def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): ''' view it @param mode: defines how to access the document behind url @param url: url which contains display information @param viewMode: if images display 
images, if text display text, default is auto (text,images or auto) @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) @param characterNormalization type of text display (reg, norm, none) @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) ''' logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) if not hasattr(self, 'template'): # this won't work logging.error("template folder missing!") return "ERROR: template folder missing!" if not getattr(self, 'digilibBaseUrl', None): self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) if tocMode != "thumbs": # get table of contents docinfo = self.getToc(mode=tocMode, docinfo=docinfo) if viewMode=="auto": # automodus gewaehlt if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert viewMode="text_dict" else: viewMode="images" pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) if (docinfo.get('textURLPath',None)): page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo) pageinfo['textPage'] = page tt = getattr(self, 'template') pt = getattr(tt, 'viewer_main') return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) def generateMarks(self,mk): ret="" if mk is None: return "" if not isinstance(mk, list): mk=[mk] for m in mk: ret+="mk=%s"%m return ret def getBrowser(self): """getBrowser the version of browser """ bt = browserCheck(self) logging.debug("BROWSER VERSION: %s"%(bt)) return bt def findDigilibUrl(self): """try to get the digilib URL from zogilib""" url = self.template.zogilib.getDLBaseUrl() return url def getDocumentViewerURL(self): """returns the URL of this instance""" return self.absolute_url() def getStyle(self, idx, selected, 
style=""): """returns a string with the given style and append 'sel' if path == selected.""" #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) if idx == selected: return style + 'sel' else: return style def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'): """returns URL to documentviewer with parameter param set to val or from dict params""" # copy existing request params urlParams=self.REQUEST.form.copy() # change single param if param is not None: if val is None: if urlParams.has_key(param): del urlParams[param] else: urlParams[param] = str(val) # change more params if params is not None: for k in params.keys(): v = params[k] if v is None: # val=None removes param if urlParams.has_key(k): del urlParams[k] else: urlParams[k] = v # FIXME: does this belong here? if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath urlParams["mode"] = "imagepath" urlParams["url"] = getParentDir(urlParams["url"]) # quote values and assemble into query string (not escaping '/') ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) #ps = urllib.urlencode(urlParams) if baseUrl is None: baseUrl = self.REQUEST['URL1'] url = "%s?%s"%(baseUrl, ps) return url def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None): """link to documentviewer with parameter param set to val""" return self.getLink(param, val, params, baseUrl, '&') def getInfo_xml(self,url,mode): """returns info about the document as XML""" if not self.digilibBaseUrl: self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" docinfo = self.getDocinfo(mode=mode,url=url) pt = getattr(self.template, 'info_xml') return pt(docinfo=docinfo) def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True): """returns new option state""" if not 
self.REQUEST.SESSION.has_key(optionName): # not in session -- initial opt = {'lastState': newState, 'state': initialState} else: opt = self.REQUEST.SESSION.get(optionName) if opt['lastState'] != newState: # state in session has changed -- toggle opt['state'] = not opt['state'] opt['lastState'] = newState self.REQUEST.SESSION[optionName] = opt return opt['state'] def isAccessible(self, docinfo): """returns if access to the resource is granted""" access = docinfo.get('accessType', None) logging.debug("documentViewer (accessOK) access type %s"%access) if access is not None and access == 'free': logging.debug("documentViewer (accessOK) access is free") return True elif access is None or access in self.authgroups: # only local access -- only logged in users user = getSecurityManager().getUser() logging.debug("documentViewer (accessOK) user=%s ip=%s"%(user,self.REQUEST.getClientAddr())) if user is not None: #print "user: ", user return (user.getUserName() != "Anonymous User") else: return False logging.error("documentViewer (accessOK) unknown access type %s"%access) return False def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): """gibt param von dlInfo aus""" if docinfo is None: docinfo = {} for x in range(cut): path=getParentDir(path) infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) txt = getHttpData(infoUrl) if txt is None: raise IOError("Unable to get dir-info from %s"%(infoUrl)) dom = Parse(txt) sizes=dom.xpath("//dir/size") logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) if sizes: docinfo['numPages'] = int(getTextFromNode(sizes[0])) else: docinfo['numPages'] = 0 # TODO: produce and keep list of image names and numbers return docinfo def getIndexMetaPath(self,url): """gib nur den Pfad zurueck""" regexp = re.compile(r".*(experimental|permanent)/(.*)") regpath = regexp.match(url) if (regpath==None): return "" 
logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2)) return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)) def getIndexMetaUrl(self,url): """returns utr of index.meta document at url""" metaUrl = None if url.startswith("http://"): # real URL metaUrl = url else: # online path server=self.digilibBaseUrl+"/servlet/Texter?fn=" metaUrl=server+url.replace("/mpiwg/online","") if not metaUrl.endswith("index.meta"): metaUrl += "/index.meta" return metaUrl def getDomFromIndexMeta(self, url): """get dom from index meta""" dom = None metaUrl = self.getIndexMetaUrl(url) logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl) txt=getHttpData(metaUrl) if txt is None: raise IOError("Unable to read index meta from %s"%(url)) dom = Parse(txt) return dom def getPresentationInfoXML(self, url): """returns dom of info.xml document at url""" dom = None metaUrl = None if url.startswith("http://"): # real URL metaUrl = url else: # online path server=self.digilibBaseUrl+"/servlet/Texter?fn=" metaUrl=server+url.replace("/mpiwg/online","") txt=getHttpData(metaUrl) if txt is None: raise IOError("Unable to read infoXMLfrom %s"%(url)) dom = Parse(txt) return dom def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): """gets authorization info from the index.meta file at path or given by dom""" logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) access = None if docinfo is None: docinfo = {} if dom is None: for x in range(cut): path=getParentDir(path) dom = self.getDomFromIndexMeta(path) acctype = dom.xpath("//access-conditions/access/@type") if acctype and (len(acctype)>0): access=acctype[0].value if access in ['group', 'institution']: access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() docinfo['accessType'] = access return docinfo def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): """gets bibliographical info from the index.meta file at path or given by dom""" 
#logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) if docinfo is None: docinfo = {} if dom is None: for x in range(cut): path=getParentDir(path) dom = self.getDomFromIndexMeta(path) docinfo['indexMetaPath']=self.getIndexMetaPath(path); #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) # put in all raw bib fields as dict "bib" bib = dom.xpath("//bib/*") if bib and len(bib)>0: bibinfo = {} for e in bib: bibinfo[e.localName] = getTextFromNode(e) docinfo['bib'] = bibinfo # extract some fields (author, title, year) according to their mapping metaData=self.metadata.main.meta.bib bibtype=dom.xpath("//bib/@type") if bibtype and (len(bibtype)>0): bibtype=bibtype[0].value else: bibtype="generic" bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) docinfo['bib_type'] = bibtype bibmap=metaData.generateMappingForType(bibtype) #logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap)) #logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype)) # if there is no mapping bibmap is empty (mapping sometimes has empty fields) if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: try: docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) except: pass try: docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) except: pass try: docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) except: pass #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) try: docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) except: docinfo['lang']='' try: docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0]) except: docinfo['city']='' try: docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0]) except: docinfo['number_of_pages']='' try: docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0]) except: 
docinfo['series_volume']='' try: docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0]) except: docinfo['number_of_volumes']='' try: docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0]) except: docinfo['translator']='' try: docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0]) except: docinfo['edition']='' try: docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0]) except: docinfo['series_author']='' try: docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0]) except: docinfo['publisher']='' try: docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0]) except: docinfo['series_title']='' try: docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0]) except: docinfo['isbn_issn']='' #logging.debug("I NEED BIBTEX %s"%docinfo) return docinfo def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): """gets name info from the index.meta file at path or given by dom""" if docinfo is None: docinfo = {} if dom is None: for x in range(cut): path=getParentDir(path) dom = self.getDomFromIndexMeta(path) docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0]) logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) return docinfo def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): """parse texttool tag in index meta""" logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) if docinfo is None: docinfo = {} if docinfo.get('lang', None) is None: docinfo['lang'] = '' # default keine Sprache gesetzt if dom is None: dom = self.getDomFromIndexMeta(url) archivePath = None archiveName = None archiveNames = dom.xpath("//resource/name") if archiveNames and (len(archiveNames) > 0): archiveName = getTextFromNode(archiveNames[0]) else: logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) archivePaths = dom.xpath("//resource/archive-path") if archivePaths and 
(len(archivePaths) > 0): archivePath = getTextFromNode(archivePaths[0]) # clean up archive path if archivePath[0] != '/': archivePath = '/' + archivePath if archiveName and (not archivePath.endswith(archiveName)): archivePath += "/" + archiveName else: # try to get archive-path from url logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) if (not url.startswith('http')): archivePath = url.replace('index.meta', '') if archivePath is None: # we balk without archive-path raise IOError("Missing archive-path (for text-tool) in %s" % (url)) imageDirs = dom.xpath("//texttool/image") if imageDirs and (len(imageDirs) > 0): imageDir = getTextFromNode(imageDirs[0]) else: # we balk with no image tag / not necessary anymore because textmode is now standard #raise IOError("No text-tool info in %s"%(url)) imageDir = "" #xquery="//pb" docinfo['imagePath'] = "" # keine Bilder docinfo['imageURL'] = "" if imageDir and archivePath: #print "image: ", imageDir, " archivepath: ", archivePath imageDir = os.path.join(archivePath, imageDir) imageDir = imageDir.replace("/mpiwg/online", '') docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) docinfo['imagePath'] = imageDir docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir viewerUrls = dom.xpath("//texttool/digiliburlprefix") if viewerUrls and (len(viewerUrls) > 0): viewerUrl = getTextFromNode(viewerUrls[0]) docinfo['viewerURL'] = viewerUrl # old style text URL textUrls = dom.xpath("//texttool/text") if textUrls and (len(textUrls) > 0): textUrl = getTextFromNode(textUrls[0]) if urlparse.urlparse(textUrl)[0] == "": #keine url textUrl = os.path.join(archivePath, textUrl) # fix URLs starting with /mpiwg/online if textUrl.startswith("/mpiwg/online"): textUrl = textUrl.replace("/mpiwg/online", '', 1) docinfo['textURL'] = textUrl #TODO: hack-DW for annalen if (textUrl is not None) and (textUrl.startswith("/permanent/einstein/annalen")): 
textUrl=textUrl.replace("/permanent/einstein/annalen/","/diverse/de/") splitted=textUrl.split("/fulltext") textUrl=splitted[0]+".xml" textUrlkurz = string.split(textUrl, ".")[0] docinfo['textURLPathkurz'] = textUrlkurz docinfo['textURLPath'] = textUrl logging.debug("hack") logging.debug(textUrl) # new style text-url-path textUrls = dom.xpath("//texttool/text-url-path") if textUrls and (len(textUrls) > 0): textUrl = getTextFromNode(textUrls[0]) docinfo['textURLPath'] = textUrl textUrlkurz = string.split(textUrl, ".")[0] docinfo['textURLPathkurz'] = textUrlkurz #if not docinfo['imagePath']: # text-only, no page images #docinfo = self.getNumTextPages(docinfo) presentationUrls = dom.xpath("//texttool/presentation") docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag #docinfo = self.getDownloadfromDocinfoToBibtex(url, docinfo=docinfo, dom=dom) docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten # durch den relativen Pfad auf die presentation infos presentationPath = getTextFromNode(presentationUrls[0]) if url.endswith("index.meta"): presentationUrl = url.replace('index.meta', presentationPath) else: presentationUrl = url + "/" + presentationPath docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info return docinfo def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): """gets the bibliographical information from the preseantion entry in texttools """ dom=self.getPresentationInfoXML(url) try: docinfo['author']=getTextFromNode(dom.xpath("//author")[0]) except: pass try: docinfo['title']=getTextFromNode(dom.xpath("//title")[0]) except: pass try: 
docinfo['year']=getTextFromNode(dom.xpath("//date")[0]) except: pass return docinfo def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): """path ist the path to the images it assumes that the index.meta file is one level higher.""" logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) if docinfo is None: docinfo = {} path=path.replace("/mpiwg/online","") docinfo['imagePath'] = path docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) pathorig=path for x in range(cut): path=getParentDir(path) logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path docinfo['imageURL'] = imageUrl #path ist the path to the images it assumes that the index.meta file is one level higher. docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) #docinfo = self.getDownloadfromDocinfoToBibtex(pathorig,docinfo=docinfo,cut=cut+1) docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) return docinfo def getDocinfo(self, mode, url): """returns docinfo depending on mode""" logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) # look for cached docinfo in session if self.REQUEST.SESSION.has_key('docinfo'): docinfo = self.REQUEST.SESSION['docinfo'] # check if its still current if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) return docinfo # new docinfo docinfo = {'mode': mode, 'url': url} if mode=="texttool": #index.meta with texttool information docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) elif mode=="imagepath": docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) elif mode=="filepath": docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) else: logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) raise ValueError("Unknown mode %s! 
Has to be one of 'texttool','imagepath','filepath'."%(mode)) # FIXME: fake texturlpath if not docinfo.has_key('textURLPath'): docinfo['textURLPath'] = None logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) #logging.debug("documentViewer (getdocinfo) docinfo: %s"%) self.REQUEST.SESSION['docinfo'] = docinfo return docinfo def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): """returns pageinfo with the given parameters""" pageinfo = {} current = getInt(current) pageinfo['current'] = current rows = int(rows or self.thumbrows) pageinfo['rows'] = rows cols = int(cols or self.thumbcols) pageinfo['cols'] = cols grpsize = cols * rows pageinfo['groupsize'] = grpsize start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) # int(current / grpsize) * grpsize +1)) pageinfo['start'] = start pageinfo['end'] = start + grpsize if (docinfo is not None) and ('numPages' in docinfo): np = int(docinfo['numPages']) pageinfo['end'] = min(pageinfo['end'], np) pageinfo['numgroups'] = int(np / grpsize) if np % grpsize > 0: pageinfo['numgroups'] += 1 pageinfo['viewMode'] = viewMode pageinfo['tocMode'] = tocMode pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1') pageinfo['query'] = self.REQUEST.get('query','') pageinfo['queryType'] = self.REQUEST.get('queryType','') pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') pageinfo['textPN'] = self.REQUEST.get('textPN','1') pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') pageinfo ['highlightElementPos'] = self.REQUEST.get('highlightElementPos','') pageinfo ['highlightElement'] = self.REQUEST.get('highlightElement','') pageinfo ['xpointer'] = self.REQUEST.get('xpointer','') pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 
pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') toc = int (pageinfo['tocPN']) pageinfo['textPages'] =int (toc) if 'tocSize_%s'%tocMode in docinfo: tocSize = int(docinfo['tocSize_%s'%tocMode]) tocPageSize = int(pageinfo['tocPageSize']) # cached toc if tocSize%tocPageSize>0: tocPages=tocSize/tocPageSize+1 else: tocPages=tocSize/tocPageSize pageinfo['tocPN'] = min (tocPages,toc) pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') #pageinfo['sn'] =self.REQUEST.get('sn','') pageinfo['s'] =self.REQUEST.get('s','') return pageinfo def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): """init document viewer""" self.title=title self.digilibBaseUrl = digilibBaseUrl self.thumbrows = thumbrows self.thumbcols = thumbcols self.authgroups = [s.strip().lower() for s in authgroups.split(',')] if RESPONSE is not None: RESPONSE.redirect('manage_main') def manage_AddDocumentViewerForm(self): """add the viewer form""" pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) return pt() def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): """add the viewer""" newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) self._setObject(id,newObj) if RESPONSE is not None: RESPONSE.redirect('manage_main') ## DocumentViewerTemplate class class DocumentViewerTemplate(ZopePageTemplate): """Template for document viewer""" meta_type="DocumentViewer Template" def manage_addDocumentViewerTemplateForm(self): """Form for adding""" pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) return pt() def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, REQUEST=None, submit=None): "Add a Page Template with optional file content." 
self._setObject(id, DocumentViewerTemplate(id)) ob = getattr(self, id) txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() logging.info("txt %s:"%txt) ob.pt_edit(txt,"text/html") if title: ob.pt_setTitle(title) try: u = self.DestinationURL() except AttributeError: u = REQUEST['URL1'] u = "%s/%s" % (u, urllib.quote(id)) REQUEST.RESPONSE.redirect(u+'/manage_main') return ''