Changeset 513:67095296c95a in documentViewer for documentViewer.py
- Timestamp:
- Feb 28, 2012, 6:10:08 PM (12 years ago)
- Branch:
- default
- Parents:
- 497:73fb73577961 (diff), 512:92a6443a6f16 (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent. - File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
documentViewer.py
r452 r513 1 2 1 from OFS.Folder import Folder 3 2 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 from App.ImageFile import ImageFile 5 5 from AccessControl import ClassSecurityInfo 6 6 from AccessControl import getSecurityManager 7 7 from Globals import package_home 8 from Products.zogiLib.zogiLib import browserCheck 9 10 from Ft.Xml import EMPTY_NAMESPACE, Parse 11 import Ft.Xml.Domlette 12 import os.path 8 9 import xml.etree.ElementTree as ET 10 11 import os 13 12 import sys 14 13 import urllib 15 import urllib216 14 import logging 17 15 import math 18 16 import urlparse 19 import cStringIO20 17 import re 21 18 import string 22 19 23 def logger(txt,method,txt2): 24 """logging""" 25 logging.info(txt+ txt2) 26 27 28 def getInt(number, default=0): 29 """returns always an int (0 in case of problems)""" 30 try: 31 return int(number) 32 except: 33 return int(default) 34 35 def getTextFromNode(nodename): 36 """get the cdata content of a node""" 37 if nodename is None: 38 return "" 39 nodelist=nodename.childNodes 40 rc = "" 41 for node in nodelist: 42 if node.nodeType == node.TEXT_NODE: 43 rc = rc + node.data 44 return rc 45 20 from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml 21 46 22 def serializeNode(node, encoding="utf-8"): 47 23 """returns a string containing node as XML""" 48 stream = cStringIO.StringIO() 49 #logging.debug("BUF: %s"%(stream)) 50 Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 51 s = stream.getvalue() 52 #logging.debug("BUF: %s"%(s)) 53 stream.close() 24 s = ET.tostring(node) 25 26 # 4Suite: 27 # stream = cStringIO.StringIO() 28 # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 29 # s = stream.getvalue() 30 # stream.close() 54 31 return s 55 32 … … 115 92 return bt 116 93 117 118 def getParentDir(path): 119 """returns pathname shortened by one""" 120 return '/'.join(path.split('/')[0:-1]) 121 122 123 def getHttpData(url, data=None, num_tries=3, timeout=10): 124 """returns result from url+data HTTP request""" 125 # we do GET (by appending data to url) 126 if isinstance(data, str) or isinstance(data, unicode): 127 # if data is string then append 128 url = "%s?%s"%(url,data) 129 elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): 130 # urlencode 131 url = "%s?%s"%(url,urllib.urlencode(data)) 132 133 response = None 134 errmsg = None 135 for cnt in range(num_tries): 136 try: 137 logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) 138 if sys.version_info < (2, 6): 139 # set timeout on socket -- ugly :-( 140 import socket 141 socket.setdefaulttimeout(float(timeout)) 142 response = urllib2.urlopen(url) 143 else: 144 response = urllib2.urlopen(url,timeout=float(timeout)) 145 # check result? 146 break 147 except urllib2.HTTPError, e: 148 logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) 149 errmsg = str(e) 150 # stop trying 151 break 152 except urllib2.URLError, e: 153 logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) 154 errmsg = str(e) 155 # stop trying 156 #break 157 158 if response is not None: 159 data = response.read() 160 response.close() 161 return data 162 163 raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) 164 #return None 94 def getParentPath(path, cnt=1): 95 """returns pathname shortened by cnt""" 96 # make sure path doesn't end with / 97 path = path.rstrip('/') 98 # split by /, shorten, and reassemble 99 return '/'.join(path.split('/')[0:-cnt]) 165 100 166 101 ## … … 173 108 security=ClassSecurityInfo() 174 109 manage_options=Folder.manage_options+( 175 {'label':' main config','action':'changeDocumentViewerForm'},110 {'label':'Configuration','action':'changeDocumentViewerForm'}, 176 111 ) 112 113 metadataService = None 114 """MetaDataFolder instance""" 177 115 178 116 # templates and forms 179 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 117 viewer_text = PageTemplateFile('zpt/viewer_text', globals()) 118 viewer_xml = PageTemplateFile('zpt/viewer_xml', globals()) 119 viewer_images = PageTemplateFile('zpt/viewer_images', globals()) 120 viewer_index = PageTemplateFile('zpt/viewer_index', globals()) 180 121 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 181 122 toc_text = PageTemplateFile('zpt/toc_text', globals()) 182 123 toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 183 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 184 page_main_double = PageTemplateFile('zpt/page_main_double', globals()) 185 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 186 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 187 page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals()) 188 page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) 189 page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals()) 190 head_main = PageTemplateFile('zpt/head_main', globals()) 191 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 124 toc_none = PageTemplateFile('zpt/toc_none', globals()) 125 common_template = PageTemplateFile('zpt/common_template', globals()) 126 search_template = PageTemplateFile('zpt/search_template', globals()) 192 127 info_xml = PageTemplateFile('zpt/info_xml', globals()) 193 194 195 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 196 security.declareProtected('View management screens','changeDocumentViewerForm') 197 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 198 128 docuviewer_css = ImageFile('css/docuviewer.css',globals()) 129 # make ImageFile better for development 130 docuviewer_css.index_html = refreshingImageFileIndexHtml 131 jquery_js = ImageFile('js/jquery.js',globals()) 132 199 133 200 134 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): … … 209 143 210 144 templateFolder = Folder('template') 211 #self['template'] = templateFolder # Zope-2.12 style212 self._setObject('template',templateFolder) # old style145 self['template'] = templateFolder # Zope-2.12 style 146 #self._setObject('template',templateFolder) # old style 213 147 try: 214 148 import MpdlXmlTextServer 215 149 textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) 216 #templateFolder['fulltextclient'] = xmlRpcClient217 templateFolder._setObject('fulltextclient',textServer)150 templateFolder['fulltextclient'] = textServer 151 #templateFolder._setObject('fulltextclient',textServer) 218 152 except Exception, e: 219 153 logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) 154 220 155 try: 221 156 from Products.zogiLib.zogiLib import zogiLib 222 157 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 223 #templateFolder['zogilib'] = zogilib224 templateFolder._setObject('zogilib',zogilib)158 templateFolder['zogilib'] = zogilib 159 #templateFolder._setObject('zogilib',zogilib) 225 160 except Exception, e: 226 161 logging.error("Unable to create zogiLib for zogilib: "+str(e)) 227 162 163 try: 164 # assume MetaDataFolder instance is called metadata 165 self.metadataService = getattr(self, 'metadata') 166 except Exception, e: 167 logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 168 169 if digilibBaseUrl is not None: 170 self.digilibBaseUrl = digilibBaseUrl 171 228 172 229 173 # proxy text server methods to fulltextclient 230 174 def getTextPage(self, **args): 231 """ getpage"""175 """returns full text content of page""" 232 176 return self.template.fulltextclient.getTextPage(**args) 233 177 234 def get OrigPages(self, **args):235 """ get page"""236 return self.template.fulltextclient.get OrigPages(**args)237 238 def get OrigPagesNorm(self, **args):239 """ get page"""240 return self.template.fulltextclient.get OrigPagesNorm(**args)241 242 def get Query(self, **args):243 """ get query in search"""244 return self.template.fulltextclient.get Query(**args)245 246 def get Search(self, **args):247 """ get search"""248 return self.template.fulltextclient.get Search(**args)249 250 def get GisPlaces(self, **args):251 """get gis places"""252 return self.template.fulltextclient.get GisPlaces(**args)178 def getSearchResults(self, **args): 179 """loads list of search results and stores XML in docinfo""" 180 return self.template.fulltextclient.getSearchResults(**args) 181 182 def getResultsPage(self, **args): 183 """returns one page of the search results""" 184 return self.template.fulltextclient.getResultsPage(**args) 185 186 def getToc(self, **args): 187 """loads table of contents and stores XML in docinfo""" 188 return self.template.fulltextclient.getToc(**args) 189 190 def getTocPage(self, **args): 191 """returns one page of the table of contents""" 192 return self.template.fulltextclient.getTocPage(**args) 193 194 def getPlacesOnPage(self, **args): 195 """get list of gis places on one page""" 196 return self.template.fulltextclient.getPlacesOnPage(**args) 253 197 254 def getAllGisPlaces(self, **args): 255 """get all gis places """ 256 return self.template.fulltextclient.getAllGisPlaces(**args) 257 258 def getTranslate(self, **args): 259 """get translate""" 260 return self.template.fulltextclient.getTranslate(**args) 261 262 def getLemma(self, **args): 263 """get lemma""" 264 return self.template.fulltextclient.getLemma(**args) 265 266 def getLemmaQuery(self, **args): 267 """get query""" 268 return self.template.fulltextclient.getLemmaQuery(**args) 269 270 def getLex(self, **args): 271 """get lex""" 272 return self.template.fulltextclient.getLex(**args) 273 274 def getToc(self, **args): 275 """get toc""" 276 return self.template.fulltextclient.getToc(**args) 277 278 def getTocPage(self, **args): 279 """get tocpage""" 280 return self.template.fulltextclient.getTocPage(**args) 281 282 198 #WTF? 199 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 283 200 security.declareProtected('View','thumbs_rss') 284 201 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): … … 291 208 ''' 292 209 logging.debug("HHHHHHHHHHHHHH:load the rss") 293 logg er("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))210 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 294 211 295 212 if not hasattr(self, 'template'): … … 313 230 314 231 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 232 315 233 316 234 security.declareProtected('View','index_html') 317 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): 318 ''' 319 view it 235 def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1): 236 """ 237 view page 238 @param url: url which contains display information 320 239 @param mode: defines how to access the document behind url 321 @param url: url which contains display information322 @param view Mode: if images display images, if text display text, default is auto (text,images or auto)240 @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto' 241 @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text' 323 242 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 324 @param characterNormalization type of text display (reg, norm, none) 325 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) 326 ''' 327 328 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 243 """ 244 245 logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn)) 329 246 330 247 if not hasattr(self, 'template'): … … 340 257 if tocMode != "thumbs": 341 258 # get table of contents 342 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 343 344 if viewMode=="auto": # automodus gewaehlt 345 if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 346 viewMode="text_dict" 259 self.getToc(mode=tocMode, docinfo=docinfo) 260 261 # auto viewMode: text if there is a text else images 262 if viewMode=="auto": 263 if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 264 viewMode = "text" 265 if viewLayer is None: 266 viewLayer = "dict" 347 267 else: 348 viewMode ="images"268 viewMode = "images" 349 269 350 pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 351 352 if (docinfo.get('textURLPath',None)): 353 page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo) 354 pageinfo['textPage'] = page 355 tt = getattr(self, 'template') 356 pt = getattr(tt, 'viewer_main') 357 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 270 elif viewMode == "text_dict": 271 # legacy fix 272 viewMode = "text" 273 viewLayer = "dict" 274 275 pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode) 276 277 # get template /template/viewer_$viewMode 278 pt = getattr(self.template, 'viewer_%s'%viewMode, None) 279 if pt is None: 280 logging.error("No template for viewMode=%s!"%viewMode) 281 # TODO: error page? 282 return "No template for viewMode=%s!"%viewMode 283 284 # and execute with parameters 285 return pt(docinfo=docinfo, pageinfo=pageinfo) 358 286 287 #WTF? 359 288 def generateMarks(self,mk): 360 289 ret="" … … 378 307 url = self.template.zogilib.getDLBaseUrl() 379 308 return url 309 310 def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None): 311 """returns URL to digilib Scaler with params""" 312 url = None 313 if docinfo is not None: 314 url = docinfo.get('imageURL', None) 315 316 if url is None: 317 url = "%s/servlet/Scaler?"%self.digilibBaseUrl 318 if fn is None and docinfo is not None: 319 fn = docinfo.get('imagePath','') 320 321 url += "fn=%s"%fn 322 323 if pn: 324 url += "&pn=%s"%pn 325 326 url += "&dw=%s&dh=%s"%(dw,dh) 327 return url 380 328 381 329 def getDocumentViewerURL(self): … … 384 332 385 333 def getStyle(self, idx, selected, style=""): 386 """returns a string with the given style and append 'sel' if path== selected."""334 """returns a string with the given style and append 'sel' if idx == selected.""" 387 335 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 388 336 if idx == selected: … … 391 339 return style 392 340 393 def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'): 394 """returns URL to documentviewer with parameter param set to val or from dict params""" 341 def getParams(self, param=None, val=None, params=None, duplicates=None): 342 """returns dict with URL parameters. 343 344 Takes URL parameters and additionally param=val or dict params. 345 Deletes key if value is None.""" 395 346 # copy existing request params 396 urlParams=self.REQUEST.form.copy()347 newParams=self.REQUEST.form.copy() 397 348 # change single param 398 349 if param is not None: 399 350 if val is None: 400 if urlParams.has_key(param):401 del urlParams[param]351 if newParams.has_key(param): 352 del newParams[param] 402 353 else: 403 urlParams[param] = str(val)354 newParams[param] = str(val) 404 355 405 356 # change more params 406 357 if params is not None: 407 for k in params.keys(): 408 v = params[k] 358 for (k, v) in params.items(): 409 359 if v is None: 410 360 # val=None removes param 411 if urlParams.has_key(k):412 del urlParams[k]361 if newParams.has_key(k): 362 del newParams[k] 413 363 414 364 else: 415 urlParams[k] = v 416 417 # FIXME: does this belong here? 418 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 419 urlParams["mode"] = "imagepath" 420 urlParams["url"] = getParentDir(urlParams["url"]) 421 365 newParams[k] = v 366 367 if duplicates: 368 # eliminate lists (coming from duplicate keys) 369 for (k,v) in newParams.items(): 370 if isinstance(v, list): 371 if duplicates == 'comma': 372 # make comma-separated list of non-empty entries 373 newParams[k] = ','.join([t for t in v if t]) 374 elif duplicates == 'first': 375 # take first non-empty entry 376 newParams[k] = [t for t in v if t][0] 377 378 return newParams 379 380 def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'): 381 """returns URL to documentviewer with parameter param set to val or from dict params""" 382 urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates) 422 383 # quote values and assemble into query string (not escaping '/') 423 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) 424 #ps = urllib.urlencode(urlParams) 384 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()]) 425 385 if baseUrl is None: 426 baseUrl = self. REQUEST['URL1']386 baseUrl = self.getDocumentViewerURL() 427 387 428 388 url = "%s?%s"%(baseUrl, ps) 429 389 return url 430 390 431 432 def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None): 391 def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'): 433 392 """link to documentviewer with parameter param set to val""" 434 return self.getLink(param, val, params, baseUrl, '&') 393 return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates) 394 435 395 436 396 def getInfo_xml(self,url,mode): 437 397 """returns info about the document as XML""" 438 439 398 if not self.digilibBaseUrl: 440 399 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" … … 444 403 return pt(docinfo=docinfo) 445 404 446 def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):447 """returns new option state"""448 if not self.REQUEST.SESSION.has_key(optionName):449 # not in session -- initial450 opt = {'lastState': newState, 'state': initialState}451 else:452 opt = self.REQUEST.SESSION.get(optionName)453 if opt['lastState'] != newState:454 # state in session has changed -- toggle455 opt['state'] = not opt['state']456 opt['lastState'] = newState457 458 self.REQUEST.SESSION[optionName] = opt459 return opt['state']460 461 405 def isAccessible(self, docinfo): 462 406 """returns if access to the resource is granted""" 463 407 access = docinfo.get('accessType', None) 464 408 logging.debug("documentViewer (accessOK) access type %s"%access) 465 if access is not None and access== 'free':409 if access == 'free': 466 410 logging.debug("documentViewer (accessOK) access is free") 467 411 return True 412 468 413 elif access is None or access in self.authgroups: 469 414 # only local access -- only logged in users … … 479 424 return False 480 425 426 427 428 def getDocinfo(self, mode, url): 429 """returns docinfo depending on mode""" 430 logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) 431 # look for cached docinfo in session 432 if self.REQUEST.SESSION.has_key('docinfo'): 433 docinfo = self.REQUEST.SESSION['docinfo'] 434 # check if its still current 435 if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url: 436 logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys()) 437 return docinfo 438 439 # new docinfo 440 docinfo = {'mode': mode, 'url': url} 441 # add self url 442 docinfo['viewerUrl'] = self.getDocumentViewerURL() 443 docinfo['digilibBaseUrl'] = self.digilibBaseUrl 444 # get index.meta DOM 445 docUrl = None 446 metaDom = None 447 if mode=="texttool": 448 # url points to document dir or index.meta 449 metaDom = self.metadataService.getDomFromPathOrUrl(url) 450 docUrl = url.replace('/index.meta', '') 451 if metaDom is None: 452 raise IOError("Unable to find index.meta for mode=texttool!") 453 454 elif mode=="imagepath": 455 # url points to folder with images, index.meta optional 456 # asssume index.meta in parent dir 457 docUrl = getParentPath(url) 458 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 459 460 elif mode=="filepath": 461 # url points to image file, index.meta optional 462 # asssume index.meta is two path segments up 463 docUrl = getParentPath(url, 2) 464 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 465 466 else: 467 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 468 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 469 470 docinfo['documentUrl'] = docUrl 471 # process index.meta contents 472 if metaDom is not None and metaDom.tag == 'resource': 473 # document directory name and path 474 resource = self.metadataService.getResourceData(dom=metaDom) 475 if resource: 476 docinfo = self.getDocinfoFromResource(docinfo, resource) 477 478 # texttool info 479 texttool = self.metadataService.getTexttoolData(dom=metaDom) 480 if texttool: 481 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 482 483 # bib info 484 bib = self.metadataService.getBibData(dom=metaDom) 485 if bib: 486 docinfo = self.getDocinfoFromBib(docinfo, bib) 487 else: 488 # no bib - try info.xml 489 docinfo = self.getDocinfoFromPresentationInfoXml(docinfo) 481 490 482 def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): 483 """gibt param von dlInfo aus""" 484 if docinfo is None: 485 docinfo = {} 486 487 for x in range(cut): 488 489 path=getParentDir(path) 490 491 # auth info 492 access = self.metadataService.getAccessData(dom=metaDom) 493 if access: 494 docinfo = self.getDocinfoFromAccess(docinfo, access) 495 496 # attribution info 497 attribution = self.metadataService.getAttributionData(dom=metaDom) 498 if attribution: 499 logging.debug("getDocinfo: attribution=%s"%repr(attribution)) 500 docinfo['attribution'] = attribution 501 #docinfo = self.getDocinfoFromAccess(docinfo, access) 502 503 # copyright info 504 copyright = self.metadataService.getCopyrightData(dom=metaDom) 505 if copyright: 506 logging.debug("getDocinfo: copyright=%s"%repr(copyright)) 507 docinfo['copyright'] = copyright 508 #docinfo = self.getDocinfoFromAccess(docinfo, access) 509 510 # image path 511 if mode != 'texttool': 512 # override image path from texttool with url 513 docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) 514 515 # number of images from digilib 516 if docinfo.get('imagePath', None): 517 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] 518 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) 519 520 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) 521 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 522 # store in session 523 self.REQUEST.SESSION['docinfo'] = docinfo 524 return docinfo 525 526 def getDocinfoFromResource(self, docinfo, resource): 527 """reads contents of resource element into docinfo""" 528 docName = resource.get('name', None) 529 docinfo['documentName'] = docName 530 docPath = resource.get('archive-path', None) 531 if docPath: 532 # clean up document path 533 if docPath[0] != '/': 534 docPath = '/' + docPath 535 536 if docName and (not docPath.endswith(docName)): 537 docPath += "/" + docName 538 539 else: 540 # use docUrl as docPath 541 docUrl = docinfo['documentURL'] 542 if not docUrl.startswith('http:'): 543 docPath = docUrl 544 if docPath: 545 # fix URLs starting with /mpiwg/online 546 docPath = docPath.replace('/mpiwg/online', '', 1) 547 548 docinfo['documentPath'] = docPath 549 return docinfo 550 551 def getDocinfoFromTexttool(self, docinfo, texttool): 552 """reads contents of texttool element into docinfo""" 553 # image dir 554 imageDir = texttool.get('image', None) 555 docPath = docinfo.get('documentPath', None) 556 if imageDir and docPath: 557 #print "image: ", imageDir, " archivepath: ", archivePath 558 imageDir = os.path.join(docPath, imageDir) 559 imageDir = imageDir.replace('/mpiwg/online', '', 1) 560 docinfo['imagePath'] = imageDir 561 562 # old style text URL 563 textUrl = texttool.get('text', None) 564 if textUrl and docPath: 565 if urlparse.urlparse(textUrl)[0] == "": #keine url 566 textUrl = os.path.join(docPath, textUrl) 567 568 docinfo['textURL'] = textUrl 569 570 # new style text-url-path 571 textUrl = texttool.get('text-url-path', None) 572 if textUrl: 573 docinfo['textURLPath'] = textUrl 574 575 # page flow 576 docinfo['pageFlow'] = texttool.get('page-flow', 'ltr') 577 578 # odd pages are left 579 docinfo['oddPage'] = texttool.get('odd-scan-position', 'left') 580 581 # number of title page (0: not defined) 582 docinfo['titlePage'] = texttool.get('title-scan-no', 0) 583 584 # old presentation stuff 585 presentation = texttool.get('presentation', None) 586 if presentation and docPath: 587 if presentation.startswith('http:'): 588 docinfo['presentationUrl'] = presentation 589 else: 590 docinfo['presentationUrl'] = os.path.join(docPath, presentation) 591 592 593 return docinfo 594 595 def getDocinfoFromBib(self, docinfo, bib): 596 """reads contents of bib element into docinfo""" 597 logging.debug("getDocinfoFromBib bib=%s"%repr(bib)) 598 # put all raw bib fields in dict "bib" 599 docinfo['bib'] = bib 600 bibtype = bib.get('@type', None) 601 docinfo['bibType'] = bibtype 602 # also store DC metadata for convenience 603 dc = self.metadataService.getDCMappedData(bib) 604 docinfo['creator'] = dc.get('creator',None) 605 docinfo['title'] = dc.get('title',None) 606 docinfo['date'] = dc.get('date',None) 607 return docinfo 608 609 def getDocinfoFromAccess(self, docinfo, acc): 610 """reads contents of access element into docinfo""" 611 #TODO: also read resource type 612 logging.debug("getDocinfoFromAccess acc=%s"%repr(acc)) 613 try: 614 acctype = acc['@attr']['type'] 615 if acctype: 616 access=acctype 617 if access in ['group', 'institution']: 618 access = acc['name'].lower() 619 620 docinfo['accessType'] = access 621 622 except: 623 pass 624 625 return docinfo 626 627 def getDocinfoFromDigilib(self, docinfo, path): 491 628 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 492 493 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) 494 629 # fetch data 495 630 txt = getHttpData(infoUrl) 496 if txt is None:497 raise IOError("Unable to get dir-info from %s"%(infoUrl))498 499 dom = Parse(txt) 500 sizes=dom.xpath("//dir/size")501 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)502 503 if size s:504 docinfo['numPages'] = int( getTextFromNode(sizes[0]))631 if not txt: 632 logging.error("Unable to get dir-info from %s"%(infoUrl)) 633 return docinfo 634 635 dom = ET.fromstring(txt) 636 size = getText(dom.find("size")) 637 logging.debug("getDocinfoFromDigilib: size=%s"%size) 638 if size: 639 docinfo['numPages'] = int(size) 505 640 else: 506 641 docinfo['numPages'] = 0 507 642 508 643 # TODO: produce and keep list of image names and numbers 509 510 644 return docinfo 511 512 def getIndexMetaPath(self,url): 513 """gib nur den Pfad zurueck""" 514 regexp = re.compile(r".*(experimental|permanent)/(.*)") 515 regpath = regexp.match(url) 516 if (regpath==None): 517 return "" 518 logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2)) 519 return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)) 520 521 522 523 def getIndexMetaUrl(self,url): 524 """returns utr of index.meta document at url""" 525 526 metaUrl = None 527 if url.startswith("http://"): 528 # real URL 529 metaUrl = url 530 else: 531 # online path 532 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 533 metaUrl=server+url.replace("/mpiwg/online","") 534 if not metaUrl.endswith("index.meta"): 535 metaUrl += "/index.meta" 536 537 return metaUrl 538 539 def getDomFromIndexMeta(self, url): 540 """get dom from index meta""" 541 dom = None 542 metaUrl = self.getIndexMetaUrl(url) 543 544 logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl) 545 txt=getHttpData(metaUrl) 546 if txt is None: 547 raise IOError("Unable to read index meta from %s"%(url)) 548 549 dom = Parse(txt) 550 return dom 551 552 def getPresentationInfoXML(self, url): 553 """returns dom of info.xml document at url""" 645 646 647 def getDocinfoFromPresentationInfoXml(self,docinfo): 648 """gets DC-like bibliographical information from the presentation entry in texttools""" 649 url = docinfo.get('presentationUrl', None) 650 if not url: 651 logging.error("getDocinfoFromPresentation: no URL!") 652 return docinfo 653 554 654 dom = None 555 655 metaUrl = None … … 559 659 else: 560 660 # online path 661 561 662 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 562 metaUrl=server+url .replace("/mpiwg/online","")663 metaUrl=server+url 563 664 564 665 txt=getHttpData(metaUrl) 565 666 if txt is None: 566 raise IOError("Unable to read infoXMLfrom %s"%(url)) 567 568 dom = Parse(txt) 569 return dom 667 logging.error("Unable to read info.xml from %s"%(url)) 668 return docinfo 669 670 dom = ET.fromstring(txt) 671 docinfo['creator']=getText(dom.find(".//author")) 672 docinfo['title']=getText(dom.find(".//title")) 673 docinfo['date']=getText(dom.find(".//date")) 674 return docinfo 675 676 677 def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None): 678 """returns pageinfo with the given parameters""" 679 logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode)) 680 pageinfo = {} 681 pageinfo['viewMode'] = viewMode 682 # split viewLayer if necessary 683 if isinstance(viewLayer,basestring): 684 viewLayer = viewLayer.split(',') 685 686 if isinstance(viewLayer, list): 687 logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer) 688 # save (unique) list in viewLayers 689 seen = set() 690 viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)] 691 pageinfo['viewLayers'] = viewLayers 692 # stringify viewLayer 693 viewLayer = ','.join(viewLayers) 694 else: 695 #create list 696 pageinfo['viewLayers'] = [viewLayer] 570 697 571 572 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 573 """gets authorization info from the index.meta file at path or given by dom""" 574 logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) 575 576 access = None 577 578 if docinfo is None: 579 docinfo = {} 580 581 if dom is None: 582 for x in range(cut): 583 path=getParentDir(path) 584 dom = self.getDomFromIndexMeta(path) 585 586 acctype = dom.xpath("//access-conditions/access/@type") 587 if acctype and (len(acctype)>0): 588 access=acctype[0].value 589 if access in ['group', 'institution']: 590 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 591 592 docinfo['accessType'] = access 593 return docinfo 594 595 596 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 597 """gets bibliographical info from the index.meta file at path or given by dom""" 598 #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) 599 600 if docinfo is None: 601 docinfo = {} 602 603 if dom is None: 604 for x in range(cut): 605 path=getParentDir(path) 606 dom = self.getDomFromIndexMeta(path) 607 608 docinfo['indexMetaPath']=self.getIndexMetaPath(path); 609 610 #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 611 # put in all raw bib fields as dict "bib" 612 bib = dom.xpath("//bib/*") 613 if bib and len(bib)>0: 614 bibinfo = {} 615 for e in bib: 616 bibinfo[e.localName] = getTextFromNode(e) 617 docinfo['bib'] = bibinfo 618 619 # extract some fields (author, title, year) according to their mapping 620 metaData=self.metadata.main.meta.bib 621 bibtype=dom.xpath("//bib/@type") 622 if bibtype and (len(bibtype)>0): 623 bibtype=bibtype[0].value 624 else: 625 bibtype="generic" 626 627 bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) 628 docinfo['bib_type'] = bibtype 629 bibmap=metaData.generateMappingForType(bibtype) 630 #logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap)) 631 #logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype)) 632 # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 633 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: 634 try: 635 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) 636 except: pass 637 try: 638 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) 639 except: pass 640 try: 641 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) 642 except: pass 643 #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 644 try: 645 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 646 except: 647 docinfo['lang']='' 648 try: 649 docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0]) 650 except: 651 docinfo['city']='' 652 try: 653 docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0]) 654 except: 655 docinfo['number_of_pages']='' 656 try: 657 docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0]) 658 except: 659 docinfo['series_volume']='' 660 try: 661 docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0]) 662 except: 663 docinfo['number_of_volumes']='' 664 try: 665 docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0]) 666 except: 667 docinfo['translator']='' 668 try: 669 docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0]) 670 except: 671 docinfo['edition']='' 672 try: 673 docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0]) 674 except: 675 docinfo['series_author']='' 676 try: 677 docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0]) 678 except: 679 docinfo['publisher']='' 680 try: 681 docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0]) 682 except: 683 docinfo['series_title']='' 684 try: 685 docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0]) 686 except: 687 docinfo['isbn_issn']='' 688 #logging.debug("I NEED BIBTEX %s"%docinfo) 689 return docinfo 690 691 692 def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 693 """gets name info from the index.meta file at path or given by dom""" 694 if docinfo is None: 695 docinfo = {} 696 697 if dom is None: 698 for x in range(cut): 699 path=getParentDir(path) 700 dom = self.getDomFromIndexMeta(path) 701 702 docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0]) 703 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) 704 return docinfo 705 706 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 707 """parse texttool tag in index meta""" 708 logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) 709 if docinfo is None: 710 docinfo = {} 711 if docinfo.get('lang', None) is None: 712 docinfo['lang'] = '' # default keine Sprache gesetzt 713 if dom is None: 714 dom = self.getDomFromIndexMeta(url) 715 716 archivePath = None 717 archiveName = None 718 719 archiveNames = dom.xpath("//resource/name") 720 if archiveNames and (len(archiveNames) > 0): 721 archiveName = getTextFromNode(archiveNames[0]) 722 else: 723 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) 724 725 archivePaths = dom.xpath("//resource/archive-path") 726 if archivePaths and (len(archivePaths) > 0): 727 archivePath = getTextFromNode(archivePaths[0]) 728 # clean up archive path 729 if archivePath[0] != '/': 730 archivePath = '/' + archivePath 731 if archiveName and (not archivePath.endswith(archiveName)): 732 archivePath += "/" + archiveName 733 else: 734 # try to get archive-path from url 735 logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) 736 if (not url.startswith('http')): 737 archivePath = url.replace('index.meta', '') 738 739 if archivePath is None: 740 # we balk without archive-path 741 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 742 743 imageDirs = dom.xpath("//texttool/image") 744 if imageDirs and (len(imageDirs) > 0): 745 imageDir = getTextFromNode(imageDirs[0]) 746 747 else: 748 # we balk with no image tag / not necessary anymore because textmode is now standard 749 #raise IOError("No text-tool info in %s"%(url)) 750 imageDir = "" 751 #xquery="//pb" 752 docinfo['imagePath'] = "" # keine Bilder 753 docinfo['imageURL'] = "" 754 755 if imageDir and archivePath: 756 #print "image: ", imageDir, " archivepath: ", archivePath 757 imageDir = os.path.join(archivePath, imageDir) 758 imageDir = imageDir.replace("/mpiwg/online", '') 759 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) 760 docinfo['imagePath'] = imageDir 761 762 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 763 764 viewerUrls = dom.xpath("//texttool/digiliburlprefix") 765 if viewerUrls and (len(viewerUrls) > 0): 766 viewerUrl = getTextFromNode(viewerUrls[0]) 767 docinfo['viewerURL'] = viewerUrl 768 769 # old style text URL 770 textUrls = dom.xpath("//texttool/text") 771 if textUrls and (len(textUrls) > 0): 772 textUrl = getTextFromNode(textUrls[0]) 773 if urlparse.urlparse(textUrl)[0] == "": #keine url 774 textUrl = os.path.join(archivePath, textUrl) 775 # fix URLs starting with /mpiwg/online 776 if textUrl.startswith("/mpiwg/online"): 777 textUrl = textUrl.replace("/mpiwg/online", '', 1) 778 779 docinfo['textURL'] = textUrl 780 781 782 #TODO: hack-DW for annalen 783 if (textUrl is not None) and (textUrl.startswith("/permanent/einstein/annalen")): 784 textUrl=textUrl.replace("/permanent/einstein/annalen/","/diverse/de/") 785 splitted=textUrl.split("/fulltext") 786 textUrl=splitted[0]+".xml" 787 textUrlkurz = string.split(textUrl, ".")[0] 788 docinfo['textURLPathkurz'] = textUrlkurz 789 docinfo['textURLPath'] = textUrl 790 logging.debug("hack") 791 logging.debug(textUrl) 792 793 794 # new style text-url-path 795 textUrls = dom.xpath("//texttool/text-url-path") 796 if textUrls and (len(textUrls) > 0): 797 textUrl = getTextFromNode(textUrls[0]) 798 docinfo['textURLPath'] = textUrl 799 textUrlkurz = string.split(textUrl, ".")[0] 800 docinfo['textURLPathkurz'] = textUrlkurz 801 #if not docinfo['imagePath']: 802 # text-only, no page images 803 #docinfo = self.getNumTextPages(docinfo) 804 805 806 presentationUrls = dom.xpath("//texttool/presentation") 807 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 808 #docinfo = self.getDownloadfromDocinfoToBibtex(url, docinfo=docinfo, dom=dom) 809 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) 810 811 812 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 813 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 814 # durch den relativen Pfad auf die presentation infos 815 presentationPath = getTextFromNode(presentationUrls[0]) 816 if url.endswith("index.meta"): 817 presentationUrl = url.replace('index.meta', presentationPath) 818 else: 819 presentationUrl = url + "/" + presentationPath 820 821 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 822 823 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 824 825 return docinfo 826 827 828 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 829 """gets the bibliographical information from the preseantion entry in texttools 830 """ 831 dom=self.getPresentationInfoXML(url) 832 try: 833 docinfo['author']=getTextFromNode(dom.xpath("//author")[0]) 834 except: 835 pass 836 try: 837 docinfo['title']=getTextFromNode(dom.xpath("//title")[0]) 838 except: 839 pass 840 try: 841 docinfo['year']=getTextFromNode(dom.xpath("//date")[0]) 842 except: 843 pass 844 return docinfo 845 846 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 847 """path ist the path to the images it assumes that the index.meta file is one level higher.""" 848 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) 849 if docinfo is None: 850 docinfo = {} 851 path=path.replace("/mpiwg/online","") 852 docinfo['imagePath'] = path 853 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) 854 855 pathorig=path 856 for x in range(cut): 857 path=getParentDir(path) 858 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) 859 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 860 docinfo['imageURL'] = imageUrl 861 862 #path ist the path to the images it assumes that the index.meta file is one level higher. 863 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 864 #docinfo = self.getDownloadfromDocinfoToBibtex(pathorig,docinfo=docinfo,cut=cut+1) 865 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 866 return docinfo 867 868 869 def getDocinfo(self, mode, url): 870 """returns docinfo depending on mode""" 871 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) 872 # look for cached docinfo in session 873 if self.REQUEST.SESSION.has_key('docinfo'): 874 docinfo = self.REQUEST.SESSION['docinfo'] 875 # check if its still current 876 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 877 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) 878 return docinfo 879 # new docinfo 880 docinfo = {'mode': mode, 'url': url} 881 if mode=="texttool": #index.meta with texttool information 882 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) 883 elif mode=="imagepath": 884 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 885 elif mode=="filepath": 886 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 887 else: 888 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 889 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 890 891 # FIXME: fake texturlpath 892 if not docinfo.has_key('textURLPath'): 893 docinfo['textURLPath'] = None 894 895 logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 896 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%) 897 self.REQUEST.SESSION['docinfo'] = docinfo 898 return docinfo 899 900 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 901 """returns pageinfo with the given parameters""" 902 pageinfo = {} 698 pageinfo['viewLayer'] = viewLayer 699 pageinfo['tocMode'] = tocMode 700 903 701 current = getInt(current) 904 905 702 pageinfo['current'] = current 703 pageinfo['pn'] = current 906 704 rows = int(rows or self.thumbrows) 907 705 pageinfo['rows'] = rows … … 910 708 grpsize = cols * rows 911 709 pageinfo['groupsize'] = grpsize 710 # is start is empty use one around current 912 711 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 913 712 # int(current / grpsize) * grpsize +1)) 914 713 pageinfo['start'] = start 915 pageinfo['end'] = start + grpsize 916 if (docinfo is not None) and ('numPages' in docinfo): 917 np = int(docinfo['numPages']) 918 pageinfo['end'] = min(pageinfo['end'], np) 919 pageinfo['numgroups'] = int(np / grpsize) 920 if np % grpsize > 0: 921 pageinfo['numgroups'] += 1 922 pageinfo['viewMode'] = viewMode 923 pageinfo['tocMode'] = tocMode 714 # get number of pages 715 np = int(docinfo.get('numPages', 0)) 716 if np == 0: 717 # numPages unknown - maybe we can get it from text page 718 if docinfo.get('textURLPath', None): 719 # cache text page as well 720 pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo) 721 np = int(docinfo.get('numPages', 0)) 722 723 # cache table of contents 724 pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) 725 pageinfo['numgroups'] = int(np / grpsize) 726 if np % grpsize > 0: 727 pageinfo['numgroups'] += 1 728 729 pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl' 730 oddScanLeft = docinfo.get('oddPage', 'left') != 'right' 731 # add zeroth page for two columns 732 pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft)) 733 pageinfo['pageZero'] = pageZero 734 pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np) 735 924 736 pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') 925 #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1') 926 pageinfo['query'] = self.REQUEST.get('query','') 927 pageinfo['queryType'] = self.REQUEST.get('queryType','') 928 pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') 929 pageinfo['textPN'] = self.REQUEST.get('textPN','1') 930 pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') 931 932 pageinfo ['highlightElementPos'] = self.REQUEST.get('highlightElementPos','') 933 pageinfo ['highlightElement'] = self.REQUEST.get('highlightElement','') 934 935 pageinfo ['xpointer'] = self.REQUEST.get('xpointer','') 936 937 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') 938 pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 939 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 940 toc = int (pageinfo['tocPN']) 941 pageinfo['textPages'] =int (toc) 942 943 if 'tocSize_%s'%tocMode in docinfo: 944 tocSize = int(docinfo['tocSize_%s'%tocMode]) 945 tocPageSize = int(pageinfo['tocPageSize']) 946 # cached toc 947 if tocSize%tocPageSize>0: 948 tocPages=tocSize/tocPageSize+1 737 738 # cache search results 739 pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10)) 740 query = self.REQUEST.get('query',None) 741 pageinfo['query'] = query 742 if query: 743 queryType = self.REQUEST.get('queryType', 'fulltextMorph') 744 pageinfo['queryType'] = queryType 745 pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1')) 746 self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo) 747 748 # highlighting 749 highlightQuery = self.REQUEST.get('highlightQuery', None) 750 if highlightQuery: 751 pageinfo['highlightQuery'] = highlightQuery 752 pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '') 753 pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '') 754 755 return pageinfo 756 757 758 def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0): 759 """returns dict with array of page informations for one screenfull of thumbnails""" 760 batch = {} 761 grpsize = rows * cols 762 if maxIdx == 0: 763 maxIdx = start + grpsize 764 765 nb = int(math.ceil(maxIdx / float(grpsize))) 766 # list of all batch start and end points 767 batches = [] 768 if pageZero: 769 ofs = 0 770 else: 771 ofs = 1 772 773 for i in range(nb): 774 s = i * grpsize + ofs 775 e = min((i + 1) * grpsize + ofs - 1, maxIdx) 776 batches.append({'start':s, 'end':e}) 777 778 batch['batches'] = batches 779 780 pages = [] 781 if pageZero and start == 1: 782 # correct beginning 783 idx = 0 784 else: 785 idx = start 786 787 for r in range(rows): 788 row = [] 789 for c in range(cols): 790 if idx < minIdx or idx > maxIdx: 791 page = {'idx':None} 792 else: 793 page = {'idx':idx} 794 795 idx += 1 796 if pageFlowLtr: 797 row.append(page) 798 else: 799 row.insert(0, page) 800 801 pages.append(row) 802 803 if start > 1: 804 batch['prevStart'] = max(start - grpsize, 1) 805 else: 806 batch['prevStart'] = None 807 808 if start + grpsize < maxIdx: 809 batch['nextStart'] = start + grpsize 810 else: 811 batch['nextStart'] = None 812 813 batch['pages'] = pages 814 return batch 815 816 def getBatch(self, start=1, size=10, end=0, data=None, fullData=True): 817 """returns dict with information for one screenfull of data.""" 818 batch = {} 819 if end == 0: 820 end = start + size 821 822 nb = int(math.ceil(end / float(size))) 823 # list of all batch start and end points 824 batches = [] 825 for i in range(nb): 826 s = i * size + 1 827 e = min((i + 1) * size, end) 828 batches.append({'start':s, 'end':e}) 829 830 batch['batches'] = batches 831 # list of elements in this batch 832 this = [] 833 j = 0 834 for i in range(start, min(start+size, end)): 835 if data: 836 if fullData: 837 d = data[i] 838 else: 839 d = data[j] 840 j += 1 841 949 842 else: 950 tocPages=tocSize/tocPageSize 951 pageinfo['tocPN'] = min (tocPages,toc) 952 pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') 953 #pageinfo['sn'] =self.REQUEST.get('sn','') 954 pageinfo['s'] =self.REQUEST.get('s','') 955 return pageinfo 956 957 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 843 d = i+1 844 845 this.append(d) 846 847 batch['this'] = this 848 if start > 1: 849 batch['prevStart'] = max(start - size, 1) 850 else: 851 batch['prevStart'] = None 852 853 if start + size < end: 854 batch['nextStart'] = start + size 855 else: 856 batch['nextStart'] = None 857 858 return batch 859 860 861 security.declareProtected('View management screens','changeDocumentViewerForm') 862 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 863 864 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 958 865 """init document viewer""" 959 866 self.title=title … … 962 869 self.thumbcols = thumbcols 963 870 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 871 try: 872 # assume MetaDataFolder instance is called metadata 873 self.metadataService = getattr(self, 'metadata') 874 except Exception, e: 875 logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 876 964 877 if RESPONSE is not None: 965 878 RESPONSE.redirect('manage_main') … … 977 890 if RESPONSE is not None: 978 891 RESPONSE.redirect('manage_main') 979 980 ## DocumentViewerTemplate class981 class DocumentViewerTemplate(ZopePageTemplate):982 """Template for document viewer"""983 meta_type="DocumentViewer Template"984 985 986 def manage_addDocumentViewerTemplateForm(self):987 """Form for adding"""988 pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)989 return pt()990 991 def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,992 REQUEST=None, submit=None):993 "Add a Page Template with optional file content."994 995 self._setObject(id, DocumentViewerTemplate(id))996 ob = getattr(self, id)997 txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()998 logging.info("txt %s:"%txt)999 ob.pt_edit(txt,"text/html")1000 if title:1001 ob.pt_setTitle(title)1002 try:1003 u = self.DestinationURL()1004 except AttributeError:1005 u = REQUEST['URL1']1006 1007 u = "%s/%s" % (u, urllib.quote(id))1008 REQUEST.RESPONSE.redirect(u+'/manage_main')1009 return ''1010 1011 1012
Note: See TracChangeset
for help on using the changeset viewer.