Changeset 495:ede0c93de798 in documentViewer for documentViewer.py
- Timestamp:
- Jun 17, 2010, 5:35:24 PM (14 years ago)
- Branch:
- metalify-1
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
documentViewer.py
r0 r495 1 2 genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"3 1 4 2 from OFS.Folder import Folder 5 3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 6 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 7 5 from AccessControl import ClassSecurityInfo 6 from AccessControl import getSecurityManager 8 7 from Globals import package_home 9 8 10 from Ft.Xml .Domlette import NonvalidatingReader9 from Ft.Xml import EMPTY_NAMESPACE, Parse 11 10 from Ft.Xml.Domlette import PrettyPrint, Print 12 from Ft.Xml import EMPTY_NAMESPACE13 14 import Ft.Xml.XPath15 16 11 import os.path 17 import cgi12 import sys 18 13 import urllib 14 import urllib2 15 import logging 16 import math 17 import urlparse 18 import cStringIO 19 20 def logger(txt,method,txt2): 21 """logging""" 22 logging.info(txt+ txt2) 23 24 25 def getInt(number, default=0): 26 """returns always an int (0 in case of problems)""" 27 try: 28 return int(number) 29 except: 30 return int(default) 19 31 20 32 def getTextFromNode(nodename): 33 """get the cdata content of a node""" 34 if nodename is None: 35 return "" 21 36 nodelist=nodename.childNodes 22 37 rc = "" … … 26 41 return rc 27 42 28 import socket 29 30 def urlopen(url): 31 """urlopen mit timeout""" 32 socket.setdefaulttimeout(2) 33 ret=urllib.urlopen(url) 34 socket.setdefaulttimeout(5) 35 return ret 36 37 def getParamFromDigilib(path,param): 38 """gibt param von dlInfo aus""" 39 imageUrl=genericDigilib+"/dlInfo-xml.jsp?fn="+path 40 41 try: 42 dom = NonvalidatingReader.parseUri(imageUrl) 43 except: 44 return None 45 46 47 params=dom.xpath("//document-parameters/parameter[@name='%s']/@value"%param) 48 49 if params: 50 return params[0].value 51 52 def parseUrlTextTool(url): 53 """parse index meta""" 54 55 try: 56 dom = NonvalidatingReader.parseUri(url) 57 except: 58 zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) 59 return (None,None,None) 60 61 archivePaths=dom.xpath("//resource/archive-path") 62 63 if archivePaths and (len(archivePaths)>0): 64 archivePath=getTextFromNode(archivePaths[0]) 65 else: 66 archivePath=None 67 68 69 images=dom.xpath("//texttool/image") 70 71 if images and (len(images)>0): 72 image=getTextFromNode(images[0]) 73 else: 74 image=None 75 76 if image and archivePath: 77 image=os.path.join(archivePath,image) 78 image=image.replace("/mpiwg/online",'') 79 pt=getParamFromDigilib(image,'pt') 80 81 else: 82 image=None 83 84 viewerUrls=dom.xpath("//texttool/digiliburlprefix") 85 86 if viewerUrls and (len(viewerUrls)>0): 87 viewerUrl=getTextFromNode(viewerUrls[0]) 88 else: 89 viewerUrl=None 90 91 92 textUrls=dom.xpath("//texttool/text") 93 94 if textUrls and (len(textUrls)>0): 95 textUrl=getTextFromNode(textUrls[0]) 96 else: 97 textUrl=None 98 return viewerUrl,(image,pt),textUrl 99 100 101 class documentViewer(ZopePageTemplate): 43 def serializeNode(node, encoding='utf-8'): 44 """returns a string containing node as XML""" 45 buf = cStringIO.StringIO() 46 Print(node, stream=buf, encoding=encoding) 47 s = buf.getvalue() 48 buf.close() 49 return s 50 51 52 def getParentDir(path): 53 """returns pathname shortened by one""" 54 return '/'.join(path.split('/')[0:-1]) 55 56 57 def getHttpData(url, data=None, num_tries=3, timeout=10): 58 """returns result from url+data HTTP request""" 59 # we do GET (by appending data to url) 60 if isinstance(data, str) or isinstance(data, unicode): 61 # if data is string then append 62 url = "%s?%s"%(url,data) 63 elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): 64 # urlencode 65 url = "%s?%s"%(url,urllib.urlencode(data)) 66 67 response = None 68 errmsg = None 69 for cnt in range(num_tries): 70 try: 71 logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) 72 if sys.version_info < (2, 6): 73 # set timeout on socket -- ugly :-( 74 import socket 75 socket.setdefaulttimeout(float(timeout)) 76 response = urllib2.urlopen(url) 77 else: 78 response = urllib2.urlopen(url,timeout=float(timeout)) 79 # check result? 80 break 81 except urllib2.HTTPError, e: 82 logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) 83 errmsg = str(e) 84 # stop trying 85 break 86 except urllib2.URLError, e: 87 logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) 88 errmsg = str(e) 89 # stop trying 90 #break 91 92 if response is not None: 93 data = response.read() 94 response.close() 95 return data 96 97 raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) 98 #return None 99 100 101 102 ## 103 ## documentViewer class 104 ## 105 class documentViewer(Folder): 102 106 """document viewer""" 103 104 107 meta_type="Document viewer" 105 108 106 109 security=ClassSecurityInfo() 107 manage_options= ZopePageTemplate.manage_options+(110 manage_options=Folder.manage_options+( 108 111 {'label':'main config','action':'changeDocumentViewerForm'}, 109 112 ) 110 113 111 _default_content_fn = os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt') 112 113 def __init__(self,id,imageViewerUrl,title=""): 114 # templates and forms 115 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 116 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 117 toc_text = PageTemplateFile('zpt/toc_text', globals()) 118 toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 119 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 120 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 121 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 122 page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) 123 head_main = PageTemplateFile('zpt/head_main', globals()) 124 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 125 info_xml = PageTemplateFile('zpt/info_xml', globals()) 126 127 128 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 129 security.declareProtected('View management screens','changeDocumentViewerForm') 130 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 131 132 133 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): 114 134 """init document viewer""" 115 135 self.id=id 116 136 self.title=title 117 self.imageViewerUrl=imageViewerUrl 118 119 security.declareProtected('View management screens','changeDocumentViewerForm') 120 def changeDocumentViewerForm(self): 121 """change it""" 122 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeDocumentViewer.zpt')).__of__(self) 123 return pt() 137 self.thumbcols = thumbcols 138 self.thumbrows = thumbrows 139 # authgroups is list of authorized groups (delimited by ,) 140 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 141 # create template folder so we can always use template.something 142 143 templateFolder = Folder('template') 144 #self['template'] = templateFolder # Zope-2.12 style 145 self._setObject('template',templateFolder) # old style 146 try: 147 import MpdlXmlTextServer 148 textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) 149 #templateFolder['fulltextclient'] = xmlRpcClient 150 templateFolder._setObject('fulltextclient',textServer) 151 except Exception, e: 152 logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) 153 try: 154 from Products.zogiLib.zogiLib import zogiLib 155 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 156 #templateFolder['zogilib'] = zogilib 157 templateFolder._setObject('zogilib',zogilib) 158 except Exception, e: 159 logging.error("Unable to create zogiLib for zogilib: "+str(e)) 160 161 162 # proxy text server methods to fulltextclient 163 def getTextPage(self, **args): 164 """get page""" 165 return self.template.fulltextclient.getTextPage(**args) 166 167 def getQuery(self, **args): 168 """get query""" 169 return self.template.fulltextclient.getQuery(**args) 170 171 def getSearch(self, **args): 172 """get search""" 173 return self.template.fulltextclient.getSearch(**args) 174 175 def getNumPages(self, **args): 176 """get numpages""" 177 return self.template.fulltextclient.getNumPages(**args) 178 179 def getTranslate(self, **args): 180 """get translate""" 181 return self.template.fulltextclient.getTranslate(**args) 182 183 def getLemma(self, **args): 184 """get lemma""" 185 return self.template.fulltextclient.getLemma(**args) 186 187 def getToc(self, **args): 188 """get toc""" 189 return self.template.fulltextclient.getToc(**args) 190 191 def getTocPage(self, **args): 192 """get tocpage""" 193 return self.template.fulltextclient.getTocPage(**args) 194 195 196 security.declareProtected('View','thumbs_rss') 197 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): 198 ''' 199 view it 200 @param mode: defines how to access the document behind url 201 @param url: url which contains display information 202 @param viewMode: if images display images, if text display text, default is images (text,images or auto) 203 204 ''' 205 logging.debug("HHHHHHHHHHHHHH:load the rss") 206 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 207 208 if not hasattr(self, 'template'): 209 # create template folder if it doesn't exist 210 self.manage_addFolder('template') 211 212 if not self.digilibBaseUrl: 213 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 214 215 docinfo = self.getDocinfo(mode=mode,url=url) 216 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 217 pt = getattr(self.template, 'thumbs_main_rss') 218 219 if viewMode=="auto": # automodus gewaehlt 220 if docinfo.has_key("textURL") or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert 221 viewMode="text" 222 else: 223 viewMode="images" 224 225 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 124 226 227 security.declareProtected('View','index_html') 228 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None): 229 ''' 230 view it 231 @param mode: defines how to access the document behind url 232 @param url: url which contains display information 233 @param viewMode: if images display images, if text display text, default is auto (text,images or auto) 234 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 235 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) 236 ''' 237 238 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 239 240 if not hasattr(self, 'template'): 241 # this won't work 242 logging.error("template folder missing!") 243 return "ERROR: template folder missing!" 244 245 if not getattr(self, 'digilibBaseUrl', None): 246 self.digilibBaseUrl = self.findDigilibUrl() or "http://digilib.mpiwg-berlin.mpg.de/digitallibrary" 247 248 docinfo = self.getDocinfo(mode=mode,url=url) 249 250 if tocMode != "thumbs": 251 # get table of contents 252 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 253 254 if viewMode=="auto": # automodus gewaehlt 255 if docinfo.has_key('textURL') or docinfo.has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert 256 viewMode="text_dict" 257 else: 258 viewMode="images" 259 260 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 261 262 pt = getattr(self.template, 'viewer_main') 263 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 125 264 126 def changeDocumentViewer(self,imageViewerUrl,title="",RESPONSE=None): 265 def generateMarks(self,mk): 266 ret="" 267 if mk is None: 268 return "" 269 if type(mk) is not ListType: 270 mk=[mk] 271 for m in mk: 272 ret+="mk=%s"%m 273 return ret 274 275 276 def findDigilibUrl(self): 277 """try to get the digilib URL from zogilib""" 278 url = self.template.zogilib.getDLBaseUrl() 279 return url 280 281 def getDocumentViewerURL(self): 282 """returns the URL of this instance""" 283 return self.absolute_url() 284 285 def getStyle(self, idx, selected, style=""): 286 """returns a string with the given style and append 'sel' if path == selected.""" 287 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 288 if idx == selected: 289 return style + 'sel' 290 else: 291 return style 292 293 def getLink(self,param=None,val=None): 294 """link to documentviewer with parameter param set to val""" 295 params=self.REQUEST.form.copy() 296 if param is not None: 297 if val is None: 298 if params.has_key(param): 299 del params[param] 300 else: 301 params[param] = str(val) 302 303 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 304 params["mode"] = "imagepath" 305 params["url"] = getParentDir(params["url"]) 306 307 # quote values and assemble into query string 308 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 309 url=self.REQUEST['URL1']+"?"+ps 310 return url 311 312 def getLinkAmp(self,param=None,val=None): 313 """link to documentviewer with parameter param set to val""" 314 params=self.REQUEST.form.copy() 315 if param is not None: 316 if val is None: 317 if params.has_key(param): 318 del params[param] 319 else: 320 params[param] = str(val) 321 322 # quote values and assemble into query string 323 logging.debug("XYXXXXX: %s"%repr(params.items())) 324 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 325 url=self.REQUEST['URL1']+"?"+ps 326 return url 327 328 def getInfo_xml(self,url,mode): 329 """returns info about the document as XML""" 330 331 if not self.digilibBaseUrl: 332 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 333 334 docinfo = self.getDocinfo(mode=mode,url=url) 335 pt = getattr(self.template, 'info_xml') 336 return pt(docinfo=docinfo) 337 338 339 def isAccessible(self, docinfo): 340 """returns if access to the resource is granted""" 341 access = docinfo.get('accessType', None) 342 logging.debug("documentViewer (accessOK) access type %s"%access) 343 if access is not None and access == 'free': 344 logging.debug("documentViewer (accessOK) access is free") 345 return True 346 elif access is None or access in self.authgroups: 347 # only local access -- only logged in users 348 user = getSecurityManager().getUser() 349 if user is not None: 350 #print "user: ", user 351 return (user.getUserName() != "Anonymous User") 352 else: 353 return False 354 355 logging.debug("documentViewer (accessOK) unknown access type %s"%access) 356 return False 357 358 359 def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): 360 """gibt param von dlInfo aus""" 361 if docinfo is None: 362 docinfo = {} 363 364 for x in range(cut): 365 366 path=getParentDir(path) 367 368 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 369 370 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) 371 372 txt = getHttpData(infoUrl) 373 if txt is None: 374 raise IOError("Unable to get dir-info from %s"%(infoUrl)) 375 376 dom = Parse(txt) 377 sizes=dom.xpath("//dir/size") 378 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) 379 380 if sizes: 381 docinfo['numPages'] = int(getTextFromNode(sizes[0])) 382 else: 383 docinfo['numPages'] = 0 384 385 # TODO: produce and keep list of image names and numbers 386 387 return docinfo 388 389 390 def getIndexMeta(self, url): 391 """returns dom of index.meta document at url""" 392 dom = None 393 metaUrl = None 394 if url.startswith("http://"): 395 # real URL 396 metaUrl = url 397 else: 398 # online path 399 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 400 metaUrl=server+url.replace("/mpiwg/online","") 401 if not metaUrl.endswith("index.meta"): 402 metaUrl += "/index.meta" 403 404 logging.debug("(getIndexMeta): METAURL: %s"%metaUrl) 405 txt=getHttpData(metaUrl) 406 if txt is None: 407 raise IOError("Unable to read index meta from %s"%(url)) 408 409 dom = Parse(txt) 410 return dom 411 412 def getPresentationInfoXML(self, url): 413 """returns dom of info.xml document at url""" 414 dom = None 415 metaUrl = None 416 if url.startswith("http://"): 417 # real URL 418 metaUrl = url 419 else: 420 # online path 421 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 422 metaUrl=server+url.replace("/mpiwg/online","") 423 424 txt=getHttpData(metaUrl) 425 if txt is None: 426 raise IOError("Unable to read infoXMLfrom %s"%(url)) 427 428 dom = Parse(txt) 429 return dom 430 431 432 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 433 """gets authorization info from the index.meta file at path or given by dom""" 434 logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) 435 436 access = None 437 438 if docinfo is None: 439 docinfo = {} 440 441 if dom is None: 442 for x in range(cut): 443 path=getParentDir(path) 444 dom = self.getIndexMeta(path) 445 446 acctype = dom.xpath("//access-conditions/access/@type") 447 if acctype and (len(acctype)>0): 448 access=acctype[0].value 449 if access in ['group', 'institution']: 450 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 451 452 docinfo['accessType'] = access 453 return docinfo 454 455 456 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 457 """gets bibliographical info from the index.meta file at path or given by dom""" 458 logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) 459 460 if docinfo is None: 461 docinfo = {} 462 463 if dom is None: 464 for x in range(cut): 465 path=getParentDir(path) 466 dom = self.getIndexMeta(path) 467 468 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 469 # put in all raw bib fields as dict "bib" 470 bib = dom.xpath("//bib/*") 471 if bib and len(bib)>0: 472 bibinfo = {} 473 for e in bib: 474 bibinfo[e.localName] = getTextFromNode(e) 475 docinfo['bib'] = bibinfo 476 477 # extract some fields (author, title, year) according to their mapping 478 metaData=self.metadata.main.meta.bib 479 bibtype=dom.xpath("//bib/@type") 480 if bibtype and (len(bibtype)>0): 481 bibtype=bibtype[0].value 482 else: 483 bibtype="generic" 484 485 bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) 486 docinfo['bib_type'] = bibtype 487 bibmap=metaData.generateMappingForType(bibtype) 488 # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 489 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: 490 try: 491 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) 492 except: pass 493 try: 494 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) 495 except: pass 496 try: 497 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) 498 except: pass 499 logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 500 try: 501 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 502 except: 503 docinfo['lang']='' 504 505 return docinfo 506 507 508 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 509 """parse texttool tag in index meta""" 510 logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) 511 if docinfo is None: 512 docinfo = {} 513 if docinfo.get('lang', None) is None: 514 docinfo['lang'] = '' # default keine Sprache gesetzt 515 if dom is None: 516 dom = self.getIndexMeta(url) 517 518 archivePath = None 519 archiveName = None 520 521 archiveNames = dom.xpath("//resource/name") 522 if archiveNames and (len(archiveNames) > 0): 523 archiveName = getTextFromNode(archiveNames[0]) 524 else: 525 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) 526 527 archivePaths = dom.xpath("//resource/archive-path") 528 if archivePaths and (len(archivePaths) > 0): 529 archivePath = getTextFromNode(archivePaths[0]) 530 # clean up archive path 531 if archivePath[0] != '/': 532 archivePath = '/' + archivePath 533 if archiveName and (not archivePath.endswith(archiveName)): 534 archivePath += "/" + archiveName 535 else: 536 # try to get archive-path from url 537 logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) 538 if (not url.startswith('http')): 539 archivePath = url.replace('index.meta', '') 540 541 if archivePath is None: 542 # we balk without archive-path 543 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 544 545 imageDirs = dom.xpath("//texttool/image") 546 if imageDirs and (len(imageDirs) > 0): 547 imageDir = getTextFromNode(imageDirs[0]) 548 549 else: 550 # we balk with no image tag / not necessary anymore because textmode is now standard 551 #raise IOError("No text-tool info in %s"%(url)) 552 imageDir = "" 553 #xquery="//pb" 554 docinfo['imagePath'] = "" # keine Bilder 555 docinfo['imageURL'] = "" 556 557 if imageDir and archivePath: 558 #print "image: ", imageDir, " archivepath: ", archivePath 559 imageDir = os.path.join(archivePath, imageDir) 560 imageDir = imageDir.replace("/mpiwg/online", '') 561 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) 562 docinfo['imagePath'] = imageDir 563 564 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 565 566 viewerUrls = dom.xpath("//texttool/digiliburlprefix") 567 if viewerUrls and (len(viewerUrls) > 0): 568 viewerUrl = getTextFromNode(viewerUrls[0]) 569 docinfo['viewerURL'] = viewerUrl 570 571 # old style text URL 572 textUrls = dom.xpath("//texttool/text") 573 if textUrls and (len(textUrls) > 0): 574 textUrl = getTextFromNode(textUrls[0]) 575 if urlparse.urlparse(textUrl)[0] == "": #keine url 576 textUrl = os.path.join(archivePath, textUrl) 577 # fix URLs starting with /mpiwg/online 578 if textUrl.startswith("/mpiwg/online"): 579 textUrl = textUrl.replace("/mpiwg/online", '', 1) 580 581 docinfo['textURL'] = textUrl 582 583 # new style text-url-path 584 textUrls = dom.xpath("//texttool/text-url-path") 585 if textUrls and (len(textUrls) > 0): 586 textUrl = getTextFromNode(textUrls[0]) 587 docinfo['textURLPath'] = textUrl 588 if not docinfo['imagePath']: 589 # text-only, no page images 590 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht 591 592 presentationUrls = dom.xpath("//texttool/presentation") 593 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 594 595 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 596 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 597 # durch den relativen Pfad auf die presentation infos 598 presentationPath = getTextFromNode(presentationUrls[0]) 599 if url.endswith("index.meta"): 600 presentationUrl = url.replace('index.meta', presentationPath) 601 else: 602 presentationUrl = url + "/" + presentationPath 603 604 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 605 606 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 607 608 return docinfo 609 610 611 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 612 """gets the bibliographical information from the preseantion entry in texttools 613 """ 614 dom=self.getPresentationInfoXML(url) 615 try: 616 docinfo['author']=getTextFromNode(dom.xpath("//author")[0]) 617 except: 618 pass 619 try: 620 docinfo['title']=getTextFromNode(dom.xpath("//title")[0]) 621 except: 622 pass 623 try: 624 docinfo['year']=getTextFromNode(dom.xpath("//date")[0]) 625 except: 626 pass 627 return docinfo 628 629 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 630 """path ist the path to the images it assumes that the index.meta file is one level higher.""" 631 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) 632 if docinfo is None: 633 docinfo = {} 634 path=path.replace("/mpiwg/online","") 635 docinfo['imagePath'] = path 636 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) 637 638 pathorig=path 639 for x in range(cut): 640 path=getParentDir(path) 641 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) 642 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 643 docinfo['imageURL'] = imageUrl 644 645 #path ist the path to the images it assumes that the index.meta file is one level higher. 646 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 647 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 648 return docinfo 649 650 651 def getDocinfo(self, mode, url): 652 """returns docinfo depending on mode""" 653 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) 654 # look for cached docinfo in session 655 if self.REQUEST.SESSION.has_key('docinfo'): 656 docinfo = self.REQUEST.SESSION['docinfo'] 657 # check if its still current 658 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 659 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) 660 return docinfo 661 # new docinfo 662 docinfo = {'mode': mode, 'url': url} 663 if mode=="texttool": #index.meta with texttool information 664 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) 665 elif mode=="imagepath": 666 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 667 elif mode=="filepath": 668 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 669 else: 670 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 671 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 672 673 logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 674 self.REQUEST.SESSION['docinfo'] = docinfo 675 return docinfo 676 677 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 678 """returns pageinfo with the given parameters""" 679 pageinfo = {} 680 current = getInt(current) 681 pageinfo['current'] = current 682 rows = int(rows or self.thumbrows) 683 pageinfo['rows'] = rows 684 cols = int(cols or self.thumbcols) 685 pageinfo['cols'] = cols 686 grpsize = cols * rows 687 pageinfo['groupsize'] = grpsize 688 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 689 # int(current / grpsize) * grpsize +1)) 690 pageinfo['start'] = start 691 pageinfo['end'] = start + grpsize 692 if (docinfo is not None) and ('numPages' in docinfo): 693 np = int(docinfo['numPages']) 694 pageinfo['end'] = min(pageinfo['end'], np) 695 pageinfo['numgroups'] = int(np / grpsize) 696 if np % grpsize > 0: 697 pageinfo['numgroups'] += 1 698 pageinfo['viewMode'] = viewMode 699 pageinfo['tocMode'] = tocMode 700 pageinfo['query'] = self.REQUEST.get('query',' ') 701 pageinfo['queryType'] = self.REQUEST.get('queryType',' ') 702 pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') 703 pageinfo['textPN'] = self.REQUEST.get('textPN','1') 704 pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') 705 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') 706 pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 707 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 708 toc = int (pageinfo['tocPN']) 709 pageinfo['textPages'] =int (toc) 710 711 if 'tocSize_%s'%tocMode in docinfo: 712 tocSize = int(docinfo['tocSize_%s'%tocMode]) 713 tocPageSize = int(pageinfo['tocPageSize']) 714 # cached toc 715 if tocSize%tocPageSize>0: 716 tocPages=tocSize/tocPageSize+1 717 else: 718 tocPages=tocSize/tocPageSize 719 pageinfo['tocPN'] = min (tocPages,toc) 720 pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') 721 pageinfo['sn'] =self.REQUEST.get('sn','') 722 return pageinfo 723 724 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 127 725 """init document viewer""" 128 726 self.title=title 129 self.imageViewerUrl=imageViewerUrl 130 727 self.digilibBaseUrl = digilibBaseUrl 728 self.thumbrows = thumbrows 729 self.thumbcols = thumbcols 730 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 131 731 if RESPONSE is not None: 132 732 RESPONSE.redirect('manage_main') 133 134 135 def imageLink(self,nr): 136 """link hinter den images""" 137 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING']) 138 params={} 139 for x in paramsTmp.iteritems(): 140 params[x[0]]=x[1][0] 141 142 params['pn']=nr 143 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params) 144 return newUrl 145 146 147 def thumbruler(self,cols,rows,start,maximum): 148 """ruler for thumbs""" 149 ret="" 150 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING']) 151 params={} 152 for x in paramsTmp.iteritems(): 153 154 if not x[0]=="start": 155 params[x[0]]=x[1][0] 156 157 newUrlSelect=self.REQUEST['URL']+"?"+urllib.urlencode(params) 158 if start>0: 159 newStart=max(start-cols*rows,0) 160 params['start']=newStart 161 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params) 162 ret+="""<a href="%s">prev</a>"""%newUrl 163 164 165 ret+="""<select onChange="location.href='%s&start='+this.options[this.selectedIndex].value" """%newUrlSelect 166 nr,rest=divmod(maximum,cols*rows) 167 if rest > 0: 168 nr+=1 169 for i in range(nr): 170 nr=i*cols*rows 171 172 if (start >= nr) and (start < nr+cols*rows): 173 ret+="""<option value="%s" selected>%s</option>"""%(nr,nr) 174 else: 175 ret+="""<option value="%s">%s</option>"""%(nr,nr) 176 ret+="</select>" 177 178 if start<maximum: 179 newStart=min(start+cols*rows,maximum) 180 params['start']=newStart 181 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params) 182 ret+="""<a href="%s">next</a>"""%newUrl 183 184 return ret 185 186 def textToolThumb(self,url,start=0): 187 """understands the texttool format 188 @param url: url to index.meta with texttool tag 189 """ 190 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 191 192 imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%imagepath[0] 193 194 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','thumbs.zpt')).__of__(self) 195 return pt(imageUrl=imageUrl,pt=imagepath[1],start=start) 196 197 def text(self,mode,url,pn): 198 """give text""" 199 if mode=="texttool": #index.meta with texttool information 200 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 201 202 print textpath 203 try: 204 dom = NonvalidatingReader.parseUri(textpath) 205 except: 206 return None 207 208 list=[] 209 nodes=dom.xpath("//pb") 210 211 node=nodes[int(pn)-1] 212 213 p=node 214 215 while p.tagName!="p": 216 p=p.parentNode 217 218 219 endNode=nodes[int(pn)] 220 221 222 e=endNode 223 224 while e.tagName!="p": 225 e=e.parentNode 226 227 228 next=node.parentNode 229 230 #sammle s 231 while next and (next!=endNode.parentNode): 232 list.append(next) 233 next=next.nextSibling 234 list.append(endNode.parentNode) 235 236 if p==e:# beide im selben paragraphen 237 238 else: 239 next=p 240 while next!=e: 241 print next,e 242 list.append(next) 243 next=next.nextSibling 244 245 for x in list: 246 PrettyPrint(x) 247 248 return list 249 250 def image(self,mode,url,pn): 251 """give image out""" 252 if mode=="texttool": #index.meta with texttool information 253 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 254 url=viewerUrl+"pn=%s&fn=%s"%(pn,imagepath[0]) 255 ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url 256 return url 257 258 def thumbs(self,mode,url,start): 259 """give thumbs out""" 260 if mode=="texttool": #index.meta with texttool information 261 return self.textToolThumb(url,int(start)) 262 263 security.declareProtected('View','index_html') 264 265 266 def index_html(self,mode,url,start=0,pn=0): 267 ''' 268 view it 269 @param mode: defines which type of document is behind url 270 @param url: url which contains display information 271 ''' 272 273 274 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt')).__of__(self) 275 return pt(mode=mode,url=url,start=start,pn=pn) 276 277 278 279 # security.declareProtected('View management screens','renameImageForm') 280 733 281 734 def manage_AddDocumentViewerForm(self): 282 735 """add the viewer form""" 283 pt=PageTemplateFile( os.path.join(package_home(globals()),'zpt','addDocumentViewer.zpt')).__of__(self)736 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) 284 737 return pt() 285 738 286 def manage_AddDocumentViewer(self,id,image ViewerUrl="",title="",RESPONSE=None):739 def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): 287 740 """add the viewer""" 288 newObj=documentViewer(id,image ViewerUrl,title)741 newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) 289 742 self._setObject(id,newObj) 290 743 291 744 if RESPONSE is not None: 292 745 RESPONSE.redirect('manage_main') 293 746 747 ## DocumentViewerTemplate class 748 class DocumentViewerTemplate(ZopePageTemplate): 749 """Template for document viewer""" 750 meta_type="DocumentViewer Template" 751 752 753 def manage_addDocumentViewerTemplateForm(self): 754 """Form for adding""" 755 pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) 756 return pt() 757 758 def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, 759 REQUEST=None, submit=None): 760 "Add a Page Template with optional file content." 761 762 self._setObject(id, DocumentViewerTemplate(id)) 763 ob = getattr(self, id) 764 txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() 765 logging.info("txt %s:"%txt) 766 ob.pt_edit(txt,"text/html") 767 if title: 768 ob.pt_setTitle(title) 769 try: 770 u = self.DestinationURL() 771 except AttributeError: 772 u = REQUEST['URL1'] 773 774 u = "%s/%s" % (u, urllib.quote(id)) 775 REQUEST.RESPONSE.redirect(u+'/manage_main') 776 return '' 777 778 779
Note: See TracChangeset
for help on using the changeset viewer.