Changeset 2:8cc283757c39 in documentViewer
- Timestamp:
- Jun 14, 2010, 10:50:06 AM (14 years ago)
- Branch:
- modularisierung
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
documentViewer.py
r0 r2 1 2 genericDigilib="http://nausikaa2.rz-berlin.mpg.de/digitallibrary/"3 1 4 2 from OFS.Folder import Folder 5 3 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 6 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 5 from Products.PythonScripts.standard import url_quote 7 6 from AccessControl import ClassSecurityInfo 7 from AccessControl import getSecurityManager 8 8 from Globals import package_home 9 9 10 10 from Ft.Xml.Domlette import NonvalidatingReader 11 11 from Ft.Xml.Domlette import PrettyPrint, Print 12 from Ft.Xml import EMPTY_NAMESPACE 12 from Ft.Xml import EMPTY_NAMESPACE, Parse 13 14 from xml.dom.minidom import parse, parseString 15 16 from extraFunction import * 17 13 18 14 19 import Ft.Xml.XPath 15 20 import cStringIO 21 import xmlrpclib 16 22 import os.path 23 import sys 17 24 import cgi 18 25 import urllib 26 import logging 27 import math 28 29 import urlparse 30 from types import * 31 32 def logger(txt,method,txt2): 33 """logging""" 34 logging.info(txt+ txt2) 35 36 37 def getInt(number, default=0): 38 """returns always an int (0 in case of problems)""" 39 try: 40 return int(number) 41 except: 42 return int(default) 19 43 20 44 def getTextFromNode(nodename): 45 """get the cdata content of a node""" 46 if nodename is None: 47 return "" 21 48 nodelist=nodename.childNodes 22 49 rc = "" … … 26 53 return rc 27 54 55 def serializeNode(node, encoding='utf-8'): 56 """returns a string containing node as XML""" 57 buf = cStringIO.StringIO() 58 Print(node, stream=buf, encoding=encoding) 59 s = buf.getvalue() 60 buf.close() 61 return s 62 63 64 def getParentDir(path): 65 """returns pathname shortened by one""" 66 return '/'.join(path.split('/')[0:-1]) 67 68 28 69 import socket 29 70 30 def urlopen(url ):71 def urlopen(url,timeout=2): 31 72 """urlopen mit timeout""" 32 socket.setdefaulttimeout( 2)73 socket.setdefaulttimeout(timeout) 33 74 ret=urllib.urlopen(url) 34 75 socket.setdefaulttimeout(5) 35 76 return ret 36 37 def getParamFromDigilib(path,param): 38 """gibt param von dlInfo aus""" 39 imageUrl=genericDigilib+"/dlInfo-xml.jsp?fn="+path 40 41 try: 42 dom = NonvalidatingReader.parseUri(imageUrl) 43 except: 44 return None 45 46 47 params=dom.xpath("//document-parameters/parameter[@name='%s']/@value"%param) 48 49 if params: 50 return params[0].value 51 52 def parseUrlTextTool(url): 53 """parse index meta""" 54 55 try: 56 dom = NonvalidatingReader.parseUri(url) 57 except: 58 zLOG.LOG("documentViewer (parseUrlTexttool)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) 59 return (None,None,None) 60 61 archivePaths=dom.xpath("//resource/archive-path") 62 63 if archivePaths and (len(archivePaths)>0): 64 archivePath=getTextFromNode(archivePaths[0]) 65 else: 66 archivePath=None 67 68 69 images=dom.xpath("//texttool/image") 70 71 if images and (len(images)>0): 72 image=getTextFromNode(images[0]) 73 else: 74 image=None 75 76 if image and archivePath: 77 image=os.path.join(archivePath,image) 78 image=image.replace("/mpiwg/online",'') 79 pt=getParamFromDigilib(image,'pt') 80 81 else: 82 image=None 83 84 viewerUrls=dom.xpath("//texttool/digiliburlprefix") 85 86 if viewerUrls and (len(viewerUrls)>0): 87 viewerUrl=getTextFromNode(viewerUrls[0]) 88 else: 89 viewerUrl=None 90 91 92 textUrls=dom.xpath("//texttool/text") 93 94 if textUrls and (len(textUrls)>0): 95 textUrl=getTextFromNode(textUrls[0]) 96 else: 97 textUrl=None 98 return viewerUrl,(image,pt),textUrl 99 100 101 class documentViewer(ZopePageTemplate): 77 78 79 ## 80 ## documentViewer class 81 ## 82 class documentViewer(Folder, extraFunction): 102 83 """document viewer""" 103 84 #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?" 85 104 86 meta_type="Document viewer" 105 87 106 88 security=ClassSecurityInfo() 107 manage_options= ZopePageTemplate.manage_options+(89 manage_options=Folder.manage_options+( 108 90 {'label':'main config','action':'changeDocumentViewerForm'}, 109 91 ) 110 92 111 _default_content_fn = os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt') 112 113 def __init__(self,id,imageViewerUrl,title=""): 93 # templates and forms 94 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 95 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 96 toc_text = PageTemplateFile('zpt/toc_text', globals()) 97 toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 98 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 99 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 100 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 101 page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) 102 head_main = PageTemplateFile('zpt/head_main', globals()) 103 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 104 info_xml = PageTemplateFile('zpt/info_xml', globals()) 105 106 107 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 108 security.declareProtected('View management screens','changeDocumentViewerForm') 109 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 110 111 112 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): 114 113 """init document viewer""" 115 114 self.id=id 116 115 self.title=title 117 self.imageViewerUrl=imageViewerUrl 118 119 security.declareProtected('View management screens','changeDocumentViewerForm') 120 def changeDocumentViewerForm(self): 121 """change it""" 122 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','changeDocumentViewer.zpt')).__of__(self) 123 return pt() 116 self.thumbcols = thumbcols 117 self.thumbrows = thumbrows 118 # authgroups is list of authorized groups (delimited by ,) 119 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 120 # create template folder so we can always use template.something 121 122 templateFolder = Folder('template') 123 #self['template'] = templateFolder # Zope-2.12 style 124 self._setObject('template',templateFolder) # old style 125 try: 126 from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy 127 xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False) 128 #templateFolder['fulltextclient'] = xmlRpcClient 129 templateFolder._setObject('fulltextclient',xmlRpcClient) 130 except Exception, e: 131 logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e)) 132 try: 133 from Products.zogiLib.zogiLib import zogiLib 134 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 135 #templateFolder['zogilib'] = zogilib 136 templateFolder._setObject('zogilib',zogilib) 137 except Exception, e: 138 logging.error("Unable to create zogiLib for zogilib: "+str(e)) 139 140 141 security.declareProtected('View','thumbs_rss') 142 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): 143 ''' 144 view it 145 @param mode: defines how to access the document behind url 146 @param url: url which contains display information 147 @param viewMode: if images display images, if text display text, default is images (text,images or auto) 148 149 ''' 150 logging.debug("HHHHHHHHHHHHHH:load the rss") 151 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 152 153 if not hasattr(self, 'template'): 154 # create template folder if it doesn't exist 155 self.manage_addFolder('template') 156 157 if not self.digilibBaseUrl: 158 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 159 160 docinfo = self.getDocinfo(mode=mode,url=url) 161 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 162 pt = getattr(self.template, 'thumbs_main_rss') 163 164 if viewMode=="auto": # automodus gewaehlt 165 if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert 166 viewMode="text" 167 else: 168 viewMode="images" 169 170 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 124 171 172 security.declareProtected('View','index_html') 173 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None, query=None, querySearch=None): 174 ''' 175 view it 176 @param mode: defines how to access the document behind url 177 @param url: url which contains display information 178 @param viewMode: if images display images, if text display text, default is auto (text,images or auto) 179 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 180 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) 181 ''' 182 183 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 184 185 if not hasattr(self, 'template'): 186 # this won't work 187 logging.error("template folder missing!") 188 return "ERROR: template folder missing!" 189 190 if not getattr(self, 'digilibBaseUrl', None): 191 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 192 193 docinfo = self.getDocinfo(mode=mode,url=url) 194 195 196 if tocMode != "thumbs": 197 # get table of contents 198 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 199 200 if viewMode=="auto": # automodus gewaehlt 201 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert 202 viewMode="text_dict" 203 else: 204 viewMode="images" 205 206 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 207 208 pt = getattr(self.template, 'viewer_main') 209 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 125 210 126 def changeDocumentViewer(self,imageViewerUrl,title="",RESPONSE=None): 211 def generateMarks(self,mk): 212 ret="" 213 if mk is None: 214 return "" 215 if type(mk) is not ListType: 216 mk=[mk] 217 for m in mk: 218 ret+="mk=%s"%m 219 return ret 220 221 222 def findDigilibUrl(self): 223 """try to get the digilib URL from zogilib""" 224 url = self.template.zogilib.getDLBaseUrl() 225 return url 226 227 def getDocumentViewerURL(self): 228 """returns the URL of this instance""" 229 return self.absolute_url() 230 231 def getStyle(self, idx, selected, style=""): 232 """returns a string with the given style and append 'sel' if path == selected.""" 233 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 234 if idx == selected: 235 return style + 'sel' 236 else: 237 return style 238 239 def getLink(self,param=None,val=None): 240 """link to documentviewer with parameter param set to val""" 241 params=self.REQUEST.form.copy() 242 if param is not None: 243 if val is None: 244 if params.has_key(param): 245 del params[param] 246 else: 247 params[param] = str(val) 248 249 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 250 params["mode"] = "imagepath" 251 params["url"] = getParentDir(params["url"]) 252 253 # quote values and assemble into query string 254 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 255 url=self.REQUEST['URL1']+"?"+ps 256 return url 257 258 def getLinkAmp(self,param=None,val=None): 259 """link to documentviewer with parameter param set to val""" 260 params=self.REQUEST.form.copy() 261 if param is not None: 262 if val is None: 263 if params.has_key(param): 264 del params[param] 265 else: 266 params[param] = str(val) 267 268 # quote values and assemble into query string 269 logging.info("XYXXXXX: %s"%repr(params.items())) 270 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 271 url=self.REQUEST['URL1']+"?"+ps 272 return url 273 274 def getInfo_xml(self,url,mode): 275 """returns info about the document as XML""" 276 277 if not self.digilibBaseUrl: 278 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 279 280 docinfo = self.getDocinfo(mode=mode,url=url) 281 pt = getattr(self.template, 'info_xml') 282 return pt(docinfo=docinfo) 283 284 285 def isAccessible(self, docinfo): 286 """returns if access to the resource is granted""" 287 access = docinfo.get('accessType', None) 288 logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) 289 if access is not None and access == 'free': 290 logger("documentViewer (accessOK)", logging.INFO, "access is free") 291 return True 292 elif access is None or access in self.authgroups: 293 # only local access -- only logged in users 294 user = getSecurityManager().getUser() 295 if user is not None: 296 #print "user: ", user 297 return (user.getUserName() != "Anonymous User") 298 else: 299 return False 300 301 logger("documentViewer (accessOK)", logging.INFO, "unknown access type %s"%access) 302 return False 303 304 305 def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): 306 """gibt param von dlInfo aus""" 307 num_retries = 3 308 if docinfo is None: 309 docinfo = {} 310 311 for x in range(cut): 312 313 path=getParentDir(path) 314 315 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 316 317 logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo from %s"%(infoUrl)) 318 319 for cnt in range(num_retries): 320 try: 321 # dom = NonvalidatingReader.parseUri(imageUrl) 322 txt=urllib.urlopen(infoUrl).read() 323 dom = Parse(txt) 324 break 325 except: 326 logger("documentViewer (getdirinfofromdigilib)", logging.ERROR, "error reading %s (try %d)"%(infoUrl,cnt)) 327 else: 328 raise IOError("Unable to get dir-info from %s"%(infoUrl)) 329 330 sizes=dom.xpath("//dir/size") 331 logger("documentViewer (getparamfromdigilib)", logging.INFO, "dirInfo:size"%sizes) 332 333 if sizes: 334 docinfo['numPages'] = int(getTextFromNode(sizes[0])) 335 else: 336 docinfo['numPages'] = 0 337 338 # TODO: produce and keep list of image names and numbers 339 340 return docinfo 341 342 343 def getIndexMeta(self, url): 344 """returns dom of index.meta document at url""" 345 num_retries = 3 346 dom = None 347 metaUrl = None 348 if url.startswith("http://"): 349 # real URL 350 metaUrl = url 351 else: 352 # online path 353 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 354 metaUrl=server+url.replace("/mpiwg/online","") 355 if not metaUrl.endswith("index.meta"): 356 metaUrl += "/index.meta" 357 logging.debug("METAURL: %s"%metaUrl) 358 for cnt in range(num_retries): 359 try: 360 # patch dirk encoding fehler treten dann nicht mehr auf 361 # dom = NonvalidatingReader.parseUri(metaUrl) 362 txt=urllib.urlopen(metaUrl).read() 363 dom = Parse(txt) 364 break 365 except: 366 logger("ERROR documentViewer (getIndexMeta)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) 367 368 if dom is None: 369 raise IOError("Unable to read index meta from %s"%(url)) 370 371 return dom 372 373 def getPresentationInfoXML(self, url): 374 """returns dom of info.xml document at url""" 375 num_retries = 3 376 dom = None 377 metaUrl = None 378 if url.startswith("http://"): 379 # real URL 380 metaUrl = url 381 else: 382 # online path 383 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 384 metaUrl=server+url.replace("/mpiwg/online","") 385 386 for cnt in range(num_retries): 387 try: 388 # patch dirk encoding fehler treten dann nicht mehr auf 389 # dom = NonvalidatingReader.parseUri(metaUrl) 390 txt=urllib.urlopen(metaUrl).read() 391 dom = Parse(txt) 392 break 393 except: 394 logger("ERROR documentViewer (getPresentationInfoXML)", logging.INFO,"%s (%s)"%sys.exc_info()[0:2]) 395 396 if dom is None: 397 raise IOError("Unable to read infoXMLfrom %s"%(url)) 398 399 return dom 400 401 402 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 403 """gets authorization info from the index.meta file at path or given by dom""" 404 logger("documentViewer (getauthinfofromindexmeta)", logging.INFO,"path: %s"%(path)) 405 406 access = None 407 408 if docinfo is None: 409 docinfo = {} 410 411 if dom is None: 412 for x in range(cut): 413 path=getParentDir(path) 414 dom = self.getIndexMeta(path) 415 416 acctype = dom.xpath("//access-conditions/access/@type") 417 if acctype and (len(acctype)>0): 418 access=acctype[0].value 419 if access in ['group', 'institution']: 420 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 421 422 docinfo['accessType'] = access 423 return docinfo 424 425 426 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 427 """gets bibliographical info from the index.meta file at path or given by dom""" 428 logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) 429 430 if docinfo is None: 431 docinfo = {} 432 433 if dom is None: 434 for x in range(cut): 435 path=getParentDir(path) 436 dom = self.getIndexMeta(path) 437 438 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 439 # put in all raw bib fields as dict "bib" 440 bib = dom.xpath("//bib/*") 441 if bib and len(bib)>0: 442 bibinfo = {} 443 for e in bib: 444 bibinfo[e.localName] = getTextFromNode(e) 445 docinfo['bib'] = bibinfo 446 447 # extract some fields (author, title, year) according to their mapping 448 metaData=self.metadata.main.meta.bib 449 bibtype=dom.xpath("//bib/@type") 450 if bibtype and (len(bibtype)>0): 451 bibtype=bibtype[0].value 452 else: 453 bibtype="generic" 454 455 bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) 456 docinfo['bib_type'] = bibtype 457 bibmap=metaData.generateMappingForType(bibtype) 458 # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 459 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: 460 try: 461 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) 462 except: pass 463 try: 464 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) 465 except: pass 466 try: 467 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) 468 except: pass 469 logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 470 try: 471 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 472 except: 473 docinfo['lang']='' 474 475 return docinfo 476 477 478 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 479 """parse texttool tag in index meta""" 480 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) 481 if docinfo is None: 482 docinfo = {} 483 if docinfo.get('lang', None) is None: 484 docinfo['lang'] = '' # default keine Sprache gesetzt 485 if dom is None: 486 dom = self.getIndexMeta(url) 487 488 archivePath = None 489 archiveName = None 490 491 archiveNames = dom.xpath("//resource/name") 492 if archiveNames and (len(archiveNames) > 0): 493 archiveName = getTextFromNode(archiveNames[0]) 494 else: 495 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url)) 496 497 archivePaths = dom.xpath("//resource/archive-path") 498 if archivePaths and (len(archivePaths) > 0): 499 archivePath = getTextFromNode(archivePaths[0]) 500 # clean up archive path 501 if archivePath[0] != '/': 502 archivePath = '/' + archivePath 503 if archiveName and (not archivePath.endswith(archiveName)): 504 archivePath += "/" + archiveName 505 else: 506 # try to get archive-path from url 507 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url)) 508 if (not url.startswith('http')): 509 archivePath = url.replace('index.meta', '') 510 511 if archivePath is None: 512 # we balk without archive-path 513 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 514 515 imageDirs = dom.xpath("//texttool/image") 516 if imageDirs and (len(imageDirs) > 0): 517 imageDir = getTextFromNode(imageDirs[0]) 518 519 else: 520 # we balk with no image tag / not necessary anymore because textmode is now standard 521 #raise IOError("No text-tool info in %s"%(url)) 522 imageDir = "" 523 #xquery="//pb" 524 docinfo['imagePath'] = "" # keine Bilder 525 docinfo['imageURL'] = "" 526 527 if imageDir and archivePath: 528 #print "image: ", imageDir, " archivepath: ", archivePath 529 imageDir = os.path.join(archivePath, imageDir) 530 imageDir = imageDir.replace("/mpiwg/online", '') 531 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) 532 docinfo['imagePath'] = imageDir 533 534 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 535 536 viewerUrls = dom.xpath("//texttool/digiliburlprefix") 537 if viewerUrls and (len(viewerUrls) > 0): 538 viewerUrl = getTextFromNode(viewerUrls[0]) 539 docinfo['viewerURL'] = viewerUrl 540 541 textUrls = dom.xpath("//texttool/text") 542 if textUrls and (len(textUrls) > 0): 543 textUrl = getTextFromNode(textUrls[0]) 544 if urlparse.urlparse(textUrl)[0] == "": #keine url 545 textUrl = os.path.join(archivePath, textUrl) 546 # fix URLs starting with /mpiwg/online 547 if textUrl.startswith("/mpiwg/online"): 548 textUrl = textUrl.replace("/mpiwg/online", '', 1) 549 550 docinfo['textURL'] = textUrl 551 552 textUrls = dom.xpath("//texttool/text-url-path") 553 if textUrls and (len(textUrls) > 0): 554 textUrl = getTextFromNode(textUrls[0]) 555 docinfo['textURLPath'] = textUrl 556 if not docinfo['imagePath']: 557 # text-only, no page images 558 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht 559 560 presentationUrls = dom.xpath("//texttool/presentation") 561 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 562 563 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 564 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 565 # durch den relativen Pfad auf die presentation infos 566 presentationPath = getTextFromNode(presentationUrls[0]) 567 if url.endswith("index.meta"): 568 presentationUrl = url.replace('index.meta', presentationPath) 569 else: 570 presentationUrl = url + "/" + presentationPath 571 572 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 573 574 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 575 576 return docinfo 577 578 579 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 580 """gets the bibliographical information from the preseantion entry in texttools 581 """ 582 dom=self.getPresentationInfoXML(url) 583 try: 584 docinfo['author']=getTextFromNode(dom.xpath("//author")[0]) 585 except: 586 pass 587 try: 588 docinfo['title']=getTextFromNode(dom.xpath("//title")[0]) 589 except: 590 pass 591 try: 592 docinfo['year']=getTextFromNode(dom.xpath("//date")[0]) 593 except: 594 pass 595 return docinfo 596 597 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 598 """path ist the path to the images it assumes that the index.meta file is one level higher.""" 599 logger("documentViewer (getdocinfofromimagepath)", logging.INFO,"path: %s"%(path)) 600 if docinfo is None: 601 docinfo = {} 602 path=path.replace("/mpiwg/online","") 603 docinfo['imagePath'] = path 604 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) 605 606 pathorig=path 607 for x in range(cut): 608 path=getParentDir(path) 609 logging.error("PATH:"+path) 610 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 611 docinfo['imageURL'] = imageUrl 612 613 #path ist the path to the images it assumes that the index.meta file is one level higher. 614 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 615 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 616 return docinfo 617 618 619 def getDocinfo(self, mode, url): 620 """returns docinfo depending on mode""" 621 logger("documentViewer (getdocinfo)", logging.INFO,"mode: %s, url: %s"%(mode,url)) 622 # look for cached docinfo in session 623 if self.REQUEST.SESSION.has_key('docinfo'): 624 docinfo = self.REQUEST.SESSION['docinfo'] 625 # check if its still current 626 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 627 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo in session: %s"%docinfo) 628 return docinfo 629 # new docinfo 630 docinfo = {'mode': mode, 'url': url} 631 if mode=="texttool": #index.meta with texttool information 632 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) 633 elif mode=="imagepath": 634 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 635 elif mode=="filepath": 636 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 637 else: 638 logger("documentViewer (getdocinfo)", logging.ERROR,"unknown mode!") 639 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 640 641 logger("documentViewer (getdocinfo)", logging.INFO,"docinfo: %s"%docinfo) 642 self.REQUEST.SESSION['docinfo'] = docinfo 643 return docinfo 644 645 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 646 """returns pageinfo with the given parameters""" 647 pageinfo = {} 648 current = getInt(current) 649 pageinfo['current'] = current 650 rows = int(rows or self.thumbrows) 651 pageinfo['rows'] = rows 652 cols = int(cols or self.thumbcols) 653 pageinfo['cols'] = cols 654 grpsize = cols * rows 655 pageinfo['groupsize'] = grpsize 656 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 657 # int(current / grpsize) * grpsize +1)) 658 pageinfo['start'] = start 659 pageinfo['end'] = start + grpsize 660 if (docinfo is not None) and ('numPages' in docinfo): 661 np = int(docinfo['numPages']) 662 pageinfo['end'] = min(pageinfo['end'], np) 663 pageinfo['numgroups'] = int(np / grpsize) 664 if np % grpsize > 0: 665 pageinfo['numgroups'] += 1 666 pageinfo['viewMode'] = viewMode 667 pageinfo['tocMode'] = tocMode 668 pageinfo['query'] = self.REQUEST.get('query',' ') 669 pageinfo['queryType'] = self.REQUEST.get('queryType',' ') 670 pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') 671 pageinfo['textPN'] = self.REQUEST.get('textPN','1') 672 pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') 673 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') 674 pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 675 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 676 toc = int (pageinfo['tocPN']) 677 pageinfo['textPages'] =int (toc) 678 679 if 'tocSize_%s'%tocMode in docinfo: 680 tocSize = int(docinfo['tocSize_%s'%tocMode]) 681 tocPageSize = int(pageinfo['tocPageSize']) 682 # cached toc 683 if tocSize%tocPageSize>0: 684 tocPages=tocSize/tocPageSize+1 685 else: 686 tocPages=tocSize/tocPageSize 687 pageinfo['tocPN'] = min (tocPages,toc) 688 pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') 689 pageinfo['sn'] =self.REQUEST.get('sn','') 690 return pageinfo 691 692 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 127 693 """init document viewer""" 128 694 self.title=title 129 self.imageViewerUrl=imageViewerUrl 130 695 self.digilibBaseUrl = digilibBaseUrl 696 self.thumbrows = thumbrows 697 self.thumbcols = thumbcols 698 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 131 699 if RESPONSE is not None: 132 700 RESPONSE.redirect('manage_main') 133 134 135 def imageLink(self,nr): 136 """link hinter den images""" 137 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING']) 138 params={} 139 for x in paramsTmp.iteritems(): 140 params[x[0]]=x[1][0] 141 142 params['pn']=nr 143 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params) 144 return newUrl 145 146 147 def thumbruler(self,cols,rows,start,maximum): 148 """ruler for thumbs""" 149 ret="" 150 paramsTmp=cgi.parse_qs(self.REQUEST['QUERY_STRING']) 151 params={} 152 for x in paramsTmp.iteritems(): 153 154 if not x[0]=="start": 155 params[x[0]]=x[1][0] 156 157 newUrlSelect=self.REQUEST['URL']+"?"+urllib.urlencode(params) 158 if start>0: 159 newStart=max(start-cols*rows,0) 160 params['start']=newStart 161 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params) 162 ret+="""<a href="%s">prev</a>"""%newUrl 163 164 165 ret+="""<select onChange="location.href='%s&start='+this.options[this.selectedIndex].value" """%newUrlSelect 166 nr,rest=divmod(maximum,cols*rows) 167 if rest > 0: 168 nr+=1 169 for i in range(nr): 170 nr=i*cols*rows 171 172 if (start >= nr) and (start < nr+cols*rows): 173 ret+="""<option value="%s" selected>%s</option>"""%(nr,nr) 174 else: 175 ret+="""<option value="%s">%s</option>"""%(nr,nr) 176 ret+="</select>" 177 178 if start<maximum: 179 newStart=min(start+cols*rows,maximum) 180 params['start']=newStart 181 newUrl=self.REQUEST['URL']+"?"+urllib.urlencode(params) 182 ret+="""<a href="%s">next</a>"""%newUrl 183 184 return ret 185 186 def textToolThumb(self,url,start=0): 187 """understands the texttool format 188 @param url: url to index.meta with texttool tag 189 """ 190 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 191 192 imageUrl=genericDigilib+"/servlet/Scaler?fn=%s"%imagepath[0] 193 194 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','thumbs.zpt')).__of__(self) 195 return pt(imageUrl=imageUrl,pt=imagepath[1],start=start) 196 197 def text(self,mode,url,pn): 198 """give text""" 199 if mode=="texttool": #index.meta with texttool information 200 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 201 202 print textpath 203 try: 204 dom = NonvalidatingReader.parseUri(textpath) 205 except: 206 return None 207 208 list=[] 209 nodes=dom.xpath("//pb") 210 211 node=nodes[int(pn)-1] 212 213 p=node 214 215 while p.tagName!="p": 216 p=p.parentNode 217 218 219 endNode=nodes[int(pn)] 220 221 222 e=endNode 223 224 while e.tagName!="p": 225 e=e.parentNode 226 227 228 next=node.parentNode 229 230 #sammle s 231 while next and (next!=endNode.parentNode): 232 list.append(next) 233 next=next.nextSibling 234 list.append(endNode.parentNode) 235 236 if p==e:# beide im selben paragraphen 237 238 else: 239 next=p 240 while next!=e: 241 print next,e 242 list.append(next) 243 next=next.nextSibling 244 245 for x in list: 246 PrettyPrint(x) 247 248 return list 249 250 def image(self,mode,url,pn): 251 """give image out""" 252 if mode=="texttool": #index.meta with texttool information 253 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 254 url=viewerUrl+"pn=%s&fn=%s"%(pn,imagepath[0]) 255 ret="""<iframe height="100%%" width="100%%" src="%s"/>"""%url 256 return url 257 258 def thumbs(self,mode,url,start): 259 """give thumbs out""" 260 if mode=="texttool": #index.meta with texttool information 261 return self.textToolThumb(url,int(start)) 262 263 security.declareProtected('View','index_html') 264 265 266 def index_html(self,mode,url,start=0,pn=0): 267 ''' 268 view it 269 @param mode: defines which type of document is behind url 270 @param url: url which contains display information 271 ''' 272 273 274 pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','documentViewer_template.zpt')).__of__(self) 275 return pt(mode=mode,url=url,start=start,pn=pn) 276 277 278 279 # security.declareProtected('View management screens','renameImageForm') 280 701 281 702 def manage_AddDocumentViewerForm(self): 282 703 """add the viewer form""" 283 pt=PageTemplateFile( os.path.join(package_home(globals()),'zpt','addDocumentViewer.zpt')).__of__(self)704 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) 284 705 return pt() 285 706 286 def manage_AddDocumentViewer(self,id,image ViewerUrl="",title="",RESPONSE=None):707 def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): 287 708 """add the viewer""" 288 newObj=documentViewer(id,image ViewerUrl,title)709 newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) 289 710 self._setObject(id,newObj) 290 711 291 712 if RESPONSE is not None: 292 713 RESPONSE.redirect('manage_main') 293 714 715 ## DocumentViewerTemplate class 716 class DocumentViewerTemplate(ZopePageTemplate): 717 """Template for document viewer""" 718 meta_type="DocumentViewer Template" 719 720 721 def manage_addDocumentViewerTemplateForm(self): 722 """Form for adding""" 723 pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self) 724 return pt() 725 726 def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None, 727 REQUEST=None, submit=None): 728 "Add a Page Template with optional file content." 729 730 self._setObject(id, DocumentViewerTemplate(id)) 731 ob = getattr(self, id) 732 txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read() 733 logging.info("txt %s:"%txt) 734 ob.pt_edit(txt,"text/html") 735 if title: 736 ob.pt_setTitle(title) 737 try: 738 u = self.DestinationURL() 739 except AttributeError: 740 u = REQUEST['URL1'] 741 742 u = "%s/%s" % (u, urllib.quote(id)) 743 REQUEST.RESPONSE.redirect(u+'/manage_main') 744 return '' 745 746 747
Note: See TracChangeset
for help on using the changeset viewer.