comparison documentViewer.py @ 84:a6e4f9b6729a

first version with new full-text infrastructure and slightly changed templates
author casties
date Fri, 19 Mar 2010 12:42:40 +0100
parents ec12a2440daa
children 6a4a72033d58
comparison
equal deleted inserted replaced
83:ec12a2440daa 84:a6e4f9b6729a
10 from Ft.Xml.Domlette import PrettyPrint, Print 10 from Ft.Xml.Domlette import PrettyPrint, Print
11 from Ft.Xml import EMPTY_NAMESPACE, Parse 11 from Ft.Xml import EMPTY_NAMESPACE, Parse
12 12
13 13
14 import Ft.Xml.XPath 14 import Ft.Xml.XPath
15 import cStringIO
15 import xmlrpclib 16 import xmlrpclib
16 import os.path 17 import os.path
17 import sys 18 import sys
18 import cgi 19 import cgi
19 import urllib 20 import urllib
20 import logging 21 import logging
21 import math 22 import math
22 23
23 import urlparse 24 import urlparse
24 from types import * 25 from types import *
26
25 def logger(txt,method,txt2): 27 def logger(txt,method,txt2):
26 """logging""" 28 """logging"""
27 logging.info(txt+ txt2) 29 logging.info(txt+ txt2)
28 30
29 31
43 for node in nodelist: 45 for node in nodelist:
44 if node.nodeType == node.TEXT_NODE: 46 if node.nodeType == node.TEXT_NODE:
45 rc = rc + node.data 47 rc = rc + node.data
46 return rc 48 return rc
47 49
50 def serializeNode(node, encoding='utf-8'):
51 """returns a string containing node as XML"""
52 buf = cStringIO.StringIO()
53 Print(node, stream=buf, encoding=encoding)
54 s = buf.getvalue()
55 buf.close()
56 return s
57
48 58
49 def getParentDir(path): 59 def getParentDir(path):
50 """returns pathname shortened by one""" 60 """returns pathname shortened by one"""
51 return '/'.join(path.split('/')[0:-1]) 61 return '/'.join(path.split('/')[0:-1])
52 62
76 ) 86 )
77 87
78 # templates and forms 88 # templates and forms
79 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 89 viewer_main = PageTemplateFile('zpt/viewer_main', globals())
80 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) 90 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals())
81 image_main = PageTemplateFile('zpt/image_main', globals()) 91 image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete!
92 page_main_images = PageTemplateFile('zpt/page_main_images', globals())
93 page_main_text = PageTemplateFile('zpt/page_main_text', globals())
82 head_main = PageTemplateFile('zpt/head_main', globals()) 94 head_main = PageTemplateFile('zpt/head_main', globals())
83 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 95 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals())
84 info_xml = PageTemplateFile('zpt/info_xml', globals()) 96 info_xml = PageTemplateFile('zpt/info_xml', globals())
85 97
86 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 98 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals())
87 security.declareProtected('View management screens','changeDocumentViewerForm') 99 security.declareProtected('View management screens','changeDocumentViewerForm')
88 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 100 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals())
89 101
90 102
91 def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): 103 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"):
92 """init document viewer""" 104 """init document viewer"""
93 self.id=id 105 self.id=id
94 self.title=title 106 self.title=title
95 self.imageViewerUrl=imageViewerUrl
96 self.textViewerUrl=textViewerUrl
97
98 if not digilibBaseUrl:
99 self.digilibBaseUrl = self.findDigilibUrl()
100 else:
101 self.digilibBaseUrl = digilibBaseUrl
102 self.thumbcols = thumbcols 107 self.thumbcols = thumbcols
103 self.thumbrows = thumbrows 108 self.thumbrows = thumbrows
104 # authgroups is list of authorized groups (delimited by ,) 109 # authgroups is list of authorized groups (delimited by ,)
105 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 110 self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
106 # add template folder so we can always use template.something 111 # create template folder so we can always use template.something
107 self.manage_addFolder('template') 112
108 113 templateFolder = Folder('template')
114 #self['template'] = templateFolder # Zope-2.12 style
115 self._setObject('template',templateFolder) # old style
116 try:
117 from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy
118 xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False)
119 #templateFolder['fulltextclient'] = xmlRpcClient
120 templateFolder._setObject('fulltextclient',xmlRpcClient)
121 except Exception, e:
122 logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e))
123 try:
124 from Products.zogiLib.zogiLib import zogiLib
125 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book")
126 #templateFolder['zogilib'] = zogilib
127 templateFolder._setObject('zogilib',zogilib)
128 except Exception, e:
129 logging.error("Unable to create zogiLib for zogilib: "+str(e))
130
109 131
110 security.declareProtected('View','thumbs_rss') 132 security.declareProtected('View','thumbs_rss')
111 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): 133 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1):
112 ''' 134 '''
113 view it 135 view it
114 @param mode: defines how to access the document behind url 136 @param mode: defines how to access the document behind url
115 @param url: url which contains display information 137 @param url: url which contains display information
116 @param viewMode: if images display images, if text display text, default is images (text,images or auto) 138 @param viewMode: if images display images, if text display text, default is images (text,images or auto)
117 139
118 ''' 140 '''
119 logging.info("HHHHHHHHHHHHHH:load the rss") 141 logging.debug("HHHHHHHHHHHHHH:load the rss")
120 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 142 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
121 143
122 if not hasattr(self, 'template'): 144 if not hasattr(self, 'template'):
123 # create template folder if it doesn't exist 145 # create template folder if it doesn't exist
124 self.manage_addFolder('template') 146 self.manage_addFolder('template')
137 viewMode="images" 159 viewMode="images"
138 160
139 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 161 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode)
140 162
141 security.declareProtected('View','index_html') 163 security.declareProtected('View','index_html')
142 def index_html(self,mode,url,viewMode="auto",start=None,pn=1,mk=None): 164 def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None):
143 ''' 165 '''
144 view it 166 view it
145 @param mode: defines how to access the document behind url 167 @param mode: defines how to access the document behind url
146 @param url: url which contains display information 168 @param url: url which contains display information
147 @param viewMode: if images display images, if text display text, default is images (text,images or auto) 169 @param viewMode: if images display images, if text display text, default is images (text,images or auto)
148 170
149 ''' 171 '''
150 172
151 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 173 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))
152 174
153 if not hasattr(self, 'template'): 175 if not hasattr(self, 'template'):
154 # create template folder if it doesn't exist 176 # this won't work
155 self.manage_addFolder('template') 177 logging.error("template folder missing!")
156 178 return "ERROR: template folder missing!"
157 if not self.digilibBaseUrl: 179
180 if not getattr(self, 'digilibBaseUrl', None):
158 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" 181 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary"
159 182
160 docinfo = self.getDocinfo(mode=mode,url=url) 183 docinfo = self.getDocinfo(mode=mode,url=url)
161 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) 184 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo)
162 pt = getattr(self.template, 'viewer_main') 185 pt = getattr(self.template, 'viewer_main')
163 186
164 if viewMode=="auto": # automodus gewaehlt 187 if viewMode=="auto": # automodus gewaehlt
165 if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert 188 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert
166 viewMode="text" 189 viewMode="text"
167 else: 190 else:
168 viewMode="images" 191 viewMode="images"
169 192
170 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 193 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk))
177 if type(mk) is not ListType: 200 if type(mk) is not ListType:
178 mk=[mk] 201 mk=[mk]
179 for m in mk: 202 for m in mk:
180 ret+="mk=%s"%m 203 ret+="mk=%s"%m
181 return ret 204 return ret
205
206 def findDigilibUrl(self):
207 """try to get the digilib URL from zogilib"""
208 url = self.template.zogilib.getDLBaseUrl()
209 return url
210
211 def getStyle(self, idx, selected, style=""):
212 """returns a string with the given style and append 'sel' if path == selected."""
213 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
214 if idx == selected:
215 return style + 'sel'
216 else:
217 return style
182 218
183 def getLink(self,param=None,val=None): 219 def getLink(self,param=None,val=None):
184 """link to documentviewer with parameter param set to val""" 220 """link to documentviewer with parameter param set to val"""
185 params=self.REQUEST.form.copy() 221 params=self.REQUEST.form.copy()
186 if param is not None: 222 if param is not None:
187 if val is None: 223 if val is None:
188 if params.has_key(param): 224 if params.has_key(param):
189 del params[param] 225 del params[param]
190 else: 226 else:
191 params[param] = str(val) 227 params[param] = str(val)
192 if params["mode"] == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 228
229 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath
193 params["mode"] = "imagepath" 230 params["mode"] = "imagepath"
194 params["url"] = getParentDir(params["url"]) 231 params["url"] = getParentDir(params["url"])
195 232
196 # quote values and assemble into query string 233 # quote values and assemble into query string
197 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 234 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()])
223 docinfo = self.getDocinfo(mode=mode,url=url) 260 docinfo = self.getDocinfo(mode=mode,url=url)
224 pt = getattr(self.template, 'info_xml') 261 pt = getattr(self.template, 'info_xml')
225 return pt(docinfo=docinfo) 262 return pt(docinfo=docinfo)
226 263
227 264
228 def getStyle(self, idx, selected, style=""):
229 """returns a string with the given style and append 'sel' if path == selected."""
230 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style))
231 if idx == selected:
232 return style + 'sel'
233 else:
234 return style
235
236 def getTextLanguage(self,url,docinfo):
237 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0])
238 lang = urlencode({'':docinfo['lang']})
239 return lang
240
241
242 def isAccessible(self, docinfo): 265 def isAccessible(self, docinfo):
243 """returns if access to the resource is granted""" 266 """returns if access to the resource is granted"""
244 access = docinfo.get('accessType', None) 267 access = docinfo.get('accessType', None)
245 logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) 268 logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access)
246 if access is not None and access == 'free': 269 if access is not None and access == 'free':
289 312
290 if sizes: 313 if sizes:
291 docinfo['numPages'] = int(getTextFromNode(sizes[0])) 314 docinfo['numPages'] = int(getTextFromNode(sizes[0]))
292 else: 315 else:
293 docinfo['numPages'] = 0 316 docinfo['numPages'] = 0
317
318 # TODO: produce and keep list of image names and numbers
294 319
295 return docinfo 320 return docinfo
296 321
297 322
298 def getIndexMeta(self, url): 323 def getIndexMeta(self, url):
429 docinfo['lang']='' 454 docinfo['lang']=''
430 455
431 return docinfo 456 return docinfo
432 457
433 458
434 def getNumPages(self, xquery, docinfo=None): #New Method 24.02.2010 459 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None):
435 text=self.viewerTemplates.query.eval("/mpdl/interface/xquery.xql","document="+ docinfo['textURLPath'] +"&xquery="+str(xquery)) 460 """parse texttool tag in index meta"""
436 docinfo['numPages'] = text.count("<pb ") 461 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url))
437 return docinfo 462 if docinfo is None:
438
439
440 def getDocinfoFromTextTool(self,url,dom=None,docinfo=None):
441 """parse texttool tag in index meta"""
442 logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url))
443 if docinfo is None:
444 docinfo = {} 463 docinfo = {}
445 464
446 if docinfo.get('lang',None) is None: 465 if docinfo.get('lang', None) is None:
447 docinfo['lang']='' # default keine Sprache gesetzt 466 docinfo['lang'] = '' # default keine Sprache gesetzt
448 if dom is None: 467 if dom is None:
449 dom = self.getIndexMeta(url) 468 dom = self.getIndexMeta(url)
450 469
451 archivePath = None 470 archivePath = None
452 archiveName = None 471 archiveName = None
453 472
454 archiveNames=dom.xpath("//resource/name") 473 archiveNames = dom.xpath("//resource/name")
455 if archiveNames and (len(archiveNames)>0): 474 if archiveNames and (len(archiveNames) > 0):
456 archiveName=getTextFromNode(archiveNames[0]) 475 archiveName = getTextFromNode(archiveNames[0])
457 else: 476 else:
458 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url)) 477 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url))
459 478
460 archivePaths=dom.xpath("//resource/archive-path") 479 archivePaths = dom.xpath("//resource/archive-path")
461 if archivePaths and (len(archivePaths)>0): 480 if archivePaths and (len(archivePaths) > 0):
462 archivePath=getTextFromNode(archivePaths[0]) 481 archivePath = getTextFromNode(archivePaths[0])
463 # clean up archive path 482 # clean up archive path
464 if archivePath[0] != '/': 483 if archivePath[0] != '/':
465 archivePath = '/' + archivePath 484 archivePath = '/' + archivePath
466 if archiveName and (not archivePath.endswith(archiveName)): 485 if archiveName and (not archivePath.endswith(archiveName)):
467 archivePath += "/" + archiveName 486 archivePath += "/" + archiveName
468 else: 487 else:
469 # try to get archive-path from url 488 # try to get archive-path from url
470 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url)) 489 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url))
471 if (not url.startswith('http')): 490 if (not url.startswith('http')):
472 archivePath = url.replace('index.meta', '') 491 archivePath = url.replace('index.meta', '')
473 492
474 if archivePath is None: 493 if archivePath is None:
475 # we balk without archive-path 494 # we balk without archive-path
476 raise IOError("Missing archive-path (for text-tool) in %s"%(url)) 495 raise IOError("Missing archive-path (for text-tool) in %s" % (url))
477 496
478 imageDirs=dom.xpath("//texttool/image") 497 imageDirs = dom.xpath("//texttool/image")
479 if imageDirs and (len(imageDirs)>0): 498 if imageDirs and (len(imageDirs) > 0):
480 imageDir=getTextFromNode(imageDirs[0]) 499 imageDir = getTextFromNode(imageDirs[0])
481 500
482 else: 501 else:
483 # we balk with no image tag / not necessary anymore because textmode is now standard 502 # we balk with no image tag / not necessary anymore because textmode is now standard
484 #raise IOError("No text-tool info in %s"%(url)) 503 #raise IOError("No text-tool info in %s"%(url))
485 imageDir="" 504 imageDir = ""
486 #xquery="//pb" 505 #xquery="//pb"
487 docinfo['imagePath'] = "" # keine Bilder 506 docinfo['imagePath'] = "" # keine Bilder
488 docinfo['imageURL'] = "" 507 docinfo['imageURL'] = ""
489 508
490 if imageDir and archivePath: 509 if imageDir and archivePath:
491 #print "image: ", imageDir, " archivepath: ", archivePath 510 #print "image: ", imageDir, " archivepath: ", archivePath
492 imageDir=os.path.join(archivePath,imageDir) 511 imageDir = os.path.join(archivePath, imageDir)
493 imageDir=imageDir.replace("/mpiwg/online",'') 512 imageDir = imageDir.replace("/mpiwg/online", '')
494 docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo) 513 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo)
495 docinfo['imagePath'] = imageDir 514 docinfo['imagePath'] = imageDir
496 515
497 docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir 516 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir
498 517
499 viewerUrls=dom.xpath("//texttool/digiliburlprefix") 518 viewerUrls = dom.xpath("//texttool/digiliburlprefix")
500 if viewerUrls and (len(viewerUrls)>0): 519 if viewerUrls and (len(viewerUrls) > 0):
501 viewerUrl=getTextFromNode(viewerUrls[0]) 520 viewerUrl = getTextFromNode(viewerUrls[0])
502 docinfo['viewerURL'] = viewerUrl 521 docinfo['viewerURL'] = viewerUrl
503 522
504 textUrls=dom.xpath("//texttool/text") 523 textUrls = dom.xpath("//texttool/text")
505 if textUrls and (len(textUrls)>0): 524 if textUrls and (len(textUrls) > 0):
506 textUrl=getTextFromNode(textUrls[0]) 525 textUrl = getTextFromNode(textUrls[0])
507 if urlparse.urlparse(textUrl)[0]=="": #keine url 526 if urlparse.urlparse(textUrl)[0] == "": #keine url
508 textUrl=os.path.join(archivePath,textUrl) 527 textUrl = os.path.join(archivePath, textUrl)
509 # fix URLs starting with /mpiwg/online 528 # fix URLs starting with /mpiwg/online
510 if textUrl.startswith("/mpiwg/online"): 529 if textUrl.startswith("/mpiwg/online"):
511 textUrl = textUrl.replace("/mpiwg/online",'',1) 530 textUrl = textUrl.replace("/mpiwg/online", '', 1)
512 531
513 docinfo['textURL'] = textUrl 532 docinfo['textURL'] = textUrl
514 533
515 textUrls=dom.xpath("//texttool/text-url-path") 534 textUrls = dom.xpath("//texttool/text-url-path")
516 if textUrls and (len(textUrls)>0): 535 if textUrls and (len(textUrls) > 0):
517 textUrl=getTextFromNode(textUrls[0]) 536 textUrl = getTextFromNode(textUrls[0])
518 docinfo['textURLPath'] = textUrl 537 docinfo['textURLPath'] = textUrl
519 538
520 presentationUrls=dom.xpath("//texttool/presentation") 539 presentationUrls = dom.xpath("//texttool/presentation")
521 docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get info von bib tag 540 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag
522 541
523 if presentationUrls and (len(presentationUrls)>0): # ueberschreibe diese durch presentation informationen 542 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen
524 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 543 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten
525 # durch den relativen Pfad auf die presentation infos 544 # durch den relativen Pfad auf die presentation infos
526 presentationPath = getTextFromNode(presentationUrls[0]) 545 presentationPath = getTextFromNode(presentationUrls[0])
527 if url.endswith("index.meta"): 546 if url.endswith("index.meta"):
528 presentationUrl=url.replace('index.meta',presentationPath) 547 presentationUrl = url.replace('index.meta', presentationPath)
529 else: 548 else:
530 presentationUrl=url + "/" + presentationPath 549 presentationUrl = url + "/" + presentationPath
531 docinfo=self.getNumPages('//pb', docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht 550 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht
532 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom) 551 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom)
533 552
534 docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get access info 553 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info
535 554
536 return docinfo 555 return docinfo
556
557
558
537 559
538 560
539 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 561 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None):
540 """gets the bibliographical information from the preseantion entry in texttools 562 """gets the bibliographical information from the preseantion entry in texttools
541 """ 563 """
617 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 639 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1)))
618 # int(current / grpsize) * grpsize +1)) 640 # int(current / grpsize) * grpsize +1))
619 pageinfo['start'] = start 641 pageinfo['start'] = start
620 pageinfo['end'] = start + grpsize 642 pageinfo['end'] = start + grpsize
621 if docinfo is not None: 643 if docinfo is not None:
622
623 np = int(docinfo['numPages']) 644 np = int(docinfo['numPages'])
624 pageinfo['end'] = min(pageinfo['end'], np) 645 pageinfo['end'] = min(pageinfo['end'], np)
625 pageinfo['numgroups'] = int(np / grpsize) 646 pageinfo['numgroups'] = int(np / grpsize)
626 if np % grpsize > 0: 647 if np % grpsize > 0:
627 pageinfo['numgroups'] += 1 648 pageinfo['numgroups'] += 1
628 649
629 return pageinfo 650 return pageinfo
630 651
631 def text(self,mode,url,pn): 652
632 """give text""" 653
633 if mode=="texttool": #index.meta with texttool information 654 def getNumPages(self,docinfo=None):
634 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) 655 """get list of pages from fulltext and put in docinfo"""
635 656 xquery = '//pb'
636 #print textpath 657 text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
637 try: 658 # TODO: better processing of the page list. do we need the info somewhere else also?
638 dom = NonvalidatingReader.parseUri(textpath) 659 docinfo['numPages'] = text.count("<pb ")
639 except: 660 return docinfo
640 return None 661
641 662 def getTextPage(self, mode="text", pn=1, docinfo=None):
642 list=[] 663 """returns single page from fulltext"""
643 nodes=dom.xpath("//pb") 664 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False)
644 665 # post-processing downloaded xml
645 node=nodes[int(pn)-1] 666 pagedom = Parse(pagexml)
646 667 # plain text mode
647 p=node 668 if mode == "text":
648 669 # first div contains text
649 while p.tagName!="p": 670 pagedivs = pagedom.xpath("/div")
650 p=p.parentNode 671 if len(pagedivs) > 0:
651 672 pagenode = pagedivs[0]
652 673 return serializeNode(pagenode)
653 endNode=nodes[int(pn)] 674
654 675 # text-with-links mode
655 676 if mode == "textPollux":
656 e=endNode 677 # first div contains text
657 678 pagedivs = pagedom.xpath("/div")
658 while e.tagName!="p": 679 if len(pagedivs) > 0:
659 e=e.parentNode 680 pagenode = pagedivs[0]
660 681 # check all a-tags
661 682 links = pagenode.xpath("//a")
662 next=node.parentNode 683 for l in links:
663 684 hrefNode = l.getAttributeNodeNS(None, u"href")
664 #sammle s 685 if hrefNode:
665 while next and (next!=endNode.parentNode): 686 # is link with href
666 list.append(next) 687 href = hrefNode.nodeValue
667 next=next.nextSibling 688 if href.startswith('lt/lex.xql'):
668 list.append(endNode.parentNode) 689 # is pollux link
669 690 selfurl = self.absolute_url()
670 if p==e:# beide im selben paragraphen 691 # change href
671 pass 692 hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl)
672 # else: 693 # add target
673 # next=p 694 l.setAttributeNS(None, 'target', '_blank')
674 # while next!=e: 695 return serializeNode(pagenode)
675 # print next,e 696
676 # list.append(next) 697 return "no text here"
677 # next=next.nextSibling 698
678 # 699
679 # for x in list: 700 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
680 # PrettyPrint(x)
681 #
682 # return list
683 #
684
685 def findDigilibUrl(self):
686 """try to get the digilib URL from zogilib"""
687 url = self.imageViewerUrl[:-1] + "/getScalerUrl"
688 #print urlparse.urlparse(url)[0]
689 #print urlparse.urljoin(self.absolute_url(),url)
690 logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0])
691 logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url))
692
693 try:
694 if urlparse.urlparse(url)[0]=='': #relative path
695 url=urlparse.urljoin(self.absolute_url()+"/",url)
696
697 scaler = urlopen(url).read()
698 return scaler.replace("/servlet/Scaler?", "")
699 except:
700 return None
701
702 def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None):
703 """init document viewer""" 701 """init document viewer"""
704 self.title=title 702 self.title=title
705 self.imageViewerUrl=imageViewerUrl
706 self.textViewerUrl=textViewerUrl
707 self.digilibBaseUrl = digilibBaseUrl 703 self.digilibBaseUrl = digilibBaseUrl
708 self.thumbrows = thumbrows 704 self.thumbrows = thumbrows
709 self.thumbcols = thumbcols 705 self.thumbcols = thumbcols
710 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 706 self.authgroups = [s.strip().lower() for s in authgroups.split(',')]
711 if RESPONSE is not None: 707 if RESPONSE is not None:
712 RESPONSE.redirect('manage_main') 708 RESPONSE.redirect('manage_main')
713 709
714 710
715 711
716
717 # security.declareProtected('View management screens','renameImageForm')
718
719 def manage_AddDocumentViewerForm(self): 712 def manage_AddDocumentViewerForm(self):
720 """add the viewer form""" 713 """add the viewer form"""
721 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) 714 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self)
722 return pt() 715 return pt()
723 716
724 def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None): 717 def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None):
725 """add the viewer""" 718 """add the viewer"""
726 newObj=documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl) 719 newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName)
727 self._setObject(id,newObj) 720 self._setObject(id,newObj)
728 721
729 if RESPONSE is not None: 722 if RESPONSE is not None:
730 RESPONSE.redirect('manage_main') 723 RESPONSE.redirect('manage_main')
731 724