Mercurial > hg > documentViewer
comparison documentViewer.py @ 84:a6e4f9b6729a
first version with new full-text infrastructure and slightly changed templates
| author | casties |
|---|---|
| date | Fri, 19 Mar 2010 12:42:40 +0100 |
| parents | ec12a2440daa |
| children | 6a4a72033d58 |
comparison (legend of change types used in the table below):
equal
deleted
inserted
replaced
| 83:ec12a2440daa | 84:a6e4f9b6729a |
|---|---|
| 10 from Ft.Xml.Domlette import PrettyPrint, Print | 10 from Ft.Xml.Domlette import PrettyPrint, Print |
| 11 from Ft.Xml import EMPTY_NAMESPACE, Parse | 11 from Ft.Xml import EMPTY_NAMESPACE, Parse |
| 12 | 12 |
| 13 | 13 |
| 14 import Ft.Xml.XPath | 14 import Ft.Xml.XPath |
| 15 import cStringIO | |
| 15 import xmlrpclib | 16 import xmlrpclib |
| 16 import os.path | 17 import os.path |
| 17 import sys | 18 import sys |
| 18 import cgi | 19 import cgi |
| 19 import urllib | 20 import urllib |
| 20 import logging | 21 import logging |
| 21 import math | 22 import math |
| 22 | 23 |
| 23 import urlparse | 24 import urlparse |
| 24 from types import * | 25 from types import * |
| 26 | |
| 25 def logger(txt,method,txt2): | 27 def logger(txt,method,txt2): |
| 26 """logging""" | 28 """logging""" |
| 27 logging.info(txt+ txt2) | 29 logging.info(txt+ txt2) |
| 28 | 30 |
| 29 | 31 |
| 43 for node in nodelist: | 45 for node in nodelist: |
| 44 if node.nodeType == node.TEXT_NODE: | 46 if node.nodeType == node.TEXT_NODE: |
| 45 rc = rc + node.data | 47 rc = rc + node.data |
| 46 return rc | 48 return rc |
| 47 | 49 |
| 50 def serializeNode(node, encoding='utf-8'): | |
| 51 """returns a string containing node as XML""" | |
| 52 buf = cStringIO.StringIO() | |
| 53 Print(node, stream=buf, encoding=encoding) | |
| 54 s = buf.getvalue() | |
| 55 buf.close() | |
| 56 return s | |
| 57 | |
| 48 | 58 |
| 49 def getParentDir(path): | 59 def getParentDir(path): |
| 50 """returns pathname shortened by one""" | 60 """returns pathname shortened by one""" |
| 51 return '/'.join(path.split('/')[0:-1]) | 61 return '/'.join(path.split('/')[0:-1]) |
| 52 | 62 |
| 76 ) | 86 ) |
| 77 | 87 |
| 78 # templates and forms | 88 # templates and forms |
| 79 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) | 89 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) |
| 80 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) | 90 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) |
| 81 image_main = PageTemplateFile('zpt/image_main', globals()) | 91 image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete! |
| 92 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) | |
| 93 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) | |
| 82 head_main = PageTemplateFile('zpt/head_main', globals()) | 94 head_main = PageTemplateFile('zpt/head_main', globals()) |
| 83 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) | 95 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) |
| 84 info_xml = PageTemplateFile('zpt/info_xml', globals()) | 96 info_xml = PageTemplateFile('zpt/info_xml', globals()) |
| 85 | 97 |
| 86 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) | 98 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) |
| 87 security.declareProtected('View management screens','changeDocumentViewerForm') | 99 security.declareProtected('View management screens','changeDocumentViewerForm') |
| 88 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) | 100 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) |
| 89 | 101 |
| 90 | 102 |
| 91 def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): | 103 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): |
| 92 """init document viewer""" | 104 """init document viewer""" |
| 93 self.id=id | 105 self.id=id |
| 94 self.title=title | 106 self.title=title |
| 95 self.imageViewerUrl=imageViewerUrl | |
| 96 self.textViewerUrl=textViewerUrl | |
| 97 | |
| 98 if not digilibBaseUrl: | |
| 99 self.digilibBaseUrl = self.findDigilibUrl() | |
| 100 else: | |
| 101 self.digilibBaseUrl = digilibBaseUrl | |
| 102 self.thumbcols = thumbcols | 107 self.thumbcols = thumbcols |
| 103 self.thumbrows = thumbrows | 108 self.thumbrows = thumbrows |
| 104 # authgroups is list of authorized groups (delimited by ,) | 109 # authgroups is list of authorized groups (delimited by ,) |
| 105 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] | 110 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] |
| 106 # add template folder so we can always use template.something | 111 # create template folder so we can always use template.something |
| 107 self.manage_addFolder('template') | 112 |
| 108 | 113 templateFolder = Folder('template') |
| 114 #self['template'] = templateFolder # Zope-2.12 style | |
| 115 self._setObject('template',templateFolder) # old style | |
| 116 try: | |
| 117 from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy | |
| 118 xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False) | |
| 119 #templateFolder['fulltextclient'] = xmlRpcClient | |
| 120 templateFolder._setObject('fulltextclient',xmlRpcClient) | |
| 121 except Exception, e: | |
| 122 logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e)) | |
| 123 try: | |
| 124 from Products.zogiLib.zogiLib import zogiLib | |
| 125 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") | |
| 126 #templateFolder['zogilib'] = zogilib | |
| 127 templateFolder._setObject('zogilib',zogilib) | |
| 128 except Exception, e: | |
| 129 logging.error("Unable to create zogiLib for zogilib: "+str(e)) | |
| 130 | |
| 109 | 131 |
| 110 security.declareProtected('View','thumbs_rss') | 132 security.declareProtected('View','thumbs_rss') |
| 111 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): | 133 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): |
| 112 ''' | 134 ''' |
| 113 view it | 135 view it |
| 114 @param mode: defines how to access the document behind url | 136 @param mode: defines how to access the document behind url |
| 115 @param url: url which contains display information | 137 @param url: url which contains display information |
| 116 @param viewMode: if images display images, if text display text, default is images (text,images or auto) | 138 @param viewMode: if images display images, if text display text, default is images (text,images or auto) |
| 117 | 139 |
| 118 ''' | 140 ''' |
| 119 logging.info("HHHHHHHHHHHHHH:load the rss") | 141 logging.debug("HHHHHHHHHHHHHH:load the rss") |
| 120 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 142 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
| 121 | 143 |
| 122 if not hasattr(self, 'template'): | 144 if not hasattr(self, 'template'): |
| 123 # create template folder if it doesn't exist | 145 # create template folder if it doesn't exist |
| 124 self.manage_addFolder('template') | 146 self.manage_addFolder('template') |
| 137 viewMode="images" | 159 viewMode="images" |
| 138 | 160 |
| 139 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) | 161 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
| 140 | 162 |
| 141 security.declareProtected('View','index_html') | 163 security.declareProtected('View','index_html') |
| 142 def index_html(self,mode,url,viewMode="auto",start=None,pn=1,mk=None): | 164 def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None): |
| 143 ''' | 165 ''' |
| 144 view it | 166 view it |
| 145 @param mode: defines how to access the document behind url | 167 @param mode: defines how to access the document behind url |
| 146 @param url: url which contains display information | 168 @param url: url which contains display information |
| 147 @param viewMode: if images display images, if text display text, default is images (text,images or auto) | 169 @param viewMode: if images display images, if text display text, default is images (text,images or auto) |
| 148 | 170 |
| 149 ''' | 171 ''' |
| 150 | 172 |
| 151 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 173 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
| 152 | 174 |
| 153 if not hasattr(self, 'template'): | 175 if not hasattr(self, 'template'): |
| 154 # create template folder if it doesn't exist | 176 # this won't work |
| 155 self.manage_addFolder('template') | 177 logging.error("template folder missing!") |
| 156 | 178 return "ERROR: template folder missing!" |
| 157 if not self.digilibBaseUrl: | 179 |
| 180 if not getattr(self, 'digilibBaseUrl', None): | |
| 158 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" | 181 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" |
| 159 | 182 |
| 160 docinfo = self.getDocinfo(mode=mode,url=url) | 183 docinfo = self.getDocinfo(mode=mode,url=url) |
| 161 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) | 184 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) |
| 162 pt = getattr(self.template, 'viewer_main') | 185 pt = getattr(self.template, 'viewer_main') |
| 163 | 186 |
| 164 if viewMode=="auto": # automodus gewaehlt | 187 if viewMode=="auto": # automodus gewaehlt |
| 165 if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert | 188 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert |
| 166 viewMode="text" | 189 viewMode="text" |
| 167 else: | 190 else: |
| 168 viewMode="images" | 191 viewMode="images" |
| 169 | 192 |
| 170 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) | 193 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) |
| 177 if type(mk) is not ListType: | 200 if type(mk) is not ListType: |
| 178 mk=[mk] | 201 mk=[mk] |
| 179 for m in mk: | 202 for m in mk: |
| 180 ret+="mk=%s"%m | 203 ret+="mk=%s"%m |
| 181 return ret | 204 return ret |
| 205 | |
| 206 def findDigilibUrl(self): | |
| 207 """try to get the digilib URL from zogilib""" | |
| 208 url = self.template.zogilib.getDLBaseUrl() | |
| 209 return url | |
| 210 | |
| 211 def getStyle(self, idx, selected, style=""): | |
| 212 """returns a string with the given style and append 'sel' if path == selected.""" | |
| 213 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) | |
| 214 if idx == selected: | |
| 215 return style + 'sel' | |
| 216 else: | |
| 217 return style | |
| 182 | 218 |
| 183 def getLink(self,param=None,val=None): | 219 def getLink(self,param=None,val=None): |
| 184 """link to documentviewer with parameter param set to val""" | 220 """link to documentviewer with parameter param set to val""" |
| 185 params=self.REQUEST.form.copy() | 221 params=self.REQUEST.form.copy() |
| 186 if param is not None: | 222 if param is not None: |
| 187 if val is None: | 223 if val is None: |
| 188 if params.has_key(param): | 224 if params.has_key(param): |
| 189 del params[param] | 225 del params[param] |
| 190 else: | 226 else: |
| 191 params[param] = str(val) | 227 params[param] = str(val) |
| 192 if params["mode"] == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath | 228 |
| 229 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath | |
| 193 params["mode"] = "imagepath" | 230 params["mode"] = "imagepath" |
| 194 params["url"] = getParentDir(params["url"]) | 231 params["url"] = getParentDir(params["url"]) |
| 195 | 232 |
| 196 # quote values and assemble into query string | 233 # quote values and assemble into query string |
| 197 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) | 234 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) |
| 223 docinfo = self.getDocinfo(mode=mode,url=url) | 260 docinfo = self.getDocinfo(mode=mode,url=url) |
| 224 pt = getattr(self.template, 'info_xml') | 261 pt = getattr(self.template, 'info_xml') |
| 225 return pt(docinfo=docinfo) | 262 return pt(docinfo=docinfo) |
| 226 | 263 |
| 227 | 264 |
| 228 def getStyle(self, idx, selected, style=""): | |
| 229 """returns a string with the given style and append 'sel' if path == selected.""" | |
| 230 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) | |
| 231 if idx == selected: | |
| 232 return style + 'sel' | |
| 233 else: | |
| 234 return style | |
| 235 | |
| 236 def getTextLanguage(self,url,docinfo): | |
| 237 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) | |
| 238 lang = urlencode({'':docinfo['lang']}) | |
| 239 return lang | |
| 240 | |
| 241 | |
| 242 def isAccessible(self, docinfo): | 265 def isAccessible(self, docinfo): |
| 243 """returns if access to the resource is granted""" | 266 """returns if access to the resource is granted""" |
| 244 access = docinfo.get('accessType', None) | 267 access = docinfo.get('accessType', None) |
| 245 logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) | 268 logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) |
| 246 if access is not None and access == 'free': | 269 if access is not None and access == 'free': |
| 289 | 312 |
| 290 if sizes: | 313 if sizes: |
| 291 docinfo['numPages'] = int(getTextFromNode(sizes[0])) | 314 docinfo['numPages'] = int(getTextFromNode(sizes[0])) |
| 292 else: | 315 else: |
| 293 docinfo['numPages'] = 0 | 316 docinfo['numPages'] = 0 |
| 317 | |
| 318 # TODO: produce and keep list of image names and numbers | |
| 294 | 319 |
| 295 return docinfo | 320 return docinfo |
| 296 | 321 |
| 297 | 322 |
| 298 def getIndexMeta(self, url): | 323 def getIndexMeta(self, url): |
| 429 docinfo['lang']='' | 454 docinfo['lang']='' |
| 430 | 455 |
| 431 return docinfo | 456 return docinfo |
| 432 | 457 |
| 433 | 458 |
| 434 def getNumPages(self, xquery, docinfo=None): #New Method 24.02.2010 | 459 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): |
| 435 text=self.viewerTemplates.query.eval("/mpdl/interface/xquery.xql","document="+ docinfo['textURLPath'] +"&xquery="+str(xquery)) | 460 """parse texttool tag in index meta""" |
| 436 docinfo['numPages'] = text.count("<pb ") | 461 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) |
| 437 return docinfo | 462 if docinfo is None: |
| 438 | |
| 439 | |
| 440 def getDocinfoFromTextTool(self,url,dom=None,docinfo=None): | |
| 441 """parse texttool tag in index meta""" | |
| 442 logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url)) | |
| 443 if docinfo is None: | |
| 444 docinfo = {} | 463 docinfo = {} |
| 445 | 464 |
| 446 if docinfo.get('lang',None) is None: | 465 if docinfo.get('lang', None) is None: |
| 447 docinfo['lang']='' # default keine Sprache gesetzt | 466 docinfo['lang'] = '' # default keine Sprache gesetzt |
| 448 if dom is None: | 467 if dom is None: |
| 449 dom = self.getIndexMeta(url) | 468 dom = self.getIndexMeta(url) |
| 450 | 469 |
| 451 archivePath = None | 470 archivePath = None |
| 452 archiveName = None | 471 archiveName = None |
| 453 | 472 |
| 454 archiveNames=dom.xpath("//resource/name") | 473 archiveNames = dom.xpath("//resource/name") |
| 455 if archiveNames and (len(archiveNames)>0): | 474 if archiveNames and (len(archiveNames) > 0): |
| 456 archiveName=getTextFromNode(archiveNames[0]) | 475 archiveName = getTextFromNode(archiveNames[0]) |
| 457 else: | 476 else: |
| 458 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url)) | 477 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url)) |
| 459 | 478 |
| 460 archivePaths=dom.xpath("//resource/archive-path") | 479 archivePaths = dom.xpath("//resource/archive-path") |
| 461 if archivePaths and (len(archivePaths)>0): | 480 if archivePaths and (len(archivePaths) > 0): |
| 462 archivePath=getTextFromNode(archivePaths[0]) | 481 archivePath = getTextFromNode(archivePaths[0]) |
| 463 # clean up archive path | 482 # clean up archive path |
| 464 if archivePath[0] != '/': | 483 if archivePath[0] != '/': |
| 465 archivePath = '/' + archivePath | 484 archivePath = '/' + archivePath |
| 466 if archiveName and (not archivePath.endswith(archiveName)): | 485 if archiveName and (not archivePath.endswith(archiveName)): |
| 467 archivePath += "/" + archiveName | 486 archivePath += "/" + archiveName |
| 468 else: | 487 else: |
| 469 # try to get archive-path from url | 488 # try to get archive-path from url |
| 470 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url)) | 489 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url)) |
| 471 if (not url.startswith('http')): | 490 if (not url.startswith('http')): |
| 472 archivePath = url.replace('index.meta', '') | 491 archivePath = url.replace('index.meta', '') |
| 473 | 492 |
| 474 if archivePath is None: | 493 if archivePath is None: |
| 475 # we balk without archive-path | 494 # we balk without archive-path |
| 476 raise IOError("Missing archive-path (for text-tool) in %s"%(url)) | 495 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) |
| 477 | 496 |
| 478 imageDirs=dom.xpath("//texttool/image") | 497 imageDirs = dom.xpath("//texttool/image") |
| 479 if imageDirs and (len(imageDirs)>0): | 498 if imageDirs and (len(imageDirs) > 0): |
| 480 imageDir=getTextFromNode(imageDirs[0]) | 499 imageDir = getTextFromNode(imageDirs[0]) |
| 481 | 500 |
| 482 else: | 501 else: |
| 483 # we balk with no image tag / not necessary anymore because textmode is now standard | 502 # we balk with no image tag / not necessary anymore because textmode is now standard |
| 484 #raise IOError("No text-tool info in %s"%(url)) | 503 #raise IOError("No text-tool info in %s"%(url)) |
| 485 imageDir="" | 504 imageDir = "" |
| 486 #xquery="//pb" | 505 #xquery="//pb" |
| 487 docinfo['imagePath'] = "" # keine Bilder | 506 docinfo['imagePath'] = "" # keine Bilder |
| 488 docinfo['imageURL'] = "" | 507 docinfo['imageURL'] = "" |
| 489 | 508 |
| 490 if imageDir and archivePath: | 509 if imageDir and archivePath: |
| 491 #print "image: ", imageDir, " archivepath: ", archivePath | 510 #print "image: ", imageDir, " archivepath: ", archivePath |
| 492 imageDir=os.path.join(archivePath,imageDir) | 511 imageDir = os.path.join(archivePath, imageDir) |
| 493 imageDir=imageDir.replace("/mpiwg/online",'') | 512 imageDir = imageDir.replace("/mpiwg/online", '') |
| 494 docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo) | 513 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) |
| 495 docinfo['imagePath'] = imageDir | 514 docinfo['imagePath'] = imageDir |
| 496 | 515 |
| 497 docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir | 516 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir |
| 498 | 517 |
| 499 viewerUrls=dom.xpath("//texttool/digiliburlprefix") | 518 viewerUrls = dom.xpath("//texttool/digiliburlprefix") |
| 500 if viewerUrls and (len(viewerUrls)>0): | 519 if viewerUrls and (len(viewerUrls) > 0): |
| 501 viewerUrl=getTextFromNode(viewerUrls[0]) | 520 viewerUrl = getTextFromNode(viewerUrls[0]) |
| 502 docinfo['viewerURL'] = viewerUrl | 521 docinfo['viewerURL'] = viewerUrl |
| 503 | 522 |
| 504 textUrls=dom.xpath("//texttool/text") | 523 textUrls = dom.xpath("//texttool/text") |
| 505 if textUrls and (len(textUrls)>0): | 524 if textUrls and (len(textUrls) > 0): |
| 506 textUrl=getTextFromNode(textUrls[0]) | 525 textUrl = getTextFromNode(textUrls[0]) |
| 507 if urlparse.urlparse(textUrl)[0]=="": #keine url | 526 if urlparse.urlparse(textUrl)[0] == "": #keine url |
| 508 textUrl=os.path.join(archivePath,textUrl) | 527 textUrl = os.path.join(archivePath, textUrl) |
| 509 # fix URLs starting with /mpiwg/online | 528 # fix URLs starting with /mpiwg/online |
| 510 if textUrl.startswith("/mpiwg/online"): | 529 if textUrl.startswith("/mpiwg/online"): |
| 511 textUrl = textUrl.replace("/mpiwg/online",'',1) | 530 textUrl = textUrl.replace("/mpiwg/online", '', 1) |
| 512 | 531 |
| 513 docinfo['textURL'] = textUrl | 532 docinfo['textURL'] = textUrl |
| 514 | 533 |
| 515 textUrls=dom.xpath("//texttool/text-url-path") | 534 textUrls = dom.xpath("//texttool/text-url-path") |
| 516 if textUrls and (len(textUrls)>0): | 535 if textUrls and (len(textUrls) > 0): |
| 517 textUrl=getTextFromNode(textUrls[0]) | 536 textUrl = getTextFromNode(textUrls[0]) |
| 518 docinfo['textURLPath'] = textUrl | 537 docinfo['textURLPath'] = textUrl |
| 519 | 538 |
| 520 presentationUrls=dom.xpath("//texttool/presentation") | 539 presentationUrls = dom.xpath("//texttool/presentation") |
| 521 docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get info von bib tag | 540 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag |
| 522 | 541 |
| 523 if presentationUrls and (len(presentationUrls)>0): # ueberschreibe diese durch presentation informationen | 542 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen |
| 524 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten | 543 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten |
| 525 # durch den relativen Pfad auf die presentation infos | 544 # durch den relativen Pfad auf die presentation infos |
| 526 presentationPath = getTextFromNode(presentationUrls[0]) | 545 presentationPath = getTextFromNode(presentationUrls[0]) |
| 527 if url.endswith("index.meta"): | 546 if url.endswith("index.meta"): |
| 528 presentationUrl=url.replace('index.meta',presentationPath) | 547 presentationUrl = url.replace('index.meta', presentationPath) |
| 529 else: | 548 else: |
| 530 presentationUrl=url + "/" + presentationPath | 549 presentationUrl = url + "/" + presentationPath |
| 531 docinfo=self.getNumPages('//pb', docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht | 550 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht |
| 532 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom) | 551 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) |
| 533 | 552 |
| 534 docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get access info | 553 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info |
| 535 | 554 |
| 536 return docinfo | 555 return docinfo |
| 556 | |
| 557 | |
| 558 | |
| 537 | 559 |
| 538 | 560 |
| 539 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): | 561 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): |
| 540 """gets the bibliographical information from the preseantion entry in texttools | 562 """gets the bibliographical information from the preseantion entry in texttools |
| 541 """ | 563 """ |
| 617 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) | 639 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) |
| 618 # int(current / grpsize) * grpsize +1)) | 640 # int(current / grpsize) * grpsize +1)) |
| 619 pageinfo['start'] = start | 641 pageinfo['start'] = start |
| 620 pageinfo['end'] = start + grpsize | 642 pageinfo['end'] = start + grpsize |
| 621 if docinfo is not None: | 643 if docinfo is not None: |
| 622 | |
| 623 np = int(docinfo['numPages']) | 644 np = int(docinfo['numPages']) |
| 624 pageinfo['end'] = min(pageinfo['end'], np) | 645 pageinfo['end'] = min(pageinfo['end'], np) |
| 625 pageinfo['numgroups'] = int(np / grpsize) | 646 pageinfo['numgroups'] = int(np / grpsize) |
| 626 if np % grpsize > 0: | 647 if np % grpsize > 0: |
| 627 pageinfo['numgroups'] += 1 | 648 pageinfo['numgroups'] += 1 |
| 628 | 649 |
| 629 return pageinfo | 650 return pageinfo |
| 630 | 651 |
| 631 def text(self,mode,url,pn): | 652 |
| 632 """give text""" | 653 |
| 633 if mode=="texttool": #index.meta with texttool information | 654 def getNumPages(self,docinfo=None): |
| 634 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) | 655 """get list of pages from fulltext and put in docinfo""" |
| 635 | 656 xquery = '//pb' |
| 636 #print textpath | 657 text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) |
| 637 try: | 658 # TODO: better processing of the page list. do we need the info somewhere else also? |
| 638 dom = NonvalidatingReader.parseUri(textpath) | 659 docinfo['numPages'] = text.count("<pb ") |
| 639 except: | 660 return docinfo |
| 640 return None | 661 |
| 641 | 662 def getTextPage(self, mode="text", pn=1, docinfo=None): |
| 642 list=[] | 663 """returns single page from fulltext""" |
| 643 nodes=dom.xpath("//pb") | 664 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False) |
| 644 | 665 # post-processing downloaded xml |
| 645 node=nodes[int(pn)-1] | 666 pagedom = Parse(pagexml) |
| 646 | 667 # plain text mode |
| 647 p=node | 668 if mode == "text": |
| 648 | 669 # first div contains text |
| 649 while p.tagName!="p": | 670 pagedivs = pagedom.xpath("/div") |
| 650 p=p.parentNode | 671 if len(pagedivs) > 0: |
| 651 | 672 pagenode = pagedivs[0] |
| 652 | 673 return serializeNode(pagenode) |
| 653 endNode=nodes[int(pn)] | 674 |
| 654 | 675 # text-with-links mode |
| 655 | 676 if mode == "textPollux": |
| 656 e=endNode | 677 # first div contains text |
| 657 | 678 pagedivs = pagedom.xpath("/div") |
| 658 while e.tagName!="p": | 679 if len(pagedivs) > 0: |
| 659 e=e.parentNode | 680 pagenode = pagedivs[0] |
| 660 | 681 # check all a-tags |
| 661 | 682 links = pagenode.xpath("//a") |
| 662 next=node.parentNode | 683 for l in links: |
| 663 | 684 hrefNode = l.getAttributeNodeNS(None, u"href") |
| 664 #sammle s | 685 if hrefNode: |
| 665 while next and (next!=endNode.parentNode): | 686 # is link with href |
| 666 list.append(next) | 687 href = hrefNode.nodeValue |
| 667 next=next.nextSibling | 688 if href.startswith('lt/lex.xql'): |
| 668 list.append(endNode.parentNode) | 689 # is pollux link |
| 669 | 690 selfurl = self.absolute_url() |
| 670 if p==e:# beide im selben paragraphen | 691 # change href |
| 671 pass | 692 hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl) |
| 672 # else: | 693 # add target |
| 673 # next=p | 694 l.setAttributeNS(None, 'target', '_blank') |
| 674 # while next!=e: | 695 return serializeNode(pagenode) |
| 675 # print next,e | 696 |
| 676 # list.append(next) | 697 return "no text here" |
| 677 # next=next.nextSibling | 698 |
| 678 # | 699 |
| 679 # for x in list: | 700 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): |
| 680 # PrettyPrint(x) | |
| 681 # | |
| 682 # return list | |
| 683 # | |
| 684 | |
| 685 def findDigilibUrl(self): | |
| 686 """try to get the digilib URL from zogilib""" | |
| 687 url = self.imageViewerUrl[:-1] + "/getScalerUrl" | |
| 688 #print urlparse.urlparse(url)[0] | |
| 689 #print urlparse.urljoin(self.absolute_url(),url) | |
| 690 logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0]) | |
| 691 logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url)) | |
| 692 | |
| 693 try: | |
| 694 if urlparse.urlparse(url)[0]=='': #relative path | |
| 695 url=urlparse.urljoin(self.absolute_url()+"/",url) | |
| 696 | |
| 697 scaler = urlopen(url).read() | |
| 698 return scaler.replace("/servlet/Scaler?", "") | |
| 699 except: | |
| 700 return None | |
| 701 | |
| 702 def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): | |
| 703 """init document viewer""" | 701 """init document viewer""" |
| 704 self.title=title | 702 self.title=title |
| 705 self.imageViewerUrl=imageViewerUrl | |
| 706 self.textViewerUrl=textViewerUrl | |
| 707 self.digilibBaseUrl = digilibBaseUrl | 703 self.digilibBaseUrl = digilibBaseUrl |
| 708 self.thumbrows = thumbrows | 704 self.thumbrows = thumbrows |
| 709 self.thumbcols = thumbcols | 705 self.thumbcols = thumbcols |
| 710 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] | 706 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] |
| 711 if RESPONSE is not None: | 707 if RESPONSE is not None: |
| 712 RESPONSE.redirect('manage_main') | 708 RESPONSE.redirect('manage_main') |
| 713 | 709 |
| 714 | 710 |
| 715 | 711 |
| 716 | |
| 717 # security.declareProtected('View management screens','renameImageForm') | |
| 718 | |
| 719 def manage_AddDocumentViewerForm(self): | 712 def manage_AddDocumentViewerForm(self): |
| 720 """add the viewer form""" | 713 """add the viewer form""" |
| 721 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) | 714 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) |
| 722 return pt() | 715 return pt() |
| 723 | 716 |
| 724 def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None): | 717 def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): |
| 725 """add the viewer""" | 718 """add the viewer""" |
| 726 newObj=documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl) | 719 newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) |
| 727 self._setObject(id,newObj) | 720 self._setObject(id,newObj) |
| 728 | 721 |
| 729 if RESPONSE is not None: | 722 if RESPONSE is not None: |
| 730 RESPONSE.redirect('manage_main') | 723 RESPONSE.redirect('manage_main') |
| 731 | 724 |
