Mercurial > hg > documentViewer
comparison documentViewer.py @ 84:a6e4f9b6729a
first version with new full-text infrastructure and slightly changed templates
author | casties |
---|---|
date | Fri, 19 Mar 2010 12:42:40 +0100 |
parents | ec12a2440daa |
children | 6a4a72033d58 |
comparison
equal
deleted
inserted
replaced
83:ec12a2440daa | 84:a6e4f9b6729a |
---|---|
10 from Ft.Xml.Domlette import PrettyPrint, Print | 10 from Ft.Xml.Domlette import PrettyPrint, Print |
11 from Ft.Xml import EMPTY_NAMESPACE, Parse | 11 from Ft.Xml import EMPTY_NAMESPACE, Parse |
12 | 12 |
13 | 13 |
14 import Ft.Xml.XPath | 14 import Ft.Xml.XPath |
15 import cStringIO | |
15 import xmlrpclib | 16 import xmlrpclib |
16 import os.path | 17 import os.path |
17 import sys | 18 import sys |
18 import cgi | 19 import cgi |
19 import urllib | 20 import urllib |
20 import logging | 21 import logging |
21 import math | 22 import math |
22 | 23 |
23 import urlparse | 24 import urlparse |
24 from types import * | 25 from types import * |
26 | |
25 def logger(txt,method,txt2): | 27 def logger(txt,method,txt2): |
26 """logging""" | 28 """logging""" |
27 logging.info(txt+ txt2) | 29 logging.info(txt+ txt2) |
28 | 30 |
29 | 31 |
43 for node in nodelist: | 45 for node in nodelist: |
44 if node.nodeType == node.TEXT_NODE: | 46 if node.nodeType == node.TEXT_NODE: |
45 rc = rc + node.data | 47 rc = rc + node.data |
46 return rc | 48 return rc |
47 | 49 |
50 def serializeNode(node, encoding='utf-8'): | |
51 """returns a string containing node as XML""" | |
52 buf = cStringIO.StringIO() | |
53 Print(node, stream=buf, encoding=encoding) | |
54 s = buf.getvalue() | |
55 buf.close() | |
56 return s | |
57 | |
48 | 58 |
49 def getParentDir(path): | 59 def getParentDir(path): |
50 """returns pathname shortened by one""" | 60 """returns pathname shortened by one""" |
51 return '/'.join(path.split('/')[0:-1]) | 61 return '/'.join(path.split('/')[0:-1]) |
52 | 62 |
76 ) | 86 ) |
77 | 87 |
78 # templates and forms | 88 # templates and forms |
79 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) | 89 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) |
80 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) | 90 thumbs_main = PageTemplateFile('zpt/thumbs_main', globals()) |
81 image_main = PageTemplateFile('zpt/image_main', globals()) | 91 image_main = PageTemplateFile('zpt/image_main', globals()) # obsolete! |
92 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) | |
93 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) | |
82 head_main = PageTemplateFile('zpt/head_main', globals()) | 94 head_main = PageTemplateFile('zpt/head_main', globals()) |
83 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) | 95 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) |
84 info_xml = PageTemplateFile('zpt/info_xml', globals()) | 96 info_xml = PageTemplateFile('zpt/info_xml', globals()) |
85 | 97 |
86 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) | 98 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) |
87 security.declareProtected('View management screens','changeDocumentViewerForm') | 99 security.declareProtected('View management screens','changeDocumentViewerForm') |
88 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) | 100 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) |
89 | 101 |
90 | 102 |
91 def __init__(self,id,imageViewerUrl,textViewerUrl=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): | 103 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=10,authgroups="mpiwg"): |
92 """init document viewer""" | 104 """init document viewer""" |
93 self.id=id | 105 self.id=id |
94 self.title=title | 106 self.title=title |
95 self.imageViewerUrl=imageViewerUrl | |
96 self.textViewerUrl=textViewerUrl | |
97 | |
98 if not digilibBaseUrl: | |
99 self.digilibBaseUrl = self.findDigilibUrl() | |
100 else: | |
101 self.digilibBaseUrl = digilibBaseUrl | |
102 self.thumbcols = thumbcols | 107 self.thumbcols = thumbcols |
103 self.thumbrows = thumbrows | 108 self.thumbrows = thumbrows |
104 # authgroups is list of authorized groups (delimited by ,) | 109 # authgroups is list of authorized groups (delimited by ,) |
105 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] | 110 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] |
106 # add template folder so we can always use template.something | 111 # create template folder so we can always use template.something |
107 self.manage_addFolder('template') | 112 |
108 | 113 templateFolder = Folder('template') |
114 #self['template'] = templateFolder # Zope-2.12 style | |
115 self._setObject('template',templateFolder) # old style | |
116 try: | |
117 from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy | |
118 xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False) | |
119 #templateFolder['fulltextclient'] = xmlRpcClient | |
120 templateFolder._setObject('fulltextclient',xmlRpcClient) | |
121 except Exception, e: | |
122 logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e)) | |
123 try: | |
124 from Products.zogiLib.zogiLib import zogiLib | |
125 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") | |
126 #templateFolder['zogilib'] = zogilib | |
127 templateFolder._setObject('zogilib',zogilib) | |
128 except Exception, e: | |
129 logging.error("Unable to create zogiLib for zogilib: "+str(e)) | |
130 | |
109 | 131 |
110 security.declareProtected('View','thumbs_rss') | 132 security.declareProtected('View','thumbs_rss') |
111 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): | 133 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): |
112 ''' | 134 ''' |
113 view it | 135 view it |
114 @param mode: defines how to access the document behind url | 136 @param mode: defines how to access the document behind url |
115 @param url: url which contains display information | 137 @param url: url which contains display information |
116 @param viewMode: if images display images, if text display text, default is images (text,images or auto) | 138 @param viewMode: if images display images, if text display text, default is images (text,images or auto) |
117 | 139 |
118 ''' | 140 ''' |
119 logging.info("HHHHHHHHHHHHHH:load the rss") | 141 logging.debug("HHHHHHHHHHHHHH:load the rss") |
120 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 142 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
121 | 143 |
122 if not hasattr(self, 'template'): | 144 if not hasattr(self, 'template'): |
123 # create template folder if it doesn't exist | 145 # create template folder if it doesn't exist |
124 self.manage_addFolder('template') | 146 self.manage_addFolder('template') |
137 viewMode="images" | 159 viewMode="images" |
138 | 160 |
139 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) | 161 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) |
140 | 162 |
141 security.declareProtected('View','index_html') | 163 security.declareProtected('View','index_html') |
142 def index_html(self,mode,url,viewMode="auto",start=None,pn=1,mk=None): | 164 def index_html(self,url,mode="texttool",viewMode="auto",start=None,pn=1,mk=None): |
143 ''' | 165 ''' |
144 view it | 166 view it |
145 @param mode: defines how to access the document behind url | 167 @param mode: defines how to access the document behind url |
146 @param url: url which contains display information | 168 @param url: url which contains display information |
147 @param viewMode: if images display images, if text display text, default is images (text,images or auto) | 169 @param viewMode: if images display images, if text display text, default is images (text,images or auto) |
148 | 170 |
149 ''' | 171 ''' |
150 | 172 |
151 logger("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) | 173 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) |
152 | 174 |
153 if not hasattr(self, 'template'): | 175 if not hasattr(self, 'template'): |
154 # create template folder if it doesn't exist | 176 # this won't work |
155 self.manage_addFolder('template') | 177 logging.error("template folder missing!") |
156 | 178 return "ERROR: template folder missing!" |
157 if not self.digilibBaseUrl: | 179 |
180 if not getattr(self, 'digilibBaseUrl', None): | |
158 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" | 181 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" |
159 | 182 |
160 docinfo = self.getDocinfo(mode=mode,url=url) | 183 docinfo = self.getDocinfo(mode=mode,url=url) |
161 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) | 184 pageinfo = self.getPageinfo(start=start,current=pn,docinfo=docinfo) |
162 pt = getattr(self.template, 'viewer_main') | 185 pt = getattr(self.template, 'viewer_main') |
163 | 186 |
164 if viewMode=="auto": # automodus gewaehlt | 187 if viewMode=="auto": # automodus gewaehlt |
165 if docinfo.get("textURL",'') and self.textViewerUrl: #texturl gesetzt und textViewer konfiguriert | 188 if docinfo.get("textURL",''): #texturl gesetzt und textViewer konfiguriert |
166 viewMode="text" | 189 viewMode="text" |
167 else: | 190 else: |
168 viewMode="images" | 191 viewMode="images" |
169 | 192 |
170 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) | 193 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) |
177 if type(mk) is not ListType: | 200 if type(mk) is not ListType: |
178 mk=[mk] | 201 mk=[mk] |
179 for m in mk: | 202 for m in mk: |
180 ret+="mk=%s"%m | 203 ret+="mk=%s"%m |
181 return ret | 204 return ret |
205 | |
206 def findDigilibUrl(self): | |
207 """try to get the digilib URL from zogilib""" | |
208 url = self.template.zogilib.getDLBaseUrl() | |
209 return url | |
210 | |
211 def getStyle(self, idx, selected, style=""): | |
212 """returns a string with the given style and append 'sel' if path == selected.""" | |
213 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) | |
214 if idx == selected: | |
215 return style + 'sel' | |
216 else: | |
217 return style | |
182 | 218 |
183 def getLink(self,param=None,val=None): | 219 def getLink(self,param=None,val=None): |
184 """link to documentviewer with parameter param set to val""" | 220 """link to documentviewer with parameter param set to val""" |
185 params=self.REQUEST.form.copy() | 221 params=self.REQUEST.form.copy() |
186 if param is not None: | 222 if param is not None: |
187 if val is None: | 223 if val is None: |
188 if params.has_key(param): | 224 if params.has_key(param): |
189 del params[param] | 225 del params[param] |
190 else: | 226 else: |
191 params[param] = str(val) | 227 params[param] = str(val) |
192 if params["mode"] == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath | 228 |
229 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath | |
193 params["mode"] = "imagepath" | 230 params["mode"] = "imagepath" |
194 params["url"] = getParentDir(params["url"]) | 231 params["url"] = getParentDir(params["url"]) |
195 | 232 |
196 # quote values and assemble into query string | 233 # quote values and assemble into query string |
197 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) | 234 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) |
223 docinfo = self.getDocinfo(mode=mode,url=url) | 260 docinfo = self.getDocinfo(mode=mode,url=url) |
224 pt = getattr(self.template, 'info_xml') | 261 pt = getattr(self.template, 'info_xml') |
225 return pt(docinfo=docinfo) | 262 return pt(docinfo=docinfo) |
226 | 263 |
227 | 264 |
228 def getStyle(self, idx, selected, style=""): | |
229 """returns a string with the given style and append 'sel' if path == selected.""" | |
230 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) | |
231 if idx == selected: | |
232 return style + 'sel' | |
233 else: | |
234 return style | |
235 | |
236 def getTextLanguage(self,url,docinfo): | |
237 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) | |
238 lang = urlencode({'':docinfo['lang']}) | |
239 return lang | |
240 | |
241 | |
242 def isAccessible(self, docinfo): | 265 def isAccessible(self, docinfo): |
243 """returns if access to the resource is granted""" | 266 """returns if access to the resource is granted""" |
244 access = docinfo.get('accessType', None) | 267 access = docinfo.get('accessType', None) |
245 logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) | 268 logger("documentViewer (accessOK)", logging.INFO, "access type %s"%access) |
246 if access is not None and access == 'free': | 269 if access is not None and access == 'free': |
289 | 312 |
290 if sizes: | 313 if sizes: |
291 docinfo['numPages'] = int(getTextFromNode(sizes[0])) | 314 docinfo['numPages'] = int(getTextFromNode(sizes[0])) |
292 else: | 315 else: |
293 docinfo['numPages'] = 0 | 316 docinfo['numPages'] = 0 |
317 | |
318 # TODO: produce and keep list of image names and numbers | |
294 | 319 |
295 return docinfo | 320 return docinfo |
296 | 321 |
297 | 322 |
298 def getIndexMeta(self, url): | 323 def getIndexMeta(self, url): |
429 docinfo['lang']='' | 454 docinfo['lang']='' |
430 | 455 |
431 return docinfo | 456 return docinfo |
432 | 457 |
433 | 458 |
434 def getNumPages(self, xquery, docinfo=None): #New Method 24.02.2010 | 459 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): |
435 text=self.viewerTemplates.query.eval("/mpdl/interface/xquery.xql","document="+ docinfo['textURLPath'] +"&xquery="+str(xquery)) | 460 """parse texttool tag in index meta""" |
436 docinfo['numPages'] = text.count("<pb ") | 461 logger("documentViewer (getdocinfofromtexttool)", logging.INFO, "url: %s" % (url)) |
437 return docinfo | 462 if docinfo is None: |
438 | |
439 | |
440 def getDocinfoFromTextTool(self,url,dom=None,docinfo=None): | |
441 """parse texttool tag in index meta""" | |
442 logger("documentViewer (getdocinfofromtexttool)", logging.INFO,"url: %s"%(url)) | |
443 if docinfo is None: | |
444 docinfo = {} | 463 docinfo = {} |
445 | 464 |
446 if docinfo.get('lang',None) is None: | 465 if docinfo.get('lang', None) is None: |
447 docinfo['lang']='' # default keine Sprache gesetzt | 466 docinfo['lang'] = '' # default keine Sprache gesetzt |
448 if dom is None: | 467 if dom is None: |
449 dom = self.getIndexMeta(url) | 468 dom = self.getIndexMeta(url) |
450 | 469 |
451 archivePath = None | 470 archivePath = None |
452 archiveName = None | 471 archiveName = None |
453 | 472 |
454 archiveNames=dom.xpath("//resource/name") | 473 archiveNames = dom.xpath("//resource/name") |
455 if archiveNames and (len(archiveNames)>0): | 474 if archiveNames and (len(archiveNames) > 0): |
456 archiveName=getTextFromNode(archiveNames[0]) | 475 archiveName = getTextFromNode(archiveNames[0]) |
457 else: | 476 else: |
458 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/name missing in: %s"%(url)) | 477 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/name missing in: %s" % (url)) |
459 | 478 |
460 archivePaths=dom.xpath("//resource/archive-path") | 479 archivePaths = dom.xpath("//resource/archive-path") |
461 if archivePaths and (len(archivePaths)>0): | 480 if archivePaths and (len(archivePaths) > 0): |
462 archivePath=getTextFromNode(archivePaths[0]) | 481 archivePath = getTextFromNode(archivePaths[0]) |
463 # clean up archive path | 482 # clean up archive path |
464 if archivePath[0] != '/': | 483 if archivePath[0] != '/': |
465 archivePath = '/' + archivePath | 484 archivePath = '/' + archivePath |
466 if archiveName and (not archivePath.endswith(archiveName)): | 485 if archiveName and (not archivePath.endswith(archiveName)): |
467 archivePath += "/" + archiveName | 486 archivePath += "/" + archiveName |
468 else: | 487 else: |
469 # try to get archive-path from url | 488 # try to get archive-path from url |
470 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING,"resource/archive-path missing in: %s"%(url)) | 489 logger("documentViewer (getdocinfofromtexttool)", logging.WARNING, "resource/archive-path missing in: %s" % (url)) |
471 if (not url.startswith('http')): | 490 if (not url.startswith('http')): |
472 archivePath = url.replace('index.meta', '') | 491 archivePath = url.replace('index.meta', '') |
473 | 492 |
474 if archivePath is None: | 493 if archivePath is None: |
475 # we balk without archive-path | 494 # we balk without archive-path |
476 raise IOError("Missing archive-path (for text-tool) in %s"%(url)) | 495 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) |
477 | 496 |
478 imageDirs=dom.xpath("//texttool/image") | 497 imageDirs = dom.xpath("//texttool/image") |
479 if imageDirs and (len(imageDirs)>0): | 498 if imageDirs and (len(imageDirs) > 0): |
480 imageDir=getTextFromNode(imageDirs[0]) | 499 imageDir = getTextFromNode(imageDirs[0]) |
481 | 500 |
482 else: | 501 else: |
483 # we balk with no image tag / not necessary anymore because textmode is now standard | 502 # we balk with no image tag / not necessary anymore because textmode is now standard |
484 #raise IOError("No text-tool info in %s"%(url)) | 503 #raise IOError("No text-tool info in %s"%(url)) |
485 imageDir="" | 504 imageDir = "" |
486 #xquery="//pb" | 505 #xquery="//pb" |
487 docinfo['imagePath'] = "" # keine Bilder | 506 docinfo['imagePath'] = "" # keine Bilder |
488 docinfo['imageURL'] = "" | 507 docinfo['imageURL'] = "" |
489 | 508 |
490 if imageDir and archivePath: | 509 if imageDir and archivePath: |
491 #print "image: ", imageDir, " archivepath: ", archivePath | 510 #print "image: ", imageDir, " archivepath: ", archivePath |
492 imageDir=os.path.join(archivePath,imageDir) | 511 imageDir = os.path.join(archivePath, imageDir) |
493 imageDir=imageDir.replace("/mpiwg/online",'') | 512 imageDir = imageDir.replace("/mpiwg/online", '') |
494 docinfo=self.getDirinfoFromDigilib(imageDir,docinfo=docinfo) | 513 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) |
495 docinfo['imagePath'] = imageDir | 514 docinfo['imagePath'] = imageDir |
496 | 515 |
497 docinfo['imageURL'] = self.digilibBaseUrl+"/servlet/Scaler?fn="+imageDir | 516 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir |
498 | 517 |
499 viewerUrls=dom.xpath("//texttool/digiliburlprefix") | 518 viewerUrls = dom.xpath("//texttool/digiliburlprefix") |
500 if viewerUrls and (len(viewerUrls)>0): | 519 if viewerUrls and (len(viewerUrls) > 0): |
501 viewerUrl=getTextFromNode(viewerUrls[0]) | 520 viewerUrl = getTextFromNode(viewerUrls[0]) |
502 docinfo['viewerURL'] = viewerUrl | 521 docinfo['viewerURL'] = viewerUrl |
503 | 522 |
504 textUrls=dom.xpath("//texttool/text") | 523 textUrls = dom.xpath("//texttool/text") |
505 if textUrls and (len(textUrls)>0): | 524 if textUrls and (len(textUrls) > 0): |
506 textUrl=getTextFromNode(textUrls[0]) | 525 textUrl = getTextFromNode(textUrls[0]) |
507 if urlparse.urlparse(textUrl)[0]=="": #keine url | 526 if urlparse.urlparse(textUrl)[0] == "": #keine url |
508 textUrl=os.path.join(archivePath,textUrl) | 527 textUrl = os.path.join(archivePath, textUrl) |
509 # fix URLs starting with /mpiwg/online | 528 # fix URLs starting with /mpiwg/online |
510 if textUrl.startswith("/mpiwg/online"): | 529 if textUrl.startswith("/mpiwg/online"): |
511 textUrl = textUrl.replace("/mpiwg/online",'',1) | 530 textUrl = textUrl.replace("/mpiwg/online", '', 1) |
512 | 531 |
513 docinfo['textURL'] = textUrl | 532 docinfo['textURL'] = textUrl |
514 | 533 |
515 textUrls=dom.xpath("//texttool/text-url-path") | 534 textUrls = dom.xpath("//texttool/text-url-path") |
516 if textUrls and (len(textUrls)>0): | 535 if textUrls and (len(textUrls) > 0): |
517 textUrl=getTextFromNode(textUrls[0]) | 536 textUrl = getTextFromNode(textUrls[0]) |
518 docinfo['textURLPath'] = textUrl | 537 docinfo['textURLPath'] = textUrl |
519 | 538 |
520 presentationUrls=dom.xpath("//texttool/presentation") | 539 presentationUrls = dom.xpath("//texttool/presentation") |
521 docinfo = self.getBibinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get info von bib tag | 540 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag |
522 | 541 |
523 if presentationUrls and (len(presentationUrls)>0): # ueberschreibe diese durch presentation informationen | 542 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen |
524 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten | 543 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten |
525 # durch den relativen Pfad auf die presentation infos | 544 # durch den relativen Pfad auf die presentation infos |
526 presentationPath = getTextFromNode(presentationUrls[0]) | 545 presentationPath = getTextFromNode(presentationUrls[0]) |
527 if url.endswith("index.meta"): | 546 if url.endswith("index.meta"): |
528 presentationUrl=url.replace('index.meta',presentationPath) | 547 presentationUrl = url.replace('index.meta', presentationPath) |
529 else: | 548 else: |
530 presentationUrl=url + "/" + presentationPath | 549 presentationUrl = url + "/" + presentationPath |
531 docinfo=self.getNumPages('//pb', docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht | 550 docinfo = self.getNumPages(docinfo) #im moment einfach auf eins setzen, navigation ueber die thumbs geht natuerlich nicht |
532 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl,docinfo=docinfo,dom=dom) | 551 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) |
533 | 552 |
534 docinfo = self.getAuthinfoFromIndexMeta(url,docinfo=docinfo,dom=dom) # get access info | 553 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info |
535 | 554 |
536 return docinfo | 555 return docinfo |
556 | |
557 | |
558 | |
537 | 559 |
538 | 560 |
539 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): | 561 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): |
540 """gets the bibliographical information from the preseantion entry in texttools | 562 """gets the bibliographical information from the preseantion entry in texttools |
541 """ | 563 """ |
617 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) | 639 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) |
618 # int(current / grpsize) * grpsize +1)) | 640 # int(current / grpsize) * grpsize +1)) |
619 pageinfo['start'] = start | 641 pageinfo['start'] = start |
620 pageinfo['end'] = start + grpsize | 642 pageinfo['end'] = start + grpsize |
621 if docinfo is not None: | 643 if docinfo is not None: |
622 | |
623 np = int(docinfo['numPages']) | 644 np = int(docinfo['numPages']) |
624 pageinfo['end'] = min(pageinfo['end'], np) | 645 pageinfo['end'] = min(pageinfo['end'], np) |
625 pageinfo['numgroups'] = int(np / grpsize) | 646 pageinfo['numgroups'] = int(np / grpsize) |
626 if np % grpsize > 0: | 647 if np % grpsize > 0: |
627 pageinfo['numgroups'] += 1 | 648 pageinfo['numgroups'] += 1 |
628 | 649 |
629 return pageinfo | 650 return pageinfo |
630 | 651 |
631 def text(self,mode,url,pn): | 652 |
632 """give text""" | 653 |
633 if mode=="texttool": #index.meta with texttool information | 654 def getNumPages(self,docinfo=None): |
634 (viewerUrl,imagepath,textpath)=parseUrlTextTool(url) | 655 """get list of pages from fulltext and put in docinfo""" |
635 | 656 xquery = '//pb' |
636 #print textpath | 657 text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) |
637 try: | 658 # TODO: better processing of the page list. do we need the info somewhere else also? |
638 dom = NonvalidatingReader.parseUri(textpath) | 659 docinfo['numPages'] = text.count("<pb ") |
639 except: | 660 return docinfo |
640 return None | 661 |
641 | 662 def getTextPage(self, mode="text", pn=1, docinfo=None): |
642 list=[] | 663 """returns single page from fulltext""" |
643 nodes=dom.xpath("//pb") | 664 pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", "document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'],mode,pn), outputUnicode=False) |
644 | 665 # post-processing downloaded xml |
645 node=nodes[int(pn)-1] | 666 pagedom = Parse(pagexml) |
646 | 667 # plain text mode |
647 p=node | 668 if mode == "text": |
648 | 669 # first div contains text |
649 while p.tagName!="p": | 670 pagedivs = pagedom.xpath("/div") |
650 p=p.parentNode | 671 if len(pagedivs) > 0: |
651 | 672 pagenode = pagedivs[0] |
652 | 673 return serializeNode(pagenode) |
653 endNode=nodes[int(pn)] | 674 |
654 | 675 # text-with-links mode |
655 | 676 if mode == "textPollux": |
656 e=endNode | 677 # first div contains text |
657 | 678 pagedivs = pagedom.xpath("/div") |
658 while e.tagName!="p": | 679 if len(pagedivs) > 0: |
659 e=e.parentNode | 680 pagenode = pagedivs[0] |
660 | 681 # check all a-tags |
661 | 682 links = pagenode.xpath("//a") |
662 next=node.parentNode | 683 for l in links: |
663 | 684 hrefNode = l.getAttributeNodeNS(None, u"href") |
664 #sammle s | 685 if hrefNode: |
665 while next and (next!=endNode.parentNode): | 686 # is link with href |
666 list.append(next) | 687 href = hrefNode.nodeValue |
667 next=next.nextSibling | 688 if href.startswith('lt/lex.xql'): |
668 list.append(endNode.parentNode) | 689 # is pollux link |
669 | 690 selfurl = self.absolute_url() |
670 if p==e:# beide im selben paragraphen | 691 # change href |
671 pass | 692 hrefNode.nodeValue = href.replace('lt/lex.xql','%s/head_main_voc'%selfurl) |
672 # else: | 693 # add target |
673 # next=p | 694 l.setAttributeNS(None, 'target', '_blank') |
674 # while next!=e: | 695 return serializeNode(pagenode) |
675 # print next,e | 696 |
676 # list.append(next) | 697 return "no text here" |
677 # next=next.nextSibling | 698 |
678 # | 699 |
679 # for x in list: | 700 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): |
680 # PrettyPrint(x) | |
681 # | |
682 # return list | |
683 # | |
684 | |
685 def findDigilibUrl(self): | |
686 """try to get the digilib URL from zogilib""" | |
687 url = self.imageViewerUrl[:-1] + "/getScalerUrl" | |
688 #print urlparse.urlparse(url)[0] | |
689 #print urlparse.urljoin(self.absolute_url(),url) | |
690 logging.info("finddigiliburl: %s"%urlparse.urlparse(url)[0]) | |
691 logging.info("finddigiliburl: %s"%urlparse.urljoin(self.absolute_url(),url)) | |
692 | |
693 try: | |
694 if urlparse.urlparse(url)[0]=='': #relative path | |
695 url=urlparse.urljoin(self.absolute_url()+"/",url) | |
696 | |
697 scaler = urlopen(url).read() | |
698 return scaler.replace("/servlet/Scaler?", "") | |
699 except: | |
700 return None | |
701 | |
702 def changeDocumentViewer(self,imageViewerUrl,textViewerUrl,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=10,authgroups='mpiwg',RESPONSE=None): | |
703 """init document viewer""" | 701 """init document viewer""" |
704 self.title=title | 702 self.title=title |
705 self.imageViewerUrl=imageViewerUrl | |
706 self.textViewerUrl=textViewerUrl | |
707 self.digilibBaseUrl = digilibBaseUrl | 703 self.digilibBaseUrl = digilibBaseUrl |
708 self.thumbrows = thumbrows | 704 self.thumbrows = thumbrows |
709 self.thumbcols = thumbcols | 705 self.thumbcols = thumbcols |
710 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] | 706 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] |
711 if RESPONSE is not None: | 707 if RESPONSE is not None: |
712 RESPONSE.redirect('manage_main') | 708 RESPONSE.redirect('manage_main') |
713 | 709 |
714 | 710 |
715 | 711 |
716 | |
717 # security.declareProtected('View management screens','renameImageForm') | |
718 | |
719 def manage_AddDocumentViewerForm(self): | 712 def manage_AddDocumentViewerForm(self): |
720 """add the viewer form""" | 713 """add the viewer form""" |
721 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) | 714 pt=PageTemplateFile('zpt/addDocumentViewer', globals()).__of__(self) |
722 return pt() | 715 return pt() |
723 | 716 |
724 def manage_AddDocumentViewer(self,id,imageViewerUrl="",textViewerUrl="",title="",RESPONSE=None): | 717 def manage_AddDocumentViewer(self,id,imageScalerUrl="",textServerName="",title="",RESPONSE=None): |
725 """add the viewer""" | 718 """add the viewer""" |
726 newObj=documentViewer(id,imageViewerUrl,title=title,textViewerUrl=textViewerUrl) | 719 newObj=documentViewer(id,imageScalerUrl=imageScalerUrl,title=title,textServerName=textServerName) |
727 self._setObject(id,newObj) | 720 self._setObject(id,newObj) |
728 | 721 |
729 if RESPONSE is not None: | 722 if RESPONSE is not None: |
730 RESPONSE.redirect('manage_main') | 723 RESPONSE.redirect('manage_main') |
731 | 724 |