Changes in [497:73fb73577961:513:67095296c95a] in documentViewer
- Files:
-
- 9 added
- 3 deleted
- 9 edited
Legend:
- Unmodified
- Added
- Removed
-
MpdlXmlTextServer.py
r451 r513 1 2 1 from OFS.SimpleItem import SimpleItem 3 2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 from Ft.Xml import EMPTY_NAMESPACE, Parse 5 from Ft.Xml.Domlette import NonvalidatingReader 6 7 import md5 8 import sys 3 4 import xml.etree.ElementTree as ET 5 6 import re 9 7 import logging 10 8 import urllib 11 import documentViewer 12 from documentViewer import getTextFromNode, serializeNode 9 import urlparse 10 import base64 11 12 from SrvTxtUtils import getInt, getText, getHttpData 13 14 def serialize(node): 15 """returns a string containing an XML snippet of node""" 16 s = ET.tostring(node, 'UTF-8') 17 # snip off XML declaration 18 if s.startswith('<?xml'): 19 i = s.find('?>') 20 return s[i+3:] 21 22 return s 23 13 24 14 25 class MpdlXmlTextServer(SimpleItem): … … 22 33 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) 23 34 24 def __init__(self,id,title="",serverUrl="http://mpdl-test.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 25 #def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/", serverName=None, timeout=40): 26 35 def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 27 36 """constructor""" 28 37 self.id=id … … 36 45 def getHttpData(self, url, data=None): 37 46 """returns result from url+data HTTP request""" 38 return documentViewer.getHttpData(url,data,timeout=self.timeout)47 return getHttpData(url,data,timeout=self.timeout) 39 48 40 49 def getServerData(self, method, data=None): 41 50 """returns result from text server for method+data""" 42 51 url = self.serverUrl+method 43 return documentViewer.getHttpData(url,data,timeout=self.timeout) 44 45 def getSearch(self, pageinfo=None, docinfo=None): 46 """get search list""" 47 docpath = docinfo['textURLPath'] 48 url = docinfo['url'] 49 pagesize = pageinfo['queryPageSize'] 50 pn = pageinfo.get('searchPN',1) 51 #sn = pageinfo['sn'] 52 s = pageinfo['s'] 53 highlightElementPos =pageinfo ['highlightElementPos'] 54 highlightElement = pageinfo ['highlightElement'] 55 56 highlightQuery = pageinfo['highlightQuery'] 57 query =pageinfo['query'] 58 queryType =pageinfo['queryType'] 59 viewMode= pageinfo['viewMode'] 60 tocMode = pageinfo['tocMode'] 61 characterNormalization = pageinfo['characterNormalization'] 62 #optionToggle = pageinfo['optionToggle'] 63 tocPN = pageinfo['tocPN'] 64 selfurl = self.absolute_url() 65 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&s=%s&viewMode=%s&characterNormalization=%s&highlightElementPos=%s&highlightElement=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, s, viewMode,characterNormalization, highlightElementPos, highlightElement, urllib.quote(highlightQuery))) 66 #data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery))) 67 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) 68 pagedom = Parse(pagexml) 69 70 """ 71 pagedivs = pagedom.xpath("//div[@class='queryResultHits']") 72 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): 73 if len(pagedivs)>0: 74 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) 75 s = getTextFromNode(pagedivs[0]) 76 s1 = int(s)/10+1 77 try: 78 docinfo['queryResultHits'] = int(s1) 79 logging.debug("SEARCH ENTRIES: %s"%(s1)) 80 except: 81 docinfo['queryResultHits'] = 0 82 """ 83 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 84 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 85 if len(pagedivs)>0: 86 pagenode=pagedivs[0] 87 links=pagenode.xpath("//a") 88 for l in links: 89 hrefNode = l.getAttributeNodeNS(None, u"href") 90 if hrefNode: 91 href = hrefNode.nodeValue 92 if href.startswith('page-fragment.xql'): 93 selfurl = self.absolute_url() 94 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization)) 95 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 96 #logging.debug("PUREXML :%s"%(serializeNode(pagenode))) 97 return serializeNode(pagenode) 98 if (queryType=="fulltextMorph"): 99 pagedivs = pagedom.xpath("//div[@class='queryResult']") 100 if len(pagedivs)>0: 101 pagenode=pagedivs[0] 102 links=pagenode.xpath("//a") 103 for l in links: 104 hrefNode = l.getAttributeNodeNS(None, u"href") 105 if hrefNode: 106 href = hrefNode.nodeValue 107 if href.startswith('page-fragment.xql'): 108 selfurl = self.absolute_url() 109 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)) 110 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 111 if href.startswith('../lt/lemma.xql'): 112 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl)) 113 l.setAttributeNS(None, 'target', '_blank') 114 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=300,height=400,top=180, left=400, scrollbars=1'); return false;") 115 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 116 pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']") 117 return serializeNode(pagenode) 118 if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): 119 pagedivs= pagedom.xpath("//div[@class='queryResultPage']") 120 if len(pagedivs)>0: 121 pagenode=pagedivs[0] 122 links=pagenode.xpath("//a") 123 for l in links: 124 hrefNode = l.getAttributeNodeNS(None, u"href") 125 if hrefNode: 126 href = hrefNode.nodeValue 127 hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) 128 if href.startswith('../lt/lex.xql'): 129 hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl) 130 l.setAttributeNS(None, 'target', '_blank') 131 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 132 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 133 if href.startswith('../lt/lemma.xql'): 134 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl)) 135 l.setAttributeNS(None, 'target', '_blank') 136 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=300,height=400,top=180, left=400, scrollbars=1'); return false;") 137 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 138 return serializeNode(pagenode) 139 return "no text here" 140 141 def getGisPlaces(self, docinfo=None, pageinfo=None): 142 """ Show all Gis Places of whole Page""" 143 xpath='//place' 52 return getHttpData(url,data,timeout=self.timeout) 53 54 55 def getPlacesOnPage(self, docinfo=None, pn=None): 56 """Returns list of GIS places of page pn""" 144 57 docpath = docinfo.get('textURLPath',None) 145 58 if not docpath: 146 59 return None 147 60 148 url = docinfo['url'] 149 selfurl = self.absolute_url() 150 pn = pageinfo['current'] 151 hrefList=[] 152 myList= "" 153 text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn)) 154 dom = Parse(text) 155 result = dom.xpath("//result/resultPage/place") 61 places=[] 62 text=self.getServerData("xpath.xql", "document=%s&xpath=//place&pn=%s"%(docpath,pn)) 63 dom = ET.fromstring(text) 64 result = dom.findall(".//resultPage/place") 156 65 for l in result: 157 hrefNode= l.getAttributeNodeNS(None, u"id") 158 href= hrefNode.nodeValue 159 hrefList.append(href) 160 myList = ",".join(hrefList) 161 #logging.debug("getGisPlaces :%s"%(myList)) 162 return myList 163 164 def getAllGisPlaces (self, docinfo=None, pageinfo=None): 165 """Show all Gis Places of whole Book """ 166 xpath ='//echo:place' 167 docpath =docinfo['textURLPath'] 168 url = docinfo['url'] 169 selfurl =self.absolute_url() 170 pn =pageinfo['current'] 171 hrefList=[] 172 myList="" 173 text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath)) 174 dom =Parse(text) 175 result = dom.xpath("//result/resultPage/place") 176 177 for l in result: 178 hrefNode = l.getAttributeNodeNS(None, u"id") 179 href= hrefNode.nodeValue 180 hrefList.append(href) 181 myList = ",".join(hrefList) 182 #logging.debug("getALLGisPlaces :%s"%(myList)) 183 return myList 66 id = l.get("id") 67 name = l.text 68 place = {'id': id, 'name': name} 69 places.append(place) 70 71 return places 72 184 73 74 def processPageInfo(self, dom, docinfo, pageinfo): 75 """processes page info divs from dom and stores in docinfo and pageinfo""" 76 # assume first second level div is pageMeta 77 alldivs = dom.find("div") 78 79 if alldivs is None or alldivs.get('class', '') != 'pageMeta': 80 logging.error("processPageInfo: pageMeta div not found!") 81 return 82 83 for div in alldivs: 84 dc = div.get('class') 85 86 # pageNumberOrig 87 if dc == 'pageNumberOrig': 88 pageinfo['pageNumberOrig'] = div.text 89 90 # pageNumberOrigNorm 91 elif dc == 'pageNumberOrigNorm': 92 pageinfo['pageNumberOrigNorm'] = div.text 93 94 # pageHeaderTitle 95 elif dc == 'pageHeaderTitle': 96 pageinfo['pageHeaderTitle'] = div.text 97 98 # numFigureEntries 99 elif dc == 'countFigureEntries': 100 docinfo['numFigureEntries'] = getInt(div.text) 101 102 # numTocEntries 103 elif dc == 'countTocEntries': 104 # WTF: s1 = int(s)/30+1 105 docinfo['numTocEntries'] = getInt(div.text) 106 107 # numPlaces 108 elif dc == 'countPlaces': 109 docinfo['numPlaces'] = getInt(div.text) 110 111 # numTextPages 112 elif dc == 'countPages': 113 np = getInt(div.text) 114 if np > 0: 115 docinfo['numTextPages'] = np 116 if docinfo.get('numPages', 0) == 0: 117 # seems to be text-only - update page count 118 docinfo['numPages'] = np 119 #pageinfo['end'] = min(pageinfo['end'], np) 120 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) 121 if np % pageinfo['groupsize'] > 0: 122 pageinfo['numgroups'] += 1 123 124 #logging.debug("processPageInfo: pageinfo=%s"%repr(pageinfo)) 125 return 126 185 127 186 def getTextPage(self, mode="text _dict", pn=1, docinfo=None, pageinfo=None):128 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None): 187 129 """returns single page from fulltext""" 130 131 logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn)) 132 # check for cached text -- but ideally this shouldn't be called twice 133 if pageinfo.has_key('textPage'): 134 logging.debug("getTextPage: using cached text") 135 return pageinfo['textPage'] 136 188 137 docpath = docinfo['textURLPath'] 189 path = docinfo['textURLPath'] 190 url = docinfo.get('url',None) 191 name = docinfo.get('name',None) 192 pn =pageinfo['current'] 193 #sn = pageinfo['sn'] 194 s = pageinfo['s'] 195 highlightElementPos =pageinfo ['highlightElementPos'] 196 highlightElement = pageinfo ['highlightElement'] 197 #optionToggle =pageinfo ['optionToggle'] 198 highlightQuery = pageinfo['highlightQuery'] 199 #mode = pageinfo ['viewMode'] 200 tocMode = pageinfo['tocMode'] 201 xpointer = pageinfo['xpointer'] 202 characterNormalization=pageinfo['characterNormalization'] 203 tocPN = pageinfo['tocPN'] 204 selfurl = self.absolute_url() 205 206 if mode == "text_dict": 207 textmode = "textPollux" 138 # just checking 139 if pageinfo['current'] != pn: 140 logging.warning("getTextPage: current!=pn!") 141 142 # stuff for constructing full urls 143 selfurl = docinfo['viewerUrl'] 144 textParams = {'document': docpath, 145 'pn': pn} 146 if 'characterNormalization' in pageinfo: 147 textParams['characterNormalization'] = pageinfo['characterNormalization'] 148 149 if not mode: 150 # default is dict 151 mode = 'text' 152 153 modes = mode.split(',') 154 # check for multiple layers 155 if len(modes) > 1: 156 logging.debug("getTextPage: more than one mode=%s"%mode) 157 158 # search mode 159 if 'search' in modes: 160 # add highlighting 161 highlightQuery = pageinfo.get('highlightQuery', None) 162 if highlightQuery: 163 textParams['highlightQuery'] = highlightQuery 164 textParams['highlightElement'] = pageinfo.get('highlightElement', '') 165 textParams['highlightElementPos'] = pageinfo.get('highlightElementPos', '') 166 167 # ignore mode in the following 168 modes.remove('search') 169 170 # other modes don't combine 171 if 'dict' in modes: 172 # dict is called textPollux in the backend 173 textmode = 'textPollux' 174 elif len(modes) == 0: 175 # text is default mode 176 textmode = 'text' 208 177 else: 209 textmode = mode 210 211 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s&xpointer=%s&options=withIdentifier"%(docpath,textmode,pn,characterNormalization, xpointer) 212 if highlightQuery is not None: 213 #textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 214 textParam +="&highlightQuery=%s&s=%s&highlightElement=%s&highlightElementPos=%s"%(urllib.quote(highlightQuery),s, highlightElement, highlightElementPos) 215 216 pagexml = self.getServerData("page-fragment.xql",textParam) 217 dom = Parse(pagexml) 218 #dom = NonvalidatingReader.parseStream(pagexml) 219 220 #original Pages 221 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 222 223 """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 224 if len(pagedivs)>0: 225 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 226 logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig'])) 227 228 #original Pages Norm 229 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 230 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 231 if len(pagedivs)>0: 232 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 233 logging.debug("ORIGINAL PAGE NORM: %s"%(docinfo['pageNumberOrigNorm'])) 234 """ 235 #figureEntries 236 pagedivs = dom.xpath("//div[@class='countFigureEntries']") 237 if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): 238 if len(pagedivs)>0: 239 docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) 240 s = getTextFromNode(pagedivs[0]) 241 if s=='0': 242 try: 243 docinfo['countFigureEntries'] = int(s) 244 except: 245 docinfo['countFigureEntries'] = 0 246 else: 247 s1 = int(s)/30+1 248 try: 249 docinfo['countFigureEntries'] = int(s1) 250 except: 251 docinfo['countFigureEntries'] = 0 252 253 #allPlaces 254 pagedivs = dom.xpath("//div[@class='countPlaces']") 255 if pagedivs == dom.xpath("//div[@class='countPlaces']"): 256 if len(pagedivs)>0: 257 docinfo['countPlaces']= getTextFromNode(pagedivs[0]) 258 s = getTextFromNode(pagedivs[0]) 259 try: 260 docinfo['countPlaces'] = int(s) 261 except: 262 docinfo['countPlaces'] = 0 263 264 #tocEntries 265 pagedivs = dom.xpath("//div[@class='countTocEntries']") 266 if pagedivs == dom.xpath("//div[@class='countTocEntries']"): 267 if len(pagedivs)>0: 268 docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) 269 s = getTextFromNode(pagedivs[0]) 270 if s=='0': 271 try: 272 docinfo['countTocEntries'] = int(s) 273 except: 274 docinfo['countTocEntries'] = 0 275 else: 276 s1 = int(s)/30+1 277 try: 278 docinfo['countTocEntries'] = int(s1) 279 except: 280 docinfo['countTocEntries'] = 0 281 282 #numTextPages 283 pagedivs = dom.xpath("//div[@class='countPages']") 284 if pagedivs == dom.xpath("//div[@class='countPages']"): 285 if len(pagedivs)>0: 286 docinfo['numPages'] = getTextFromNode(pagedivs[0]) 287 s = getTextFromNode(pagedivs[0]) 288 289 try: 290 docinfo['numPages'] = int(s) 291 #logging.debug("PAGE NUMBER: %s"%(s)) 292 293 np = docinfo['numPages'] 294 pageinfo['end'] = min(pageinfo['end'], np) 295 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) 296 if np % pageinfo['groupsize'] > 0: 297 pageinfo['numgroups'] += 1 298 except: 299 docinfo['numPages'] = 0 300 301 else: 302 #no full text -- init to 0 303 docinfo['pageNumberOrig'] = 0 304 docinfo['countFigureEntries'] = 0 305 docinfo['countPlaces'] = 0 306 docinfo['countTocEntries'] = 0 307 docinfo['numPages'] = 0 308 docinfo['pageNumberOrigNorm'] = 0 309 #return docinfo 178 # just take first mode 179 textmode = modes[0] 180 181 textParams['mode'] = textmode 182 183 # fetch the page 184 pagexml = self.getServerData("page-fragment.xql",urllib.urlencode(textParams)) 185 dom = ET.fromstring(pagexml) 186 # extract additional info 187 self.processPageInfo(dom, docinfo, pageinfo) 188 # page content is in <div class="pageContent"> 189 pagediv = None 190 # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent'] 191 # so we look at the second level divs 192 alldivs = dom.findall("div") 193 for div in alldivs: 194 dc = div.get('class') 195 # page content div 196 if dc == 'pageContent': 197 pagediv = div 198 break 310 199 311 200 # plain text mode 312 if mode == "text": 313 # first div contains text 314 pagedivs = dom.xpath("/div") 315 if len(pagedivs) > 0: 316 pagenode = pagedivs[0] 317 links = pagenode.xpath("//a") 201 if textmode == "text": 202 # get full url assuming documentViewer is parent 203 selfurl = self.getLink() 204 if pagediv is not None: 205 links = pagediv.findall(".//a") 318 206 for l in links: 319 hrefNode = l.getAttributeNodeNS(None, u"href") 320 if hrefNode: 321 href= hrefNode.nodeValue 322 if href.startswith('#note-'): 323 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn)) 324 #if href.startswith(): 325 return serializeNode(pagenode) 326 if mode == "xml": 327 # first div contains text 328 pagedivs = dom.xpath("/div") 329 if len(pagedivs) > 0: 330 pagenode = pagedivs[0] 331 return serializeNode(pagenode) 332 if mode == "gis": 333 # first div contains text 334 pagedivs = dom.xpath("/div") 335 if len(pagedivs) > 0: 336 pagenode = pagedivs[0] 337 links =pagenode.xpath("//a") 338 for l in links: 339 hrefNode =l.getAttributeNodeNS(None, u"href") 340 if hrefNode: 341 href=hrefNode.nodeValue 342 if href.startswith('http://mappit.mpiwg-berlin.mpg.de'): 343 hrefNode.nodeValue =href.replace('db/REST/db/chgis/mpdl','db/RESTdb/db/mpdl/%s'%name) 344 l.setAttributeNS(None, 'target', '_blank') 345 return serializeNode(pagenode) 207 href = l.get('href') 208 if href and href.startswith('#note-'): 209 href = href.replace('#note-',"%s#note-"%selfurl) 210 l.set('href', href) 211 212 return serialize(pagediv) 213 214 # text-with-links mode 215 elif textmode == "textPollux": 216 if pagediv is not None: 217 viewerurl = docinfo['viewerUrl'] 218 selfurl = self.getLink() 219 # check all a-tags 220 links = pagediv.findall(".//a") 221 for l in links: 222 href = l.get('href') 346 223 347 if mode == "pureXml": 348 # first div contains text 349 pagedivs = dom.xpath("/div") 350 if len(pagedivs) > 0: 351 pagenode = pagedivs[0] 352 return serializeNode(pagenode) 353 # text-with-links mode 354 if mode == "text_dict": 355 # first div contains text 356 #mode = pageinfo ['viewMode'] 357 pagedivs = dom.xpath("/div") 358 if len(pagedivs) > 0: 359 pagenode = pagedivs[0] 360 # check all a-tags 361 links = pagenode.xpath("//a") 362 363 for l in links: 364 hrefNode = l.getAttributeNodeNS(None, u"href") 365 366 if hrefNode: 224 if href: 367 225 # is link with href 368 href = hrefNode.nodeValue 369 if href.startswith('http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): 370 # is pollux link 371 selfurl = self.absolute_url() 372 # change href 373 hrefNode.nodeValue = href.replace('http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl) 374 # add target 375 l.setAttributeNS(None, 'target', '_blank') 376 #l.setAttributeNS(None, 'onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") 377 #l.setAttributeNS(None, "ondblclick", "popupWin.focus();") 378 #window.open("this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=yes, scrollbars=1'"); return false;") 226 linkurl = urlparse.urlparse(href) 227 #logging.debug("getTextPage: linkurl=%s"%repr(linkurl)) 228 if linkurl.path.endswith('GetDictionaryEntries'): 229 #TODO: replace wordInfo page 230 # is dictionary link - change href (keeping parameters) 231 #l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/template/viewer_wordinfo'%viewerurl)) 232 # add target to open new page 233 l.set('target', '_blank') 379 234 380 if href.startswith('http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): 381 selfurl = self.absolute_url() 382 hrefNode.nodeValue = href.replace('http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl) 383 l.setAttributeNS(None, 'target', '_blank') 384 l.setAttributeNS(None, 'onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=300,height=400,top=180, left=700, toolbar=no, scrollbars=1'); return false;") 385 l.setAttributeNS(None, 'ondblclick', 'popupWin.focus();') 235 # TODO: is this needed? 236 # if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): 237 # selfurl = self.absolute_url() 238 # l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) 239 # l.set('target', '_blank') 240 # l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") 241 # l.set('ondblclick', 'popupWin.focus();') 386 242 387 243 if href.startswith('#note-'): 388 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn)) 244 # note link 245 l.set('href', href.replace('#note-',"%s#note-"%selfurl)) 389 246 390 return serializeNode(pagenode) 391 return "no text here" 392 393 def getOrigPages(self, docinfo=None, pageinfo=None): 247 return serialize(pagediv) 248 249 # xml mode 250 elif textmode == "xml": 251 if pagediv is not None: 252 return serialize(pagediv) 253 254 # pureXml mode 255 elif textmode == "pureXml": 256 if pagediv is not None: 257 return serialize(pagediv) 258 259 # gis mode 260 elif textmode == "gis": 261 if pagediv is not None: 262 # check all a-tags 263 links = pagediv.findall(".//a") 264 # add our URL as backlink 265 selfurl = self.getLink() 266 doc = base64.b64encode(selfurl) 267 for l in links: 268 href = l.get('href') 269 if href: 270 if href.startswith('http://mappit.mpiwg-berlin.mpg.de'): 271 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) 272 l.set('target', '_blank') 273 274 return serialize(pagediv) 275 276 return None 277 278 279 def getSearchResults(self, mode, query=None, pageinfo=None, docinfo=None): 280 """loads list of search results and stores XML in docinfo""" 281 282 logging.debug("getSearchResults mode=%s query=%s"%(mode, query)) 283 if mode == "none": 284 return docinfo 285 286 cachedQuery = docinfo.get('cachedQuery', None) 287 if cachedQuery is not None: 288 # cached search result 289 if cachedQuery == '%s_%s'%(mode,query): 290 # same query 291 return docinfo 292 293 else: 294 # different query 295 del docinfo['resultSize'] 296 del docinfo['resultXML'] 297 298 # cache query 299 docinfo['cachedQuery'] = '%s_%s'%(mode,query) 300 301 # fetch full results 394 302 docpath = docinfo['textURLPath'] 395 pn =pageinfo['current'] 396 selfurl = self.absolute_url() 397 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 398 dom = Parse(pagexml) 399 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 400 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 401 if len(pagedivs)>0: 402 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 403 return docinfo['pageNumberOrig'] 404 405 def getOrigPagesNorm(self, docinfo=None, pageinfo=None): 406 docpath = docinfo['textURLPath'] 407 pn =pageinfo['current'] 408 selfurl = self.absolute_url() 409 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 410 dom = Parse(pagexml) 411 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 412 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 413 if len(pagedivs)>0: 414 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 415 return docinfo['pageNumberOrigNorm'] 416 417 418 def getTranslate(self, word=None, language=None, display=None): 419 """translate into another languages""" 420 data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&display="+urllib.quote(display)+"&output=html") 421 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) 422 return data 423 424 def getLemma(self, lemma=None, language=None): 425 """simular words lemma """ 426 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html") 427 return data 428 429 def getLemmaQuery(self, query=None, language=None): 430 """simular words lemma """ 431 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html") 432 return data 433 434 def getLex(self, query=None, language=None): 435 #simular words lemma 436 data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) 437 return data 438 439 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): 440 #number of 441 docpath = docinfo['textURLPath'] 442 pagesize = pageinfo['queryPageSize'] 443 pn = pageinfo['searchPN'] 444 query =pageinfo['query'] 445 queryType =pageinfo['queryType'] 446 tocSearch = 0 447 tocDiv = None 448 449 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 450 pagedom = Parse(pagexml) 451 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 452 tocSearch = int(getTextFromNode(numdivs[0])) 453 tc=int((tocSearch/10)+1) 454 return tc 455 303 params = {'document': docpath, 304 'mode': 'text', 305 'queryType': mode, 306 'query': query, 307 'queryResultPageSize': 1000, 308 'queryResultPN': 1, 309 'characterNormalization': pageinfo.get('characterNormalization', 'reg')} 310 pagexml = self.getServerData("doc-query.xql",urllib.urlencode(params)) 311 #pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&s=%s&viewMode=%s&characterNormalization=%s&highlightElementPos=%s&highlightElement=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, s, viewMode,characterNormalization, highlightElementPos, highlightElement, urllib.quote(highlightQuery))) 312 dom = ET.fromstring(pagexml) 313 # page content is in <div class="queryResultPage"> 314 pagediv = None 315 # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage'] 316 alldivs = dom.findall("div") 317 for div in alldivs: 318 dc = div.get('class') 319 # page content div 320 if dc == 'queryResultPage': 321 pagediv = div 322 323 elif dc == 'queryResultHits': 324 docinfo['resultSize'] = getInt(div.text) 325 326 if pagediv is not None: 327 # store XML in docinfo 328 docinfo['resultXML'] = ET.tostring(pagediv, 'UTF-8') 329 330 return docinfo 331 332 333 def getResultsPage(self, mode="text", query=None, pn=None, start=None, size=None, pageinfo=None, docinfo=None): 334 """returns single page from the table of contents""" 335 logging.debug("getResultsPage mode=%s, pn=%s"%(mode,pn)) 336 # check for cached result 337 if not 'resultXML' in docinfo: 338 self.getSearchResults(mode=mode, query=query, pageinfo=pageinfo, docinfo=docinfo) 339 340 resultxml = docinfo.get('resultXML', None) 341 if not resultxml: 342 logging.error("getResultPage: unable to find resultXML") 343 return "Error: no result!" 344 345 if size is None: 346 size = pageinfo.get('resultPageSize', 10) 347 348 if start is None: 349 start = (pn - 1) * size 350 351 fullresult = ET.fromstring(resultxml) 352 353 if fullresult is not None: 354 # paginate 355 first = start-1 356 len = size 357 del fullresult[:first] 358 del fullresult[len:] 359 tocdivs = fullresult 360 361 # check all a-tags 362 links = tocdivs.findall(".//a") 363 for l in links: 364 href = l.get('href') 365 if href: 366 # assume all links go to pages 367 linkUrl = urlparse.urlparse(href) 368 linkParams = urlparse.parse_qs(linkUrl.query) 369 # take some parameters 370 params = {'pn': linkParams['pn'], 371 'highlightQuery': linkParams.get('highlightQuery',''), 372 'highlightElement': linkParams.get('highlightElement',''), 373 'highlightElementPos': linkParams.get('highlightElementPos','') 374 } 375 url = self.getLink(params=params) 376 l.set('href', url) 377 378 return serialize(tocdivs) 379 380 return "ERROR: no results!" 381 382 456 383 def getToc(self, mode="text", docinfo=None): 457 """loads table of contents and stores in docinfo""" 384 """loads table of contents and stores XML in docinfo""" 385 logging.debug("getToc mode=%s"%mode) 458 386 if mode == "none": 459 return docinfo 387 return docinfo 388 460 389 if 'tocSize_%s'%mode in docinfo: 461 390 # cached toc … … 473 402 tocSize = 0 474 403 tocDiv = None 475 404 # fetch full toc 476 405 pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 477 478 # post-processing downloaded xml 479 pagedom = Parse(pagexml) 480 # get number of entries 481 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 482 if len(numdivs) > 0: 483 tocSize = int(getTextFromNode(numdivs[0])) 484 docinfo['tocSize_%s'%mode] = tocSize 406 dom = ET.fromstring(pagexml) 407 # page content is in <div class="queryResultPage"> 408 pagediv = None 409 # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage'] 410 alldivs = dom.findall("div") 411 for div in alldivs: 412 dc = div.get('class') 413 # page content div 414 if dc == 'queryResultPage': 415 pagediv = div 416 417 elif dc == 'queryResultHits': 418 docinfo['tocSize_%s'%mode] = getInt(div.text) 419 420 if pagediv is not None: 421 # store XML in docinfo 422 docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8') 423 485 424 return docinfo 486 425 487 def getTocPage(self, mode="text", pn= 1, pageinfo=None, docinfo=None):426 def getTocPage(self, mode="text", pn=None, start=None, size=None, pageinfo=None, docinfo=None): 488 427 """returns single page from the table of contents""" 489 # TODO: this should use the cached TOC428 logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn)) 490 429 if mode == "text": 491 430 queryType = "toc" 492 431 else: 493 432 queryType = mode 494 docpath = docinfo['textURLPath'] 495 path = docinfo['textURLPath'] 496 pagesize = pageinfo['tocPageSize'] 497 pn = pageinfo['tocPN'] 498 url = docinfo['url'] 499 selfurl = self.absolute_url() 500 viewMode= pageinfo['viewMode'] 501 characterNormalization = pageinfo ['characterNormalization'] 502 #optionToggle =pageinfo ['optionToggle'] 503 tocMode = pageinfo['tocMode'] 504 tocPN = pageinfo['tocPN'] 505 506 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm"%(docpath,queryType, pagesize, pn)) 507 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) 508 text = page.replace('mode=image','mode=texttool') 509 return text 433 434 # check for cached TOC 435 if not docinfo.has_key('tocXML_%s'%mode): 436 self.getToc(mode=mode, docinfo=docinfo) 437 438 tocxml = docinfo.get('tocXML_%s'%mode, None) 439 if not tocxml: 440 logging.error("getTocPage: unable to find tocXML") 441 return "Error: no table of contents!" 442 443 if size is None: 444 size = pageinfo.get('tocPageSize', 30) 445 446 if start is None: 447 start = (pn - 1) * size 448 449 fulltoc = ET.fromstring(tocxml) 450 451 if fulltoc is not None: 452 # paginate 453 first = (start - 1) * 2 454 len = size * 2 455 del fulltoc[:first] 456 del fulltoc[len:] 457 tocdivs = fulltoc 458 459 # check all a-tags 460 links = tocdivs.findall(".//a") 461 for l in links: 462 href = l.get('href') 463 if href: 464 # take pn from href 465 m = re.match(r'page-fragment\.xql.*pn=(\d+)', href) 466 if m is not None: 467 # and create new url (assuming parent is documentViewer) 468 url = self.getLink('pn', m.group(1)) 469 l.set('href', url) 470 else: 471 logging.warning("getTocPage: Problem with link=%s"%href) 472 473 # fix two-divs-per-row with containing div 474 newtoc = ET.Element('div', {'class':'queryResultPage'}) 475 for (d1,d2) in zip(tocdivs[::2],tocdivs[1::2]): 476 e = ET.Element('div',{'class':'tocline'}) 477 e.append(d1) 478 e.append(d2) 479 newtoc.append(e) 480 481 return serialize(newtoc) 482 483 return "ERROR: no table of contents!" 484 510 485 511 486 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 512 #def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None):513 487 """change settings""" 514 488 self.title=title … … 531 505 if RESPONSE is not None: 532 506 RESPONSE.redirect('manage_main') 507 508 -
__init__.py
r130 r486 13 13 14 14 context.registerClass( 15 documentViewer.DocumentViewerTemplate,16 constructors = (17 documentViewer.manage_addDocumentViewerTemplateForm,18 documentViewer.manage_addDocumentViewerTemplate19 )20 )21 22 context.registerClass(23 15 MpdlXmlTextServer.MpdlXmlTextServer, 24 16 constructors = ( -
css/docuviewer.css
r21 r511 1 .thumb { 2 padding: 3px; 1 2 div.page-body { 3 display: table-row; 3 4 } 4 .thumbsel {5 padding: 2px;6 border: 1px solid blue;5 div.page-body div.col { 6 display: table-cell; 7 vertical-align: top; 7 8 } 8 .thumbcap { 9 color: black; 9 10 div.toc-text, 11 div.toc-figures { 12 max-width: 20em; 10 13 } 14 15 div.col.results { 16 max-width: 20em; 17 } 18 19 div.toc-text .toc, 20 div.toc-figures .toc { 21 float:left; 22 clear:right; 23 } 24 div.toc-text .toc.float.right, 25 div.toc-figures .toc.float.right { 26 float:right; 27 } 28 29 div.toc-thumbs .thumb { 30 padding: 3px; 31 } 32 div.toc-thumbs .thumbsel { 33 padding: 2px; 34 border: 1px solid blue; 35 } 36 div.toc-thumbs .thumbcap { 37 color: black; 38 } 39 40 span.hit.highlight { 41 background-color: lightgreen; 42 } -
documentViewer.py
r452 r513 1 2 1 from OFS.Folder import Folder 3 2 from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate 4 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 from App.ImageFile import ImageFile 5 5 from AccessControl import ClassSecurityInfo 6 6 from AccessControl import getSecurityManager 7 7 from Globals import package_home 8 from Products.zogiLib.zogiLib import browserCheck 9 10 from Ft.Xml import EMPTY_NAMESPACE, Parse 11 import Ft.Xml.Domlette 12 import os.path 8 9 import xml.etree.ElementTree as ET 10 11 import os 13 12 import sys 14 13 import urllib 15 import urllib216 14 import logging 17 15 import math 18 16 import urlparse 19 import cStringIO20 17 import re 21 18 import string 22 19 23 def logger(txt,method,txt2): 24 """logging""" 25 logging.info(txt+ txt2) 26 27 28 def getInt(number, default=0): 29 """returns always an int (0 in case of problems)""" 30 try: 31 return int(number) 32 except: 33 return int(default) 34 35 def getTextFromNode(nodename): 36 """get the cdata content of a node""" 37 if nodename is None: 38 return "" 39 nodelist=nodename.childNodes 40 rc = "" 41 for node in nodelist: 42 if node.nodeType == node.TEXT_NODE: 43 rc = rc + node.data 44 return rc 45 20 from SrvTxtUtils import getInt, getText, getHttpData, refreshingImageFileIndexHtml 21 46 22 def serializeNode(node, encoding="utf-8"): 47 23 """returns a string containing node as XML""" 48 stream = cStringIO.StringIO() 49 #logging.debug("BUF: %s"%(stream)) 50 Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 51 s = stream.getvalue() 52 #logging.debug("BUF: %s"%(s)) 53 stream.close() 24 s = ET.tostring(node) 25 26 # 4Suite: 27 # stream = cStringIO.StringIO() 28 # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 29 # s = stream.getvalue() 30 # stream.close() 54 31 return s 55 32 … … 115 92 return bt 116 93 117 118 def getParentDir(path): 119 """returns pathname shortened by one""" 120 return '/'.join(path.split('/')[0:-1]) 121 122 123 def getHttpData(url, data=None, num_tries=3, timeout=10): 124 """returns result from url+data HTTP request""" 125 # we do GET (by appending data to url) 126 if isinstance(data, str) or isinstance(data, unicode): 127 # if data is string then append 128 url = "%s?%s"%(url,data) 129 elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): 130 # urlencode 131 url = "%s?%s"%(url,urllib.urlencode(data)) 132 133 response = None 134 errmsg = None 135 for cnt in range(num_tries): 136 try: 137 logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) 138 if sys.version_info < (2, 6): 139 # set timeout on socket -- ugly :-( 140 import socket 141 socket.setdefaulttimeout(float(timeout)) 142 response = urllib2.urlopen(url) 143 else: 144 response = urllib2.urlopen(url,timeout=float(timeout)) 145 # check result? 146 break 147 except urllib2.HTTPError, e: 148 logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) 149 errmsg = str(e) 150 # stop trying 151 break 152 except urllib2.URLError, e: 153 logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) 154 errmsg = str(e) 155 # stop trying 156 #break 157 158 if response is not None: 159 data = response.read() 160 response.close() 161 return data 162 163 raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) 164 #return None 94 def getParentPath(path, cnt=1): 95 """returns pathname shortened by cnt""" 96 # make sure path doesn't end with / 97 path = path.rstrip('/') 98 # split by /, shorten, and reassemble 99 return '/'.join(path.split('/')[0:-cnt]) 165 100 166 101 ## … … 173 108 security=ClassSecurityInfo() 174 109 manage_options=Folder.manage_options+( 175 {'label':' main config','action':'changeDocumentViewerForm'},110 {'label':'Configuration','action':'changeDocumentViewerForm'}, 176 111 ) 112 113 metadataService = None 114 """MetaDataFolder instance""" 177 115 178 116 # templates and forms 179 viewer_main = PageTemplateFile('zpt/viewer_main', globals()) 117 viewer_text = PageTemplateFile('zpt/viewer_text', globals()) 118 viewer_xml = PageTemplateFile('zpt/viewer_xml', globals()) 119 viewer_images = PageTemplateFile('zpt/viewer_images', globals()) 120 viewer_index = PageTemplateFile('zpt/viewer_index', globals()) 180 121 toc_thumbs = PageTemplateFile('zpt/toc_thumbs', globals()) 181 122 toc_text = PageTemplateFile('zpt/toc_text', globals()) 182 123 toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 183 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 184 page_main_double = PageTemplateFile('zpt/page_main_double', globals()) 185 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 186 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) 187 page_main_gis =PageTemplateFile ('zpt/page_main_gis', globals()) 188 page_main_xml = PageTemplateFile('zpt/page_main_xml', globals()) 189 page_main_pureXml = PageTemplateFile('zpt/page_main_pureXml', globals()) 190 head_main = PageTemplateFile('zpt/head_main', globals()) 191 docuviewer_css = PageTemplateFile('css/docuviewer.css', globals()) 124 toc_none = PageTemplateFile('zpt/toc_none', globals()) 125 common_template = PageTemplateFile('zpt/common_template', globals()) 126 search_template = PageTemplateFile('zpt/search_template', globals()) 192 127 info_xml = PageTemplateFile('zpt/info_xml', globals()) 193 194 195 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 196 security.declareProtected('View management screens','changeDocumentViewerForm') 197 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 198 128 docuviewer_css = ImageFile('css/docuviewer.css',globals()) 129 # make ImageFile better for development 130 docuviewer_css.index_html = refreshingImageFileIndexHtml 131 jquery_js = ImageFile('js/jquery.js',globals()) 132 199 133 200 134 def __init__(self,id,imageScalerUrl=None,textServerName=None,title="",digilibBaseUrl=None,thumbcols=2,thumbrows=5,authgroups="mpiwg"): … … 209 143 210 144 templateFolder = Folder('template') 211 #self['template'] = templateFolder # Zope-2.12 style212 self._setObject('template',templateFolder) # old style145 self['template'] = templateFolder # Zope-2.12 style 146 #self._setObject('template',templateFolder) # old style 213 147 try: 214 148 import MpdlXmlTextServer 215 149 textServer = MpdlXmlTextServer.MpdlXmlTextServer(id='fulltextclient',serverName=textServerName) 216 #templateFolder['fulltextclient'] = xmlRpcClient217 templateFolder._setObject('fulltextclient',textServer)150 templateFolder['fulltextclient'] = textServer 151 #templateFolder._setObject('fulltextclient',textServer) 218 152 except Exception, e: 219 153 logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) 154 220 155 try: 221 156 from Products.zogiLib.zogiLib import zogiLib 222 157 zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") 223 #templateFolder['zogilib'] = zogilib224 templateFolder._setObject('zogilib',zogilib)158 templateFolder['zogilib'] = zogilib 159 #templateFolder._setObject('zogilib',zogilib) 225 160 except Exception, e: 226 161 logging.error("Unable to create zogiLib for zogilib: "+str(e)) 227 162 163 try: 164 # assume MetaDataFolder instance is called metadata 165 self.metadataService = getattr(self, 'metadata') 166 except Exception, e: 167 logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 168 169 if digilibBaseUrl is not None: 170 self.digilibBaseUrl = digilibBaseUrl 171 228 172 229 173 # proxy text server methods to fulltextclient 230 174 def getTextPage(self, **args): 231 """ getpage"""175 """returns full text content of page""" 232 176 return self.template.fulltextclient.getTextPage(**args) 233 177 234 def get OrigPages(self, **args):235 """ get page"""236 return self.template.fulltextclient.get OrigPages(**args)237 238 def get OrigPagesNorm(self, **args):239 """ get page"""240 return self.template.fulltextclient.get OrigPagesNorm(**args)241 242 def get Query(self, **args):243 """ get query in search"""244 return self.template.fulltextclient.get Query(**args)245 246 def get Search(self, **args):247 """ get search"""248 return self.template.fulltextclient.get Search(**args)249 250 def get GisPlaces(self, **args):251 """get gis places"""252 return self.template.fulltextclient.get GisPlaces(**args)178 def getSearchResults(self, **args): 179 """loads list of search results and stores XML in docinfo""" 180 return self.template.fulltextclient.getSearchResults(**args) 181 182 def getResultsPage(self, **args): 183 """returns one page of the search results""" 184 return self.template.fulltextclient.getResultsPage(**args) 185 186 def getToc(self, **args): 187 """loads table of contents and stores XML in docinfo""" 188 return self.template.fulltextclient.getToc(**args) 189 190 def getTocPage(self, **args): 191 """returns one page of the table of contents""" 192 return self.template.fulltextclient.getTocPage(**args) 193 194 def getPlacesOnPage(self, **args): 195 """get list of gis places on one page""" 196 return self.template.fulltextclient.getPlacesOnPage(**args) 253 197 254 def getAllGisPlaces(self, **args): 255 """get all gis places """ 256 return self.template.fulltextclient.getAllGisPlaces(**args) 257 258 def getTranslate(self, **args): 259 """get translate""" 260 return self.template.fulltextclient.getTranslate(**args) 261 262 def getLemma(self, **args): 263 """get lemma""" 264 return self.template.fulltextclient.getLemma(**args) 265 266 def getLemmaQuery(self, **args): 267 """get query""" 268 return self.template.fulltextclient.getLemmaQuery(**args) 269 270 def getLex(self, **args): 271 """get lex""" 272 return self.template.fulltextclient.getLex(**args) 273 274 def getToc(self, **args): 275 """get toc""" 276 return self.template.fulltextclient.getToc(**args) 277 278 def getTocPage(self, **args): 279 """get tocpage""" 280 return self.template.fulltextclient.getTocPage(**args) 281 282 198 #WTF? 199 thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) 283 200 security.declareProtected('View','thumbs_rss') 284 201 def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): … … 291 208 ''' 292 209 logging.debug("HHHHHHHHHHHHHH:load the rss") 293 logg er("documentViewer (index)", logging.INFO, "mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn))210 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 294 211 295 212 if not hasattr(self, 'template'): … … 313 230 314 231 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode) 232 315 233 316 234 security.declareProtected('View','index_html') 317 def index_html(self,url,mode="texttool",viewMode="auto",tocMode="thumbs",start=None,pn=1,mk=None): 318 ''' 319 view it 235 def index_html(self,url,mode="texttool",viewMode="auto",viewLayer=None,tocMode="thumbs",start=1,pn=1): 236 """ 237 view page 238 @param url: url which contains display information 320 239 @param mode: defines how to access the document behind url 321 @param url: url which contains display information322 @param view Mode: if images display images, if text display text, default is auto (text,images or auto)240 @param viewMode: 'images': display images, 'text': display text, 'xml': display xml, default is 'auto' 241 @param viewLayer: sub-type of viewMode, e.g. 'dict' for viewMode='text' 323 242 @param tocMode: type of 'table of contents' for navigation (thumbs, text, figures, none) 324 @param characterNormalization type of text display (reg, norm, none) 325 @param querySearch: type of different search modes (fulltext, fulltextMorph, xpath, xquery, ftIndex, ftIndexMorph, fulltextMorphLemma) 326 ''' 327 328 logging.debug("documentViewer (index) mode: %s url:%s start:%s pn:%s"%(mode,url,start,pn)) 243 """ 244 245 logging.debug("documentViewer(index_html) mode=%s url=%s viewMode=%s viewLayer=%s start=%s pn=%s"%(mode,url,viewMode,viewLayer,start,pn)) 329 246 330 247 if not hasattr(self, 'template'): … … 340 257 if tocMode != "thumbs": 341 258 # get table of contents 342 docinfo = self.getToc(mode=tocMode, docinfo=docinfo) 343 344 if viewMode=="auto": # automodus gewaehlt 345 if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 346 viewMode="text_dict" 259 self.getToc(mode=tocMode, docinfo=docinfo) 260 261 # auto viewMode: text if there is a text else images 262 if viewMode=="auto": 263 if docinfo.get('textURL', None) or docinfo.get('textURLPath', None): 264 viewMode = "text" 265 if viewLayer is None: 266 viewLayer = "dict" 347 267 else: 348 viewMode ="images"268 viewMode = "images" 349 269 350 pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 351 352 if (docinfo.get('textURLPath',None)): 353 page = self.getTextPage(docinfo=docinfo, pageinfo=pageinfo) 354 pageinfo['textPage'] = page 355 tt = getattr(self, 'template') 356 pt = getattr(tt, 'viewer_main') 357 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 270 elif viewMode == "text_dict": 271 # legacy fix 272 viewMode = "text" 273 viewLayer = "dict" 274 275 pageinfo = self.getPageinfo(start=start, current=pn, docinfo=docinfo, viewMode=viewMode, viewLayer=viewLayer, tocMode=tocMode) 276 277 # get template /template/viewer_$viewMode 278 pt = getattr(self.template, 'viewer_%s'%viewMode, None) 279 if pt is None: 280 logging.error("No template for viewMode=%s!"%viewMode) 281 # TODO: error page? 282 return "No template for viewMode=%s!"%viewMode 283 284 # and execute with parameters 285 return pt(docinfo=docinfo, pageinfo=pageinfo) 358 286 287 #WTF? 359 288 def generateMarks(self,mk): 360 289 ret="" … … 378 307 url = self.template.zogilib.getDLBaseUrl() 379 308 return url 309 310 def getScalerUrl(self, fn=None, pn=None, dw=100, dh=100, docinfo=None): 311 """returns URL to digilib Scaler with params""" 312 url = None 313 if docinfo is not None: 314 url = docinfo.get('imageURL', None) 315 316 if url is None: 317 url = "%s/servlet/Scaler?"%self.digilibBaseUrl 318 if fn is None and docinfo is not None: 319 fn = docinfo.get('imagePath','') 320 321 url += "fn=%s"%fn 322 323 if pn: 324 url += "&pn=%s"%pn 325 326 url += "&dw=%s&dh=%s"%(dw,dh) 327 return url 380 328 381 329 def getDocumentViewerURL(self): … … 384 332 385 333 def getStyle(self, idx, selected, style=""): 386 """returns a string with the given style and append 'sel' if path== selected."""334 """returns a string with the given style and append 'sel' if idx == selected.""" 387 335 #logger("documentViewer (getstyle)", logging.INFO, "idx: %s selected: %s style: %s"%(idx,selected,style)) 388 336 if idx == selected: … … 391 339 return style 392 340 393 def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'): 394 """returns URL to documentviewer with parameter param set to val or from dict params""" 341 def getParams(self, param=None, val=None, params=None, duplicates=None): 342 """returns dict with URL parameters. 343 344 Takes URL parameters and additionally param=val or dict params. 345 Deletes key if value is None.""" 395 346 # copy existing request params 396 urlParams=self.REQUEST.form.copy()347 newParams=self.REQUEST.form.copy() 397 348 # change single param 398 349 if param is not None: 399 350 if val is None: 400 if urlParams.has_key(param):401 del urlParams[param]351 if newParams.has_key(param): 352 del newParams[param] 402 353 else: 403 urlParams[param] = str(val)354 newParams[param] = str(val) 404 355 405 356 # change more params 406 357 if params is not None: 407 for k in params.keys(): 408 v = params[k] 358 for (k, v) in params.items(): 409 359 if v is None: 410 360 # val=None removes param 411 if urlParams.has_key(k):412 del urlParams[k]361 if newParams.has_key(k): 362 del newParams[k] 413 363 414 364 else: 415 urlParams[k] = v 416 417 # FIXME: does this belong here? 418 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 419 urlParams["mode"] = "imagepath" 420 urlParams["url"] = getParentDir(urlParams["url"]) 421 365 newParams[k] = v 366 367 if duplicates: 368 # eliminate lists (coming from duplicate keys) 369 for (k,v) in newParams.items(): 370 if isinstance(v, list): 371 if duplicates == 'comma': 372 # make comma-separated list of non-empty entries 373 newParams[k] = ','.join([t for t in v if t]) 374 elif duplicates == 'first': 375 # take first non-empty entry 376 newParams[k] = [t for t in v if t][0] 377 378 return newParams 379 380 def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&', duplicates='comma'): 381 """returns URL to documentviewer with parameter param set to val or from dict params""" 382 urlParams = self.getParams(param=param, val=val, params=params, duplicates=duplicates) 422 383 # quote values and assemble into query string (not escaping '/') 423 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) 424 #ps = urllib.urlencode(urlParams) 384 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(unicode(v),'/')) for (k, v) in urlParams.items()]) 425 385 if baseUrl is None: 426 baseUrl = self. REQUEST['URL1']386 baseUrl = self.getDocumentViewerURL() 427 387 428 388 url = "%s?%s"%(baseUrl, ps) 429 389 return url 430 390 431 432 def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None): 391 def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None, duplicates='comma'): 433 392 """link to documentviewer with parameter param set to val""" 434 return self.getLink(param, val, params, baseUrl, '&') 393 return self.getLink(param=param, val=val, params=params, baseUrl=baseUrl, paramSep='&', duplicates=duplicates) 394 435 395 436 396 def getInfo_xml(self,url,mode): 437 397 """returns info about the document as XML""" 438 439 398 if not self.digilibBaseUrl: 440 399 self.digilibBaseUrl = self.findDigilibUrl() or "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary" … … 444 403 return pt(docinfo=docinfo) 445 404 446 def getOptionToggle(self, newState=None, optionName='text_options_open', initialState=True):447 """returns new option state"""448 if not self.REQUEST.SESSION.has_key(optionName):449 # not in session -- initial450 opt = {'lastState': newState, 'state': initialState}451 else:452 opt = self.REQUEST.SESSION.get(optionName)453 if opt['lastState'] != newState:454 # state in session has changed -- toggle455 opt['state'] = not opt['state']456 opt['lastState'] = newState457 458 self.REQUEST.SESSION[optionName] = opt459 return opt['state']460 461 405 def isAccessible(self, docinfo): 462 406 """returns if access to the resource is granted""" 463 407 access = docinfo.get('accessType', None) 464 408 logging.debug("documentViewer (accessOK) access type %s"%access) 465 if access is not None and access== 'free':409 if access == 'free': 466 410 logging.debug("documentViewer (accessOK) access is free") 467 411 return True 412 468 413 elif access is None or access in self.authgroups: 469 414 # only local access -- only logged in users … … 479 424 return False 480 425 426 427 428 def getDocinfo(self, mode, url): 429 """returns docinfo depending on mode""" 430 logging.debug("getDocinfo: mode=%s, url=%s"%(mode,url)) 431 # look for cached docinfo in session 432 if self.REQUEST.SESSION.has_key('docinfo'): 433 docinfo = self.REQUEST.SESSION['docinfo'] 434 # check if its still current 435 if docinfo is not None and docinfo.get('mode', None) == mode and docinfo.get('url', None) == url: 436 logging.debug("getDocinfo: docinfo in session. keys=%s"%docinfo.keys()) 437 return docinfo 438 439 # new docinfo 440 docinfo = {'mode': mode, 'url': url} 441 # add self url 442 docinfo['viewerUrl'] = self.getDocumentViewerURL() 443 docinfo['digilibBaseUrl'] = self.digilibBaseUrl 444 # get index.meta DOM 445 docUrl = None 446 metaDom = None 447 if mode=="texttool": 448 # url points to document dir or index.meta 449 metaDom = self.metadataService.getDomFromPathOrUrl(url) 450 docUrl = url.replace('/index.meta', '') 451 if metaDom is None: 452 raise IOError("Unable to find index.meta for mode=texttool!") 453 454 elif mode=="imagepath": 455 # url points to folder with images, index.meta optional 456 # asssume index.meta in parent dir 457 docUrl = getParentPath(url) 458 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 459 460 elif mode=="filepath": 461 # url points to image file, index.meta optional 462 # asssume index.meta is two path segments up 463 docUrl = getParentPath(url, 2) 464 metaDom = self.metadataService.getDomFromPathOrUrl(docUrl) 465 466 else: 467 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 468 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 469 470 docinfo['documentUrl'] = docUrl 471 # process index.meta contents 472 if metaDom is not None and metaDom.tag == 'resource': 473 # document directory name and path 474 resource = self.metadataService.getResourceData(dom=metaDom) 475 if resource: 476 docinfo = self.getDocinfoFromResource(docinfo, resource) 477 478 # texttool info 479 texttool = self.metadataService.getTexttoolData(dom=metaDom) 480 if texttool: 481 docinfo = self.getDocinfoFromTexttool(docinfo, texttool) 482 483 # bib info 484 bib = self.metadataService.getBibData(dom=metaDom) 485 if bib: 486 docinfo = self.getDocinfoFromBib(docinfo, bib) 487 else: 488 # no bib - try info.xml 489 docinfo = self.getDocinfoFromPresentationInfoXml(docinfo) 481 490 482 def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): 483 """gibt param von dlInfo aus""" 484 if docinfo is None: 485 docinfo = {} 486 487 for x in range(cut): 488 489 path=getParentDir(path) 490 491 # auth info 492 access = self.metadataService.getAccessData(dom=metaDom) 493 if access: 494 docinfo = self.getDocinfoFromAccess(docinfo, access) 495 496 # attribution info 497 attribution = self.metadataService.getAttributionData(dom=metaDom) 498 if attribution: 499 logging.debug("getDocinfo: attribution=%s"%repr(attribution)) 500 docinfo['attribution'] = attribution 501 #docinfo = self.getDocinfoFromAccess(docinfo, access) 502 503 # copyright info 504 copyright = self.metadataService.getCopyrightData(dom=metaDom) 505 if copyright: 506 logging.debug("getDocinfo: copyright=%s"%repr(copyright)) 507 docinfo['copyright'] = copyright 508 #docinfo = self.getDocinfoFromAccess(docinfo, access) 509 510 # image path 511 if mode != 'texttool': 512 # override image path from texttool with url 513 docinfo['imagePath'] = url.replace('/mpiwg/online/', '', 1) 514 515 # number of images from digilib 516 if docinfo.get('imagePath', None): 517 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + docinfo['imagePath'] 518 docinfo = self.getDocinfoFromDigilib(docinfo, docinfo['imagePath']) 519 520 logging.debug("documentViewer (getdocinfo) docinfo: keys=%s"%docinfo.keys()) 521 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 522 # store in session 523 self.REQUEST.SESSION['docinfo'] = docinfo 524 return docinfo 525 526 def getDocinfoFromResource(self, docinfo, resource): 527 """reads contents of resource element into docinfo""" 528 docName = resource.get('name', None) 529 docinfo['documentName'] = docName 530 docPath = resource.get('archive-path', None) 531 if docPath: 532 # clean up document path 533 if docPath[0] != '/': 534 docPath = '/' + docPath 535 536 if docName and (not docPath.endswith(docName)): 537 docPath += "/" + docName 538 539 else: 540 # use docUrl as docPath 541 docUrl = docinfo['documentURL'] 542 if not docUrl.startswith('http:'): 543 docPath = docUrl 544 if docPath: 545 # fix URLs starting with /mpiwg/online 546 docPath = docPath.replace('/mpiwg/online', '', 1) 547 548 docinfo['documentPath'] = docPath 549 return docinfo 550 551 def getDocinfoFromTexttool(self, docinfo, texttool): 552 """reads contents of texttool element into docinfo""" 553 # image dir 554 imageDir = texttool.get('image', None) 555 docPath = docinfo.get('documentPath', None) 556 if imageDir and docPath: 557 #print "image: ", imageDir, " archivepath: ", archivePath 558 imageDir = os.path.join(docPath, imageDir) 559 imageDir = imageDir.replace('/mpiwg/online', '', 1) 560 docinfo['imagePath'] = imageDir 561 562 # old style text URL 563 textUrl = texttool.get('text', None) 564 if textUrl and docPath: 565 if urlparse.urlparse(textUrl)[0] == "": #keine url 566 textUrl = os.path.join(docPath, textUrl) 567 568 docinfo['textURL'] = textUrl 569 570 # new style text-url-path 571 textUrl = texttool.get('text-url-path', None) 572 if textUrl: 573 docinfo['textURLPath'] = textUrl 574 575 # page flow 576 docinfo['pageFlow'] = texttool.get('page-flow', 'ltr') 577 578 # odd pages are left 579 docinfo['oddPage'] = texttool.get('odd-scan-position', 'left') 580 581 # number of title page (0: not defined) 582 docinfo['titlePage'] = texttool.get('title-scan-no', 0) 583 584 # old presentation stuff 585 presentation = texttool.get('presentation', None) 586 if presentation and docPath: 587 if presentation.startswith('http:'): 588 docinfo['presentationUrl'] = presentation 589 else: 590 docinfo['presentationUrl'] = os.path.join(docPath, presentation) 591 592 593 return docinfo 594 595 def getDocinfoFromBib(self, docinfo, bib): 596 """reads contents of bib element into docinfo""" 597 logging.debug("getDocinfoFromBib bib=%s"%repr(bib)) 598 # put all raw bib fields in dict "bib" 599 docinfo['bib'] = bib 600 bibtype = bib.get('@type', None) 601 docinfo['bibType'] = bibtype 602 # also store DC metadata for convenience 603 dc = self.metadataService.getDCMappedData(bib) 604 docinfo['creator'] = dc.get('creator',None) 605 docinfo['title'] = dc.get('title',None) 606 docinfo['date'] = dc.get('date',None) 607 return docinfo 608 609 def getDocinfoFromAccess(self, docinfo, acc): 610 """reads contents of access element into docinfo""" 611 #TODO: also read resource type 612 logging.debug("getDocinfoFromAccess acc=%s"%repr(acc)) 613 try: 614 acctype = acc['@attr']['type'] 615 if acctype: 616 access=acctype 617 if access in ['group', 'institution']: 618 access = acc['name'].lower() 619 620 docinfo['accessType'] = access 621 622 except: 623 pass 624 625 return docinfo 626 627 def getDocinfoFromDigilib(self, docinfo, path): 491 628 infoUrl=self.digilibBaseUrl+"/dirInfo-xml.jsp?mo=dir&fn="+path 492 493 logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) 494 629 # fetch data 495 630 txt = getHttpData(infoUrl) 496 if txt is None:497 raise IOError("Unable to get dir-info from %s"%(infoUrl))498 499 dom = Parse(txt) 500 sizes=dom.xpath("//dir/size")501 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes)502 503 if size s:504 docinfo['numPages'] = int( getTextFromNode(sizes[0]))631 if not txt: 632 logging.error("Unable to get dir-info from %s"%(infoUrl)) 633 return docinfo 634 635 dom = ET.fromstring(txt) 636 size = getText(dom.find("size")) 637 logging.debug("getDocinfoFromDigilib: size=%s"%size) 638 if size: 639 docinfo['numPages'] = int(size) 505 640 else: 506 641 docinfo['numPages'] = 0 507 642 508 643 # TODO: produce and keep list of image names and numbers 509 510 644 return docinfo 511 512 def getIndexMetaPath(self,url): 513 """gib nur den Pfad zurueck""" 514 regexp = re.compile(r".*(experimental|permanent)/(.*)") 515 regpath = regexp.match(url) 516 if (regpath==None): 517 return "" 518 logging.debug("(getDomFromIndexMeta): URLXAXA: %s"%regpath.group(2)) 519 return ("/mpiwg/online/"+regpath.group(1)+"/"+regpath.group(2)) 520 521 522 523 def getIndexMetaUrl(self,url): 524 """returns utr of index.meta document at url""" 525 526 metaUrl = None 527 if url.startswith("http://"): 528 # real URL 529 metaUrl = url 530 else: 531 # online path 532 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 533 metaUrl=server+url.replace("/mpiwg/online","") 534 if not metaUrl.endswith("index.meta"): 535 metaUrl += "/index.meta" 536 537 return metaUrl 538 539 def getDomFromIndexMeta(self, url): 540 """get dom from index meta""" 541 dom = None 542 metaUrl = self.getIndexMetaUrl(url) 543 544 logging.debug("(getDomFromIndexMeta): METAURL: %s"%metaUrl) 545 txt=getHttpData(metaUrl) 546 if txt is None: 547 raise IOError("Unable to read index meta from %s"%(url)) 548 549 dom = Parse(txt) 550 return dom 551 552 def getPresentationInfoXML(self, url): 553 """returns dom of info.xml document at url""" 645 646 647 def getDocinfoFromPresentationInfoXml(self,docinfo): 648 """gets DC-like bibliographical information from the presentation entry in texttools""" 649 url = docinfo.get('presentationUrl', None) 650 if not url: 651 logging.error("getDocinfoFromPresentation: no URL!") 652 return docinfo 653 554 654 dom = None 555 655 metaUrl = None … … 559 659 else: 560 660 # online path 661 561 662 server=self.digilibBaseUrl+"/servlet/Texter?fn=" 562 metaUrl=server+url .replace("/mpiwg/online","")663 metaUrl=server+url 563 664 564 665 txt=getHttpData(metaUrl) 565 666 if txt is None: 566 raise IOError("Unable to read infoXMLfrom %s"%(url)) 567 568 dom = Parse(txt) 569 return dom 667 logging.error("Unable to read info.xml from %s"%(url)) 668 return docinfo 669 670 dom = ET.fromstring(txt) 671 docinfo['creator']=getText(dom.find(".//author")) 672 docinfo['title']=getText(dom.find(".//title")) 673 docinfo['date']=getText(dom.find(".//date")) 674 return docinfo 675 676 677 def getPageinfo(self, current=None, start=None, rows=None, cols=None, docinfo=None, viewMode=None, viewLayer=None, tocMode=None): 678 """returns pageinfo with the given parameters""" 679 logging.debug("getPageInfo(current=%s, start=%s, rows=%s, cols=%s, viewMode=%s, viewLayer=%s, tocMode=%s)"%(current,start,rows,cols,viewMode,viewLayer,tocMode)) 680 pageinfo = {} 681 pageinfo['viewMode'] = viewMode 682 # split viewLayer if necessary 683 if isinstance(viewLayer,basestring): 684 viewLayer = viewLayer.split(',') 685 686 if isinstance(viewLayer, list): 687 logging.debug("getPageinfo: viewLayer is list:%s"%viewLayer) 688 # save (unique) list in viewLayers 689 seen = set() 690 viewLayers = [l for l in viewLayer if l and l not in seen and not seen.add(l)] 691 pageinfo['viewLayers'] = viewLayers 692 # stringify viewLayer 693 viewLayer = ','.join(viewLayers) 694 else: 695 #create list 696 pageinfo['viewLayers'] = [viewLayer] 570 697 571 572 def getAuthinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 573 """gets authorization info from the index.meta file at path or given by dom""" 574 logging.debug("documentViewer (getauthinfofromindexmeta) path: %s"%(path)) 575 576 access = None 577 578 if docinfo is None: 579 docinfo = {} 580 581 if dom is None: 582 for x in range(cut): 583 path=getParentDir(path) 584 dom = self.getDomFromIndexMeta(path) 585 586 acctype = dom.xpath("//access-conditions/access/@type") 587 if acctype and (len(acctype)>0): 588 access=acctype[0].value 589 if access in ['group', 'institution']: 590 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 591 592 docinfo['accessType'] = access 593 return docinfo 594 595 596 def getBibinfoFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 597 """gets bibliographical info from the index.meta file at path or given by dom""" 598 #logging.debug("documentViewer (getbibinfofromindexmeta) path: %s"%(path)) 599 600 if docinfo is None: 601 docinfo = {} 602 603 if dom is None: 604 for x in range(cut): 605 path=getParentDir(path) 606 dom = self.getDomFromIndexMeta(path) 607 608 docinfo['indexMetaPath']=self.getIndexMetaPath(path); 609 610 #logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 611 # put in all raw bib fields as dict "bib" 612 bib = dom.xpath("//bib/*") 613 if bib and len(bib)>0: 614 bibinfo = {} 615 for e in bib: 616 bibinfo[e.localName] = getTextFromNode(e) 617 docinfo['bib'] = bibinfo 618 619 # extract some fields (author, title, year) according to their mapping 620 metaData=self.metadata.main.meta.bib 621 bibtype=dom.xpath("//bib/@type") 622 if bibtype and (len(bibtype)>0): 623 bibtype=bibtype[0].value 624 else: 625 bibtype="generic" 626 627 bibtype=bibtype.replace("-"," ") # wrong typesiin index meta "-" instead of " " (not wrong! ROC) 628 docinfo['bib_type'] = bibtype 629 bibmap=metaData.generateMappingForType(bibtype) 630 #logging.debug("documentViewer (getbibinfofromindexmeta) bibmap:"+repr(bibmap)) 631 #logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype)) 632 # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 633 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0: 634 try: 635 docinfo['author']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0]) 636 except: pass 637 try: 638 docinfo['title']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0]) 639 except: pass 640 try: 641 docinfo['year']=getTextFromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0]) 642 except: pass 643 #logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 644 try: 645 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 646 except: 647 docinfo['lang']='' 648 try: 649 docinfo['city']=getTextFromNode(dom.xpath("//bib/city")[0]) 650 except: 651 docinfo['city']='' 652 try: 653 docinfo['number_of_pages']=getTextFromNode(dom.xpath("//bib/number_of_pages")[0]) 654 except: 655 docinfo['number_of_pages']='' 656 try: 657 docinfo['series_volume']=getTextFromNode(dom.xpath("//bib/series_volume")[0]) 658 except: 659 docinfo['series_volume']='' 660 try: 661 docinfo['number_of_volumes']=getTextFromNode(dom.xpath("//bib/number_of_volumes")[0]) 662 except: 663 docinfo['number_of_volumes']='' 664 try: 665 docinfo['translator']=getTextFromNode(dom.xpath("//bib/translator")[0]) 666 except: 667 docinfo['translator']='' 668 try: 669 docinfo['edition']=getTextFromNode(dom.xpath("//bib/edition")[0]) 670 except: 671 docinfo['edition']='' 672 try: 673 docinfo['series_author']=getTextFromNode(dom.xpath("//bib/series_author")[0]) 674 except: 675 docinfo['series_author']='' 676 try: 677 docinfo['publisher']=getTextFromNode(dom.xpath("//bib/publisher")[0]) 678 except: 679 docinfo['publisher']='' 680 try: 681 docinfo['series_title']=getTextFromNode(dom.xpath("//bib/series_title")[0]) 682 except: 683 docinfo['series_title']='' 684 try: 685 docinfo['isbn_issn']=getTextFromNode(dom.xpath("//bib/isbn_issn")[0]) 686 except: 687 docinfo['isbn_issn']='' 688 #logging.debug("I NEED BIBTEX %s"%docinfo) 689 return docinfo 690 691 692 def getNameFromIndexMeta(self,path,docinfo=None,dom=None,cut=0): 693 """gets name info from the index.meta file at path or given by dom""" 694 if docinfo is None: 695 docinfo = {} 696 697 if dom is None: 698 for x in range(cut): 699 path=getParentDir(path) 700 dom = self.getDomFromIndexMeta(path) 701 702 docinfo['name']=getTextFromNode(dom.xpath("/resource/name")[0]) 703 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) 704 return docinfo 705 706 def getDocinfoFromTextTool(self, url, dom=None, docinfo=None): 707 """parse texttool tag in index meta""" 708 logging.debug("documentViewer (getdocinfofromtexttool) url: %s" % (url)) 709 if docinfo is None: 710 docinfo = {} 711 if docinfo.get('lang', None) is None: 712 docinfo['lang'] = '' # default keine Sprache gesetzt 713 if dom is None: 714 dom = self.getDomFromIndexMeta(url) 715 716 archivePath = None 717 archiveName = None 718 719 archiveNames = dom.xpath("//resource/name") 720 if archiveNames and (len(archiveNames) > 0): 721 archiveName = getTextFromNode(archiveNames[0]) 722 else: 723 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) 724 725 archivePaths = dom.xpath("//resource/archive-path") 726 if archivePaths and (len(archivePaths) > 0): 727 archivePath = getTextFromNode(archivePaths[0]) 728 # clean up archive path 729 if archivePath[0] != '/': 730 archivePath = '/' + archivePath 731 if archiveName and (not archivePath.endswith(archiveName)): 732 archivePath += "/" + archiveName 733 else: 734 # try to get archive-path from url 735 logging.warning("documentViewer (getdocinfofromtexttool) resource/archive-path missing in: %s" % (url)) 736 if (not url.startswith('http')): 737 archivePath = url.replace('index.meta', '') 738 739 if archivePath is None: 740 # we balk without archive-path 741 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 742 743 imageDirs = dom.xpath("//texttool/image") 744 if imageDirs and (len(imageDirs) > 0): 745 imageDir = getTextFromNode(imageDirs[0]) 746 747 else: 748 # we balk with no image tag / not necessary anymore because textmode is now standard 749 #raise IOError("No text-tool info in %s"%(url)) 750 imageDir = "" 751 #xquery="//pb" 752 docinfo['imagePath'] = "" # keine Bilder 753 docinfo['imageURL'] = "" 754 755 if imageDir and archivePath: 756 #print "image: ", imageDir, " archivepath: ", archivePath 757 imageDir = os.path.join(archivePath, imageDir) 758 imageDir = imageDir.replace("/mpiwg/online", '') 759 docinfo = self.getDirinfoFromDigilib(imageDir, docinfo=docinfo) 760 docinfo['imagePath'] = imageDir 761 762 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 763 764 viewerUrls = dom.xpath("//texttool/digiliburlprefix") 765 if viewerUrls and (len(viewerUrls) > 0): 766 viewerUrl = getTextFromNode(viewerUrls[0]) 767 docinfo['viewerURL'] = viewerUrl 768 769 # old style text URL 770 textUrls = dom.xpath("//texttool/text") 771 if textUrls and (len(textUrls) > 0): 772 textUrl = getTextFromNode(textUrls[0]) 773 if urlparse.urlparse(textUrl)[0] == "": #keine url 774 textUrl = os.path.join(archivePath, textUrl) 775 # fix URLs starting with /mpiwg/online 776 if textUrl.startswith("/mpiwg/online"): 777 textUrl = textUrl.replace("/mpiwg/online", '', 1) 778 779 docinfo['textURL'] = textUrl 780 781 782 #TODO: hack-DW for annalen 783 if (textUrl is not None) and (textUrl.startswith("/permanent/einstein/annalen")): 784 textUrl=textUrl.replace("/permanent/einstein/annalen/","/diverse/de/") 785 splitted=textUrl.split("/fulltext") 786 textUrl=splitted[0]+".xml" 787 textUrlkurz = string.split(textUrl, ".")[0] 788 docinfo['textURLPathkurz'] = textUrlkurz 789 docinfo['textURLPath'] = textUrl 790 logging.debug("hack") 791 logging.debug(textUrl) 792 793 794 # new style text-url-path 795 textUrls = dom.xpath("//texttool/text-url-path") 796 if textUrls and (len(textUrls) > 0): 797 textUrl = getTextFromNode(textUrls[0]) 798 docinfo['textURLPath'] = textUrl 799 textUrlkurz = string.split(textUrl, ".")[0] 800 docinfo['textURLPathkurz'] = textUrlkurz 801 #if not docinfo['imagePath']: 802 # text-only, no page images 803 #docinfo = self.getNumTextPages(docinfo) 804 805 806 presentationUrls = dom.xpath("//texttool/presentation") 807 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 808 #docinfo = self.getDownloadfromDocinfoToBibtex(url, docinfo=docinfo, dom=dom) 809 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) 810 811 812 if presentationUrls and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen 813 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 814 # durch den relativen Pfad auf die presentation infos 815 presentationPath = getTextFromNode(presentationUrls[0]) 816 if url.endswith("index.meta"): 817 presentationUrl = url.replace('index.meta', presentationPath) 818 else: 819 presentationUrl = url + "/" + presentationPath 820 821 docinfo = self.getBibinfoFromTextToolPresentation(presentationUrl, docinfo=docinfo, dom=dom) 822 823 docinfo = self.getAuthinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get access info 824 825 return docinfo 826 827 828 def getBibinfoFromTextToolPresentation(self,url,docinfo=None,dom=None): 829 """gets the bibliographical information from the preseantion entry in texttools 830 """ 831 dom=self.getPresentationInfoXML(url) 832 try: 833 docinfo['author']=getTextFromNode(dom.xpath("//author")[0]) 834 except: 835 pass 836 try: 837 docinfo['title']=getTextFromNode(dom.xpath("//title")[0]) 838 except: 839 pass 840 try: 841 docinfo['year']=getTextFromNode(dom.xpath("//date")[0]) 842 except: 843 pass 844 return docinfo 845 846 def getDocinfoFromImagePath(self,path,docinfo=None,cut=0): 847 """path ist the path to the images it assumes that the index.meta file is one level higher.""" 848 logging.debug("documentViewer (getdocinfofromimagepath) path: %s"%(path)) 849 if docinfo is None: 850 docinfo = {} 851 path=path.replace("/mpiwg/online","") 852 docinfo['imagePath'] = path 853 docinfo=self.getDirinfoFromDigilib(path,docinfo=docinfo,cut=cut) 854 855 pathorig=path 856 for x in range(cut): 857 path=getParentDir(path) 858 logging.debug("documentViewer (getdocinfofromimagepath) PATH:"+path) 859 imageUrl=self.digilibBaseUrl+"/servlet/Scaler?fn="+path 860 docinfo['imageURL'] = imageUrl 861 862 #path ist the path to the images it assumes that the index.meta file is one level higher. 863 docinfo = self.getBibinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 864 #docinfo = self.getDownloadfromDocinfoToBibtex(pathorig,docinfo=docinfo,cut=cut+1) 865 docinfo = self.getAuthinfoFromIndexMeta(pathorig,docinfo=docinfo,cut=cut+1) 866 return docinfo 867 868 869 def getDocinfo(self, mode, url): 870 """returns docinfo depending on mode""" 871 logging.debug("documentViewer (getdocinfo) mode: %s, url: %s"%(mode,url)) 872 # look for cached docinfo in session 873 if self.REQUEST.SESSION.has_key('docinfo'): 874 docinfo = self.REQUEST.SESSION['docinfo'] 875 # check if its still current 876 if docinfo is not None and docinfo.get('mode') == mode and docinfo.get('url') == url: 877 logging.debug("documentViewer (getdocinfo) docinfo in session: %s"%docinfo) 878 return docinfo 879 # new docinfo 880 docinfo = {'mode': mode, 'url': url} 881 if mode=="texttool": #index.meta with texttool information 882 docinfo = self.getDocinfoFromTextTool(url, docinfo=docinfo) 883 elif mode=="imagepath": 884 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo) 885 elif mode=="filepath": 886 docinfo = self.getDocinfoFromImagePath(url, docinfo=docinfo,cut=1) 887 else: 888 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 889 raise ValueError("Unknown mode %s! Has to be one of 'texttool','imagepath','filepath'."%(mode)) 890 891 # FIXME: fake texturlpath 892 if not docinfo.has_key('textURLPath'): 893 docinfo['textURLPath'] = None 894 895 logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 896 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%) 897 self.REQUEST.SESSION['docinfo'] = docinfo 898 return docinfo 899 900 def getPageinfo(self, current, start=None, rows=None, cols=None, docinfo=None, viewMode=None, tocMode=None): 901 """returns pageinfo with the given parameters""" 902 pageinfo = {} 698 pageinfo['viewLayer'] = viewLayer 699 pageinfo['tocMode'] = tocMode 700 903 701 current = getInt(current) 904 905 702 pageinfo['current'] = current 703 pageinfo['pn'] = current 906 704 rows = int(rows or self.thumbrows) 907 705 pageinfo['rows'] = rows … … 910 708 grpsize = cols * rows 911 709 pageinfo['groupsize'] = grpsize 710 # is start is empty use one around current 912 711 start = getInt(start, default=(math.ceil(float(current)/float(grpsize))*grpsize-(grpsize-1))) 913 712 # int(current / grpsize) * grpsize +1)) 914 713 pageinfo['start'] = start 915 pageinfo['end'] = start + grpsize 916 if (docinfo is not None) and ('numPages' in docinfo): 917 np = int(docinfo['numPages']) 918 pageinfo['end'] = min(pageinfo['end'], np) 919 pageinfo['numgroups'] = int(np / grpsize) 920 if np % grpsize > 0: 921 pageinfo['numgroups'] += 1 922 pageinfo['viewMode'] = viewMode 923 pageinfo['tocMode'] = tocMode 714 # get number of pages 715 np = int(docinfo.get('numPages', 0)) 716 if np == 0: 717 # numPages unknown - maybe we can get it from text page 718 if docinfo.get('textURLPath', None): 719 # cache text page as well 720 pageinfo['textPage'] = self.getTextPage(mode=viewLayer, pn=current, docinfo=docinfo, pageinfo=pageinfo) 721 np = int(docinfo.get('numPages', 0)) 722 723 # cache table of contents 724 pageinfo['tocPageSize'] = getInt(self.REQUEST.get('tocPageSize', 30)) 725 pageinfo['numgroups'] = int(np / grpsize) 726 if np % grpsize > 0: 727 pageinfo['numgroups'] += 1 728 729 pageFlowLtr = docinfo.get('pageFlow', 'ltr') != 'rtl' 730 oddScanLeft = docinfo.get('oddPage', 'left') != 'right' 731 # add zeroth page for two columns 732 pageZero = (cols == 2 and (pageFlowLtr != oddScanLeft)) 733 pageinfo['pageZero'] = pageZero 734 pageinfo['pageBatch'] = self.getPageBatch(start=start, rows=rows, cols=cols, pageFlowLtr=pageFlowLtr, pageZero=pageZero, minIdx=1, maxIdx=np) 735 924 736 pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') 925 #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1') 926 pageinfo['query'] = self.REQUEST.get('query','') 927 pageinfo['queryType'] = self.REQUEST.get('queryType','') 928 pageinfo['querySearch'] =self.REQUEST.get('querySearch', 'fulltext') 929 pageinfo['textPN'] = self.REQUEST.get('textPN','1') 930 pageinfo['highlightQuery'] = self.REQUEST.get('highlightQuery','') 931 932 pageinfo ['highlightElementPos'] = self.REQUEST.get('highlightElementPos','') 933 pageinfo ['highlightElement'] = self.REQUEST.get('highlightElement','') 934 935 pageinfo ['xpointer'] = self.REQUEST.get('xpointer','') 936 937 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') 938 pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 939 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 940 toc = int (pageinfo['tocPN']) 941 pageinfo['textPages'] =int (toc) 942 943 if 'tocSize_%s'%tocMode in docinfo: 944 tocSize = int(docinfo['tocSize_%s'%tocMode]) 945 tocPageSize = int(pageinfo['tocPageSize']) 946 # cached toc 947 if tocSize%tocPageSize>0: 948 tocPages=tocSize/tocPageSize+1 737 738 # cache search results 739 pageinfo['resultPageSize'] = getInt(self.REQUEST.get('resultPageSize', 10)) 740 query = self.REQUEST.get('query',None) 741 pageinfo['query'] = query 742 if query: 743 queryType = self.REQUEST.get('queryType', 'fulltextMorph') 744 pageinfo['queryType'] = queryType 745 pageinfo['resultStart'] = getInt(self.REQUEST.get('resultStart', '1')) 746 self.getSearchResults(mode=queryType, query=query, pageinfo=pageinfo, docinfo=docinfo) 747 748 # highlighting 749 highlightQuery = self.REQUEST.get('highlightQuery', None) 750 if highlightQuery: 751 pageinfo['highlightQuery'] = highlightQuery 752 pageinfo['highlightElement'] = self.REQUEST.get('highlightElement', '') 753 pageinfo['highlightElementPos'] = self.REQUEST.get('highlightElementPos', '') 754 755 return pageinfo 756 757 758 def getPageBatch(self, start=1, rows=10, cols=2, pageFlowLtr=True, pageZero=False, minIdx=1, maxIdx=0): 759 """returns dict with array of page informations for one screenfull of thumbnails""" 760 batch = {} 761 grpsize = rows * cols 762 if maxIdx == 0: 763 maxIdx = start + grpsize 764 765 nb = int(math.ceil(maxIdx / float(grpsize))) 766 # list of all batch start and end points 767 batches = [] 768 if pageZero: 769 ofs = 0 770 else: 771 ofs = 1 772 773 for i in range(nb): 774 s = i * grpsize + ofs 775 e = min((i + 1) * grpsize + ofs - 1, maxIdx) 776 batches.append({'start':s, 'end':e}) 777 778 batch['batches'] = batches 779 780 pages = [] 781 if pageZero and start == 1: 782 # correct beginning 783 idx = 0 784 else: 785 idx = start 786 787 for r in range(rows): 788 row = [] 789 for c in range(cols): 790 if idx < minIdx or idx > maxIdx: 791 page = {'idx':None} 792 else: 793 page = {'idx':idx} 794 795 idx += 1 796 if pageFlowLtr: 797 row.append(page) 798 else: 799 row.insert(0, page) 800 801 pages.append(row) 802 803 if start > 1: 804 batch['prevStart'] = max(start - grpsize, 1) 805 else: 806 batch['prevStart'] = None 807 808 if start + grpsize < maxIdx: 809 batch['nextStart'] = start + grpsize 810 else: 811 batch['nextStart'] = None 812 813 batch['pages'] = pages 814 return batch 815 816 def getBatch(self, start=1, size=10, end=0, data=None, fullData=True): 817 """returns dict with information for one screenfull of data.""" 818 batch = {} 819 if end == 0: 820 end = start + size 821 822 nb = int(math.ceil(end / float(size))) 823 # list of all batch start and end points 824 batches = [] 825 for i in range(nb): 826 s = i * size + 1 827 e = min((i + 1) * size, end) 828 batches.append({'start':s, 'end':e}) 829 830 batch['batches'] = batches 831 # list of elements in this batch 832 this = [] 833 j = 0 834 for i in range(start, min(start+size, end)): 835 if data: 836 if fullData: 837 d = data[i] 838 else: 839 d = data[j] 840 j += 1 841 949 842 else: 950 tocPages=tocSize/tocPageSize 951 pageinfo['tocPN'] = min (tocPages,toc) 952 pageinfo['searchPN'] =self.REQUEST.get('searchPN','1') 953 #pageinfo['sn'] =self.REQUEST.get('sn','') 954 pageinfo['s'] =self.REQUEST.get('s','') 955 return pageinfo 956 957 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 843 d = i+1 844 845 this.append(d) 846 847 batch['this'] = this 848 if start > 1: 849 batch['prevStart'] = max(start - size, 1) 850 else: 851 batch['prevStart'] = None 852 853 if start + size < end: 854 batch['nextStart'] = start + size 855 else: 856 batch['nextStart'] = None 857 858 return batch 859 860 861 security.declareProtected('View management screens','changeDocumentViewerForm') 862 changeDocumentViewerForm = PageTemplateFile('zpt/changeDocumentViewer', globals()) 863 864 def changeDocumentViewer(self,title="",digilibBaseUrl=None,thumbrows=2,thumbcols=5,authgroups='mpiwg',RESPONSE=None): 958 865 """init document viewer""" 959 866 self.title=title … … 962 869 self.thumbcols = thumbcols 963 870 self.authgroups = [s.strip().lower() for s in authgroups.split(',')] 871 try: 872 # assume MetaDataFolder instance is called metadata 873 self.metadataService = getattr(self, 'metadata') 874 except Exception, e: 875 logging.error("Unable to find MetaDataFolder 'metadata': "+str(e)) 876 964 877 if RESPONSE is not None: 965 878 RESPONSE.redirect('manage_main') … … 977 890 if RESPONSE is not None: 978 891 RESPONSE.redirect('manage_main') 979 980 ## DocumentViewerTemplate class981 class DocumentViewerTemplate(ZopePageTemplate):982 """Template for document viewer"""983 meta_type="DocumentViewer Template"984 985 986 def manage_addDocumentViewerTemplateForm(self):987 """Form for adding"""988 pt=PageTemplateFile('zpt/addDocumentViewerTemplate', globals()).__of__(self)989 return pt()990 991 def manage_addDocumentViewerTemplate(self, id='viewer_main', title=None, text=None,992 REQUEST=None, submit=None):993 "Add a Page Template with optional file content."994 995 self._setObject(id, DocumentViewerTemplate(id))996 ob = getattr(self, id)997 txt=file(os.path.join(package_home(globals()),'zpt/viewer_main.zpt'),'r').read()998 logging.info("txt %s:"%txt)999 ob.pt_edit(txt,"text/html")1000 if title:1001 ob.pt_setTitle(title)1002 try:1003 u = self.DestinationURL()1004 except AttributeError:1005 u = REQUEST['URL1']1006 1007 u = "%s/%s" % (u, urllib.quote(id))1008 REQUEST.RESPONSE.redirect(u+'/manage_main')1009 return ''1010 1011 1012 -
version.txt
r408 r498 1 DocumentViewer 0.6.01 DocumentViewer 2.0a -
zpt/changeDocumentViewer.zpt
r87 r477 18 18 <p class="form-text">Access groups (separated by ',') that are considered local, i.e. when a ressource restricts access 19 19 to one of these groups, local access to the ressource is granted.</p> 20 <p class="form-optional">Digilib base URL</p> 21 <p class="form-element"><input size="80" tal:attributes="value here/digilibBaseUrl | nothing" name="digilibBaseUrl"></p> 22 <p class="form-text">Leave empty for autoconfiguration.</p> 20 23 21 24 <p><input type="submit" value="change"></p> -
zpt/toc_figures.zpt
r122 r501 1 <tal:block tal:define="docinfo options/docinfo; pageinfo options/pageinfo; 2 pn python:int(pageinfo['tocPN']); tocsize python:int(docinfo['tocSize_figures']); grpsize python:int(pageinfo['tocPageSize']); 3 maxpn python:int(tocsize/grpsize);"> 4 <div class="thumbruler"> 5 <span tal:condition="python:(pn>1)"> 6 <a tal:attributes="href python:here.getLink(param='tocPN',val=pn-1)"><</a> 7 </span> 8 <span tal:content="string:$pn of $maxpn"/> 9 <span> 10 <a tal:attributes="href python:here.getLink(param='tocPN',val=pn+1)">></a> 11 </span> 12 </div> 13 <div tal:content="structure python:here.getTocPage(mode='figures',pageinfo=pageinfo,docinfo=docinfo)"/> 14 </tal:block> 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 3 <html xmlns="http://www.w3.org/1999/xhtml"> 4 <head> 5 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 </head> 7 <body> 8 <!-- block used for main content area --> 9 <div class="toc-figures" metal:define-macro="main" 10 tal:define="start pageinfo/start; tocsize docinfo/tocSize_figures; grpsize pageinfo/tocPageSize; 11 batch python:here.getBatch(start=start,size=grpsize,end=tocsize);"> 12 <ul class="switcher"> 13 <li><a 14 tal:attributes="href python:here.getLink('tocMode','thumbs')">Thumbnails</a> 15 </li> 16 <li 17 tal:condition="python:docpath and docinfo.get('numTocEntries', None)"> 18 <a tal:attributes="href python:here.getLink('tocMode','text')">Content</a> 19 </li> 20 <li class="sel" 21 tal:condition="python:docpath and docinfo.get('numFigureEntries', None)"> 22 <a 23 tal:attributes="href python:here.getLink('tocMode','figures')">Figures</a> 24 </li> 25 <li><a 26 tal:attributes="href python:here.getLink('tocMode','none')">None</a> 27 </li> 28 </ul> 29 <div class="ruler"> 30 <metal:block metal:use-macro="here/template/common_template/macros/toc_ruler"/> 31 </div> 32 <div class="content" 33 tal:content="structure python:here.getTocPage(mode='figures',start=start,pageinfo=pageinfo,docinfo=docinfo)" /> 34 </div> 35 <!-- toc --> 36 </body> 37 </html> -
zpt/toc_text.zpt
r122 r501 1 <tal:block tal:define="docinfo options/docinfo; pageinfo options/pageinfo; 2 pn python:int(pageinfo['tocPN']); tocsize python:int(docinfo['tocSize_text']); grpsize python:int(pageinfo['tocPageSize']); 3 maxpn python:int(tocsize/grpsize);"> 4 <div class="thumbruler"> 5 <span tal:condition="python:(pn>1)"> 6 <a tal:attributes="href python:here.getLink(param='tocPN',val=pn-1)"><</a> 7 </span> 8 <span tal:content="string:$pn of $tocsize"/> 9 <span> 10 <a tal:attributes="href python:here.getLink(param='tocPN',val=pn+1)">></a> 11 </span> 12 </div> 13 <div tal:content="structure python:here.getTocPage(mode='text',pageinfo=pageinfo,docinfo=docinfo)"/> 14 </tal:block> 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 3 <html xmlns="http://www.w3.org/1999/xhtml"> 4 <head> 5 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 </head> 7 <body> 8 <!-- block used for main content area --> 9 <div class="toc-text" metal:define-macro="main" 10 tal:define="start pageinfo/start; tocsize docinfo/tocSize_text; grpsize pageinfo/tocPageSize; 11 batch python:here.getBatch(start=start,size=grpsize,end=tocsize);"> 12 <ul class="switcher"> 13 <li><a 14 tal:attributes="href python:here.getLink('tocMode','thumbs')">Thumbnails</a> 15 </li> 16 <li class="sel" 17 tal:condition="python:docpath and docinfo.get('numTocEntries', None)"> 18 <a tal:attributes="href python:here.getLink('tocMode','text')">Content</a> 19 </li> 20 <li 21 tal:condition="python:docpath and docinfo.get('numFigureEntries', None)"> 22 <a 23 tal:attributes="href python:here.getLink('tocMode','figures')">Figures</a> 24 </li> 25 <li><a 26 tal:attributes="href python:here.getLink('tocMode','none')">None</a> 27 </li> 28 </ul> 29 <div class="ruler"> 30 <metal:block metal:use-macro="here/template/common_template/macros/toc_ruler"/> 31 </div> 32 <div class="content" 33 tal:content="structure python:here.getTocPage(mode='text',start=start,pageinfo=pageinfo,docinfo=docinfo)" /> 34 </div> 35 <!-- toc --> 36 </body> 37 </html> -
zpt/toc_thumbs.zpt
r122 r489 1 <div tal:define="docinfo options/docinfo; pageinfo options/pageinfo; 2 start pageinfo/start; end pageinfo/end; rows pageinfo/rows; cols pageinfo/cols; 3 current pageinfo/current; grpsize pageinfo/groupsize"> 1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" 2 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> 3 <html xmlns="http://www.w3.org/1999/xhtml"> 4 <head> 5 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> 6 </head> 7 <body> 8 <!-- block used for main content area --> 9 <div class="toc-thumbs" metal:define-macro="main" 10 tal:define="start pageinfo/start; 11 grpsize pageinfo/groupsize; 12 numgroups pageinfo/numgroups; 13 pageBatch pageinfo/pageBatch; pageZero pageinfo/pageZero; 14 left python:test(flowLtr,pageBatch['prevStart'],pageBatch['nextStart']); 15 right python:test(flowLtr,pageBatch['nextStart'],pageBatch['prevStart']);"> 16 <ul class="toctype"> 17 <li class="sel"> 18 <a tal:attributes="href python:here.getLink('tocMode','thumbs')">Thumbnails</a> 19 </li> 20 <li tal:condition="python:docinfo.get('numTocEntries', None)"> 21 <a tal:attributes="href python:here.getLink('tocMode','text')">Content</a> 22 </li> 23 <li tal:condition="python:docinfo.get('numFigureEntries', None)"> 24 <a tal:attributes="href python:here.getLink('tocMode','figures')">Figures</a> 25 </li> 26 <li> 27 <a tal:attributes="href python:here.getLink('tocMode','none')">None</a> 28 </li> 29 </ul> 4 30 5 <div class="thumbruler"> 6 <span tal:condition="python:(start>1)"> 7 <a tal:attributes="href python:here.getLink(param='start',val=max(start-grpsize,1))"><</a> 8 </span> 9 <select tal:attributes="onChange python:'location.href=\''+here.getLink(param='start',val=None)+'&start=\'+this.options[this.selectedIndex].value'"> 10 <option tal:repeat="grp python:range(pageinfo['numgroups'])" 11 tal:attributes="selected python:(start==grp*grpsize+1); value python:(grp*grpsize+1)" 12 tal:content="python:(grp*grpsize+1)"/> 13 </select> 14 <span tal:condition="python:(start+grpsize<int(docinfo['numPages']))"> 15 <a tal:attributes="href python:here.getLink(param='start',val=start+grpsize)">></a> 16 </span> 17 </div> 18 <table> 19 <tr tal:repeat="row python:range(rows)"> 20 <tal:block tal:repeat="idx python:range(start+row*cols,start+(row+1)*cols)"> 21 <td align="center" tal:condition="python:(idx<=end)" 22 tal:attributes="class python:here.getStyle(idx,current,'thumb')"> 23 <a tal:attributes="href python:here.getLink(param='pn',val=idx)"> 24 <img class="thumbimg" border="0" tal:attributes="src string:${docinfo/imageURL}&pn=$idx&dw=100&dh=100"/> 25 <div class="thumbcap" tal:content="idx"/> 26 </a> 27 </td> 28 </tal:block> 29 </tr> 30 </table> 31 </div> 31 <div class="content"> 32 <div class="ruler"> 33 <form class="autosubmit" tal:attributes="action viewerUrl"> 34 <input type="hidden" tal:define="params python:here.getParams('start',None)" 35 tal:repeat="param params" 36 tal:attributes="name param; value python:params[param]" /> 37 <a tal:condition="left" tal:attributes="href python:here.getLink('start',left)"><</a> 38 <span tal:condition="not:left"><</span> 39 <select class="autosubmit" name="start" 40 tal:define="ofs python:test(pageinfo['pageZero'],0,1)"> 41 <tal:block > 42 <option tal:repeat="grp pageBatch/batches" 43 tal:attributes="selected python:start==grp['start']; value grp/start;" 44 tal:content="string:${grp/start} - ${grp/end}" /> 45 </tal:block> 46 </select> 47 <input type="submit" value="Go" /> 48 <a tal:condition="right" tal:attributes="href python:here.getLink('start',right)">></a> 49 <span tal:condition="not:right">></span> 50 </form> 51 </div> 52 53 <table class="thumbs"> 54 <tr tal:repeat="row pageBatch/pages"> 55 <td tal:repeat="thumb row" 56 tal:attributes="class python:here.getStyle(thumb['idx'],pn,'thumb')"> 57 <a tal:define="idx thumb/idx" tal:condition="idx" 58 tal:attributes="href python:here.getLink('pn',idx)"> 59 <img 60 tal:attributes="src python:test(docinfo['imageURL'],here.getScalerUrl(pn=idx,dw=100,dh=100,docinfo=docinfo),'images/pic'); 61 alt idx" /><br/> 62 <span tal:content="idx" /> 63 </a> 64 </td> 65 </tr> 66 </table> 67 </div> <!-- content --> 68 </div> <!-- toc --> 69 </body> 70 </html>
Note: See TracChangeset
for help on using the changeset viewer.