Changeset 453:beb7ccb92564 in documentViewer
- Timestamp:
- Jul 14, 2011, 5:43:56 PM (14 years ago)
- Branch:
- elementtree
- Files:
-
- 2 added
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
MpdlXmlTextServer.py
r407 r453 3 3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 4 from Ft.Xml import EMPTY_NAMESPACE, Parse 5 5 from Ft.Xml.Domlette import NonvalidatingReader 6 import Ft.Xml.Domlette 7 import cStringIO 8 9 import xml.etree.ElementTree as ET 10 11 import md5 6 12 import sys 7 13 import logging 8 14 import urllib 9 15 import documentViewer 10 from documentViewer import getTextFromNode, serializeNode 16 #from documentViewer import getTextFromNode, serializeNode 17 18 def getText(node): 19 """get the cdata content of a node""" 20 if node is None: 21 return "" 22 # ET: 23 text = node.text or "" 24 for e in node: 25 text += gettext(e) 26 if e.tail: 27 text += e.tail 28 29 return text 30 31 def serialize(node): 32 """returns a string containing an XML snippet of node""" 33 s = ET.tostring(node, 'UTF-8') 34 # snip off XML declaration 35 if s.startswith('<?xml'): 36 i = s.find('?>') 37 return s[i+3:] 38 39 return s 40 41 42 def getTextFromNode(node): 43 """get the cdata content of a node""" 44 if node is None: 45 return "" 46 # ET: 47 #text = node.text or "" 48 #for e in node: 49 # text += gettext(e) 50 # if e.tail: 51 # text += e.tail 52 53 # 4Suite: 54 nodelist=node.childNodes 55 text = "" 56 for n in nodelist: 57 if n.nodeType == node.TEXT_NODE: 58 text = text + n.data 59 60 return text 61 62 def serializeNode(node, encoding="utf-8"): 63 """returns a string containing node as XML""" 64 #s = ET.tostring(node) 65 66 # 4Suite: 67 stream = cStringIO.StringIO() 68 Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 69 s = stream.getvalue() 70 stream.close() 71 72 return s 73 11 74 12 75 class MpdlXmlTextServer(SimpleItem): … … 20 83 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) 21 84 22 def __init__(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 85 def 
__init__(self,id,title="",serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 86 #def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/", serverName=None, timeout=40): 87 23 88 """constructor""" 24 89 self.id=id … … 39 104 return documentViewer.getHttpData(url,data,timeout=self.timeout) 40 105 41 def getSearch(self, p n=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None, characterNormalization=None, optionToggle=None):106 def getSearch(self, pageinfo=None, docinfo=None): 42 107 """get search list""" 43 108 docpath = docinfo['textURLPath'] 44 109 url = docinfo['url'] 45 110 pagesize = pageinfo['queryPageSize'] 46 pn = pageinfo ['searchPN']111 pn = pageinfo.get('searchPN',1) 47 112 sn = pageinfo['sn'] 48 113 highlightQuery = pageinfo['highlightQuery'] … … 52 117 tocMode = pageinfo['tocMode'] 53 118 characterNormalization = pageinfo['characterNormalization'] 54 optionToggle = pageinfo['optionToggle']119 #optionToggle = pageinfo['optionToggle'] 55 120 tocPN = pageinfo['tocPN'] 56 121 selfurl = self.absolute_url() 57 58 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&optionToggle=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization,optionToggle ,urllib.quote(highlightQuery))) 59 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) 60 122 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 
'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery))) 61 123 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) 62 124 pagedom = Parse(pagexml) 125 126 """ 127 pagedivs = pagedom.xpath("//div[@class='queryResultHits']") 128 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): 129 if len(pagedivs)>0: 130 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) 131 s = getTextFromNode(pagedivs[0]) 132 s1 = int(s)/10+1 133 try: 134 docinfo['queryResultHits'] = int(s1) 135 logging.debug("SEARCH ENTRIES: %s"%(s1)) 136 except: 137 docinfo['queryResultHits'] = 0 138 """ 63 139 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 64 140 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") … … 72 148 if href.startswith('page-fragment.xql'): 73 149 selfurl = self.absolute_url() 74 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s& optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization))150 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization)) 75 151 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 152 #logging.debug("PUREXML :%s"%(serializeNode(pagenode))) 76 153 return serializeNode(pagenode) 77 154 if (queryType=="fulltextMorph"): … … 86 163 if href.startswith('page-fragment.xql'): 87 164 selfurl = self.absolute_url() 88 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s& 
optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization))165 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)) 89 166 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 90 167 if href.startswith('../lt/lemma.xql'): 91 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_ lemma_New'%(selfurl))168 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl)) 92 169 l.setAttributeNS(None, 'target', '_blank') 93 170 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 94 l.setAttributeNS(None, 'on Click', 'popupWin.focus();')171 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 95 172 pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']") 96 173 return serializeNode(pagenode) … … 104 181 if hrefNode: 105 182 href = hrefNode.nodeValue 106 hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s& optionToggle=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,optionToggle,characterNormalization))183 hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) 107 184 if href.startswith('../lt/lex.xql'): 108 hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_ voc'%selfurl)185 hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl) 109 186 l.setAttributeNS(None, 'target', '_blank') 110 187 l.setAttributeNS(None, 'onClick',"popupWin = 
window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 111 l.setAttributeNS(None, 'on Click', 'popupWin.focus();')188 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 112 189 if href.startswith('../lt/lemma.xql'): 113 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'% selfurl)190 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl)) 114 191 l.setAttributeNS(None, 'target', '_blank') 115 192 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 116 l.setAttributeNS(None, 'on Click', 'popupWin.focus();')193 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 117 194 return serializeNode(pagenode) 118 195 return "no text here" 119 120 """def getNumPages(self, docinfo): 121 ""get list of pages from fulltext and put in docinfo"" 122 if 'numPages' in docinfo: 123 # already there 124 return docinfo 125 xquery = '//pb' 126 text = self.getServerData("xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) 127 docinfo['numPages'] = text.count("<pb ") 128 return docinfo 129 """ 130 def getNumTextPages (self, docinfo): 131 """get list of pages from fulltext (texts without images) and put in docinfo""" 132 if 'numPages' in docinfo: 133 # allredy there 134 return docinfo 135 xpath ='/count(//pb)' 136 text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'], xpath)) 137 dom = Parse(text) 138 result= dom.xpath("//result/resultPage") 139 140 docinfo['numPages']=int(getTextFromNode(result[0])) 141 return docinfo 142 196 143 197 def getGisPlaces(self, docinfo=None, pageinfo=None): 144 198 """ Show all Gis Places of whole Page""" 145 199 xpath='//place' 146 docpath = docinfo['textURLPath'] 200 docpath = docinfo.get('textURLPath',None) 201 if not docpath: 202 return None 203 147 204 url = 
docinfo['url'] 148 205 selfurl = self.absolute_url() … … 158 215 hrefList.append(href) 159 216 myList = ",".join(hrefList) 160 logging.debug("getGisPlaces :%s"%(myList))217 #logging.debug("getGisPlaces :%s"%(myList)) 161 218 return myList 162 219 … … 179 236 hrefList.append(href) 180 237 myList = ",".join(hrefList) 181 logging.debug("getALLGisPlaces :%s"%(myList))238 #logging.debug("getALLGisPlaces :%s"%(myList)) 182 239 return myList 183 184 185 def getPDF (self, docinfo=None, pageinfo=None): 186 """Show and Save different Pages as PDF in Options""" 187 selfurl=self.absolute_url() 188 pn=pageinfo['current'] 189 190 viewMode =pageinfo['viewMode'] 191 192 #text = ("page-fragment.xql","document=%s&mode=%s&pn=%s&export=%s"%(docinfo['textURLPath'], 'text', pn,'pdf')) 193 #text = self.getServerData("page-fragment.xql", "document=%s&mode=%s&pn=%s&export=%s"(docinfo['textURLPath'],'text', pn,'pdf')) 194 #logging.debug("text :%s"%(text)) 195 #dom =Parse(text) 196 #logging.debug("text :%s"%(text)) 197 #return text 240 198 241 199 def getOrigPages (self, docinfo=None, pageinfo=None): 200 """Show original page """ 201 docpath = docinfo['textURLPath'] 202 logging.debug ("docinfo['textURLPath']=%s"%(docinfo['textURLPath'])) 203 #url = docinfo['url'] 204 selfurl = self.absolute_url() 205 pn =pageinfo['current'] 206 207 viewMode= pageinfo['viewMode'] 208 text = self.getServerData("page-fragment.xql","document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'], 'text', pn)) 209 dom =Parse(text) 210 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 211 logging.debug("YYYYYYpagedivs :%s"%(pagedivs)) 212 if len(pagedivs)>0: 213 originalPage= getTextFromNode(pagedivs[0]) 214 #return docinfo['originalPage'] 215 return originalPage 216 217 218 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="regPlusNorm", highlightQuery=None, sn=None, optionToggle=None): 242 def getTextPage(self, mode="text_dict", pn=1, 
docinfo=None, pageinfo=None): 219 243 """returns single page from fulltext""" 220 244 docpath = docinfo['textURLPath'] 221 245 path = docinfo['textURLPath'] 222 url = docinfo ['url']223 name = docinfo ['name']224 viewMode= pageinfo['viewMode']246 url = docinfo.get('url',None) 247 name = docinfo.get('name',None) 248 pn =pageinfo['current'] 225 249 sn = pageinfo['sn'] 250 #optionToggle =pageinfo ['optionToggle'] 226 251 highlightQuery = pageinfo['highlightQuery'] 227 252 #mode = pageinfo ['viewMode'] 228 253 tocMode = pageinfo['tocMode'] 229 254 characterNormalization=pageinfo['characterNormalization'] … … 234 259 else: 235 260 textmode = mode 236 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery))261 237 262 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) 238 263 if highlightQuery is not None: 239 264 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 240 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery))265 241 266 pagexml = self.getServerData("page-fragment.xql",textParam) 242 logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) 243 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) 244 245 pagedom = Parse(pagexml) 267 dom = ET.fromstring(pagexml) 268 #dom = NonvalidatingReader.parseStream(pagexml) 269 270 #original Pages 271 #pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 272 273 """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 274 if len(pagedivs)>0: 275 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 276 logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig'])) 277 278 #original Pages Norm 279 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 280 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 281 if len(pagedivs)>0: 282 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 283 logging.debug("ORIGINAL 
PAGE NORM: %s"%(docinfo['pageNumberOrigNorm'])) 284 """ 285 #figureEntries 286 # pagedivs = dom.xpath("//div[@class='countFigureEntries']") 287 # if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): 288 # if len(pagedivs)>0: 289 # docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) 290 # s = getTextFromNode(pagedivs[0]) 291 # if s=='0': 292 # try: 293 # docinfo['countFigureEntries'] = int(s) 294 # except: 295 # docinfo['countFigureEntries'] = 0 296 # else: 297 # s1 = int(s)/30+1 298 # try: 299 # docinfo['countFigureEntries'] = int(s1) 300 # except: 301 # docinfo['countFigureEntries'] = 0 302 # 303 # #allPlaces 304 # pagedivs = dom.xpath("//div[@class='countPlaces']") 305 # if pagedivs == dom.xpath("//div[@class='countPlaces']"): 306 # if len(pagedivs)>0: 307 # docinfo['countPlaces']= getTextFromNode(pagedivs[0]) 308 # s = getTextFromNode(pagedivs[0]) 309 # try: 310 # docinfo['countPlaces'] = int(s) 311 # except: 312 # docinfo['countPlaces'] = 0 313 # 314 # #tocEntries 315 # pagedivs = dom.xpath("//div[@class='countTocEntries']") 316 # if pagedivs == dom.xpath("//div[@class='countTocEntries']"): 317 # if len(pagedivs)>0: 318 # docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) 319 # s = getTextFromNode(pagedivs[0]) 320 # if s=='0': 321 # try: 322 # docinfo['countTocEntries'] = int(s) 323 # except: 324 # docinfo['countTocEntries'] = 0 325 # else: 326 # s1 = int(s)/30+1 327 # try: 328 # docinfo['countTocEntries'] = int(s1) 329 # except: 330 # docinfo['countTocEntries'] = 0 331 332 #numTextPages 333 #pagedivs = dom.xpath("//div[@class='countPages']") 334 alldivs = dom.findall(".//div") 335 pagediv = None 336 for div in alldivs: 337 dc = div.get('class') 338 if dc == 'pageContent': 339 pagediv = div 340 341 if dc == 'countPages': 342 try: 343 np = int(div.text) 344 docinfo['numPages'] = np 345 pageinfo['end'] = min(pageinfo['end'], np) 346 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) 347 if np % pageinfo['groupsize'] > 0: 348 
pageinfo['numgroups'] += 1 349 350 except: 351 docinfo['numPages'] = 0 352 353 break 354 355 # ROC: why? 356 # else: 357 # #no full text -- init to 0 358 # docinfo['pageNumberOrig'] = 0 359 # docinfo['countFigureEntries'] = 0 360 # docinfo['countPlaces'] = 0 361 # docinfo['countTocEntries'] = 0 362 # docinfo['numPages'] = 0 363 # docinfo['pageNumberOrigNorm'] = 0 364 # #return docinfo 365 246 366 # plain text mode 247 367 if mode == "text": 248 # first div contains text 249 pagedivs = pagedom.xpath("/div") 250 if len(pagedivs) > 0: 251 pagenode = pagedivs[0] 252 links = pagenode.xpath("//a") 368 #pagedivs = dom.xpath("/div") 369 if pagediv: 370 links = pagediv.findall(".//a") 253 371 for l in links: 254 hrefNode = l.getAttributeNodeNS(None, u"href") 255 if hrefNode: 256 href= hrefNode.nodeValue 257 if href.startswith('#note-'): 258 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) 259 return serializeNode(pagenode) 372 href = l.get('href') 373 if href and href.startswith('#note-'): 374 href = href.replace('#note-',"?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn)) 375 l.set('href', href) 376 logging.debug("page=%s"%ET.tostring(pagediv, 'UTF-8')) 377 return serialize(pagediv) 378 260 379 if mode == "xml": 261 # first div contains text 262 pagedivs = pagedom.xpath("/div") 263 if len(pagedivs) > 0: 264 pagenode = pagedivs[0] 265 return serializeNode(pagenode) 380 if pagediv: 381 return serialize(pagediv) 382 383 if mode == "pureXml": 384 if pagediv: 385 return serialize(pagediv) 386 266 387 if mode == "gis": 267 # first div contains text 268 pagedivs = pagedom.xpath("/div") 269 if len(pagedivs) > 0: 270 pagenode = pagedivs[0] 271 links =pagenode.xpath("//a") 272 for l in links: 273 hrefNode =l.getAttributeNodeNS(None, u"href") 274 if hrefNode: 275 href=hrefNode.nodeValue 276 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): 277 hrefNode.nodeValue 
=href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name) 278 l.setAttributeNS(None, 'target', '_blank') 279 return serializeNode(pagenode) 388 if pagediv: 389 # check all a-tags 390 links = pagediv.findall(".//a") 391 for l in links: 392 href = l.get('href') 393 if href: 394 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): 395 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)) 396 l.set('target', '_blank') 397 398 return serialize(pagenode) 280 399 281 if mode == "pureXml":282 # first div contains text283 pagedivs = pagedom.xpath("/div")284 if len(pagedivs) > 0:285 pagenode = pagedivs[0]286 return serializeNode(pagenode)287 400 # text-with-links mode 288 401 if mode == "text_dict": 289 # first div contains text 290 pagedivs = pagedom.xpath("/div") 291 if len(pagedivs) > 0: 292 pagenode = pagedivs[0] 402 if pagediv: 293 403 # check all a-tags 294 links = page node.xpath("//a")404 links = pagediv.findall(".//a") 295 405 for l in links: 296 hrefNode = l.getAttributeNodeNS(None, u"href") 297 if hrefNode: 406 href = l.get('href') 407 408 if href: 298 409 # is link with href 299 href = hrefNode.nodeValue 300 if href.startswith('lt/lex.xql'): 410 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): 301 411 # is pollux link 302 412 selfurl = self.absolute_url() 303 413 # change href 304 hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)414 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl)) 305 415 # add target 306 l.setAttributeNS(None, 'target', '_blank') 307 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") 308 l.setAttributeNS(None, 'onClick', 'popupWin.focus();') 309 if href.startswith('lt/lemma.xql'): 416 l.set('target', '_blank') 417 
418 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): 310 419 selfurl = self.absolute_url() 311 hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) 312 l.setAttributeNS(None, 'target', '_blank') 313 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") 314 l.setAttributeNS(None, 'onClick', 'popupWin.focus();') 420 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) 421 l.set('target', '_blank') 422 l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") 423 l.set('ondblclick', 'popupWin.focus();') 424 315 425 if href.startswith('#note-'): 316 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) 317 return serializeNode(pagenode) 426 l.set('href', href.replace('#note-',"?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))) 427 428 return serialize(pagediv) 429 318 430 return "no text here" 319 320 def getTranslate(self, query=None, language=None): 431 432 def getOrigPages(self, docinfo=None, pageinfo=None): 433 docpath = docinfo['textURLPath'] 434 pn =pageinfo['current'] 435 selfurl = self.absolute_url() 436 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 437 dom = Parse(pagexml) 438 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 439 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 440 if len(pagedivs)>0: 441 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 442 return docinfo['pageNumberOrig'] 443 444 def getOrigPagesNorm(self, docinfo=None, pageinfo=None): 445 docpath = docinfo['textURLPath'] 446 pn =pageinfo['current'] 447 selfurl = self.absolute_url() 
448 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 449 dom = Parse(pagexml) 450 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 451 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 452 if len(pagedivs)>0: 453 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 454 return docinfo['pageNumberOrigNorm'] 455 456 457 def getTranslate(self, word=None, language=None): 321 458 """translate into another languages""" 322 data = self.getServerData("lt/ lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query))459 data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html") 323 460 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) 324 461 return data … … 326 463 def getLemma(self, lemma=None, language=None): 327 464 """simular words lemma """ 328 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(lemma)) 329 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) 465 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html") 330 466 return data 331 467 332 def getLemma New(self, query=None, language=None):468 def getLemmaQuery(self, query=None, language=None): 333 469 """simular words lemma """ 334 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(query)) 335 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) 470 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html") 336 471 return data 337 472 338 def getQuery (self, docinfo=None, pageinfo=None, query=None, 
queryType=None, pn=1, optionToggle=None): 339 """number of""" 473 def getLex(self, query=None, language=None): 474 #simular words lemma 475 data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) 476 return data 477 478 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): 479 #number of 340 480 docpath = docinfo['textURLPath'] 341 481 pagesize = pageinfo['queryPageSize'] … … 347 487 348 488 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 349 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)350 489 pagedom = Parse(pagexml) 351 490 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 352 491 tocSearch = int(getTextFromNode(numdivs[0])) 353 logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch))354 492 tc=int((tocSearch/10)+1) 355 logging.debug("documentViewer (gettoc) tc: %s"%(tc))356 493 return tc 357 358 def getQueryResultHits(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionsClose=None): 359 360 """number of hits in Search mode""" 361 docpath = docinfo['textURLPath'] 362 pagesize = pageinfo['queryPageSize'] 363 pn = pageinfo['searchPN'] 364 query =pageinfo['query'] 365 queryType =pageinfo['queryType'] 366 tocSearch = 0 367 tocDiv = None 368 369 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 370 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', 
queryType, query, pagesize, pn) ,outputUnicode=False) 371 pagedom = Parse(pagexml) 372 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 373 tocSearch = int(getTextFromNode(numdivs[0])) 374 tc=int((tocSearch/10)+1) 375 return tc 376 377 def getQueryResultHitsText(self, docinfo=None, pageinfo=None): 378 """number of hits in Text of Contents mode""" 379 380 docpath = docinfo['textURLPath'] 381 pagesize = pageinfo['queryPageSize'] 382 pn = pageinfo['searchPN'] 383 query =pageinfo['query'] 384 queryType =pageinfo['queryType'] 385 tocSearch = 0 386 tocDiv = None 387 tocMode = pageinfo['tocMode'] 388 tocPN = pageinfo['tocPN'] 389 pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'toc')) 390 pagedom = Parse(pagexml) 391 logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) 392 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 393 tocSearch = int(getTextFromNode(numdivs[0])) 394 tc=int((tocSearch/30)+1) 395 return tc 396 397 def getQueryResultHitsFigures(self, docinfo=None, pageinfo=None): 398 """number of hits in Text of Figures mode""" 399 400 docpath = docinfo['textURLPath'] 401 pagesize = pageinfo['queryPageSize'] 402 pn = pageinfo['searchPN'] 403 query =pageinfo['query'] 404 queryType =pageinfo['queryType'] 405 tocSearch = 0 406 tocDiv = None 407 tocMode = pageinfo['tocMode'] 408 tocPN = pageinfo['tocPN'] 409 pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'figures')) 410 pagedom = Parse(pagexml) 411 logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) 412 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 413 tocSearch = int(getTextFromNode(numdivs[0])) 414 tc=int((tocSearch/30)+1) 415 return tc 416 417 494 418 495 def getToc(self, mode="text", docinfo=None): 419 496 """loads table of contents and stores in docinfo""" 420 logging.debug("documentViewer (gettoc) mode: %s"%(mode))421 497 if mode == "none": 422 498 return docinfo … … 438 514 439 515 pagexml = 
self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 440 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)516 441 517 # post-processing downloaded xml 442 518 pagedom = Parse(pagexml) … … 463 539 viewMode= pageinfo['viewMode'] 464 540 characterNormalization = pageinfo ['characterNormalization'] 465 optionToggle =pageinfo ['optionToggle']541 #optionToggle =pageinfo ['optionToggle'] 466 542 tocMode = pageinfo['tocMode'] 467 543 tocPN = pageinfo['tocPN'] 468 544 469 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm &optionToggle=1"%(docpath,queryType, pagesize, pn))470 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s &optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN))545 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm"%(docpath,queryType, pagesize, pn)) 546 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) 471 547 text = page.replace('mode=image','mode=texttool') 472 logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))473 #logging.debug("documentViewer (characterNormalization) text: %s"%(text))474 548 return text 475 549 476 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 550 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 551 #def 
manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None): 477 552 """change settings""" 478 553 self.title=title … … 488 563 return pt() 489 564 490 def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 565 def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 566 #def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None): 491 567 """add zogiimage""" 492 568 newObj = MpdlXmlTextServer(id,title,serverUrl,timeout) … … 494 570 if RESPONSE is not None: 495 571 RESPONSE.redirect('manage_main') 496 497 498 -
documentViewer.py
r405 r453 8 8 from Products.zogiLib.zogiLib import browserCheck 9 9 10 from Ft.Xml import EMPTY_NAMESPACE, Parse 11 import Ft.Xml.Domlette 10 #from Ft.Xml import EMPTY_NAMESPACE, Parse 11 #import Ft.Xml.Domlette 12 13 import xml.etree.ElementTree as ET 14 12 15 import os.path 13 16 import sys … … 17 20 import math 18 21 import urlparse 19 import cStringIO20 22 import re 21 23 import string … … 33 35 return int(default) 34 36 35 def getText FromNode(nodename):37 def getText(node): 36 38 """get the cdata content of a node""" 37 if node nameis None:39 if node is None: 38 40 return "" 39 nodelist=nodename.childNodes 40 rc = "" 41 for node in nodelist: 42 if node.nodeType == node.TEXT_NODE: 43 rc = rc + node.data 44 return rc 45 46 def serializeNode(node, encoding='utf-8'): 41 # ET: 42 text = node.text or "" 43 for e in node: 44 text += gettext(e) 45 if e.tail: 46 text += e.tail 47 48 # 4Suite: 49 #nodelist=node.childNodes 50 #text = "" 51 #for n in nodelist: 52 # if n.nodeType == node.TEXT_NODE: 53 # text = text + n.data 54 55 return text 56 57 getTextFromNode = getText 58 59 def serializeNode(node, encoding="utf-8"): 47 60 """returns a string containing node as XML""" 48 buf = cStringIO.StringIO() 49 Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding) 50 s = buf.getvalue() 51 buf.close() 61 s = ET.tostring(node) 62 63 # 4Suite: 64 # stream = cStringIO.StringIO() 65 # Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 66 # s = stream.getvalue() 67 # stream.close() 52 68 return s 53 69 … … 59 75 bt['isIE'] = False 60 76 bt['isN4'] = False 77 bt['versFirefox']="" 78 bt['versIE']="" 79 bt['versSafariChrome']="" 80 bt['versOpera']="" 81 61 82 if string.find(ua, 'MSIE') > -1: 62 83 bt['isIE'] = True 63 84 else: 64 85 bt['isN4'] = (string.find(ua, 'Mozilla/4.') > -1) 65 86 # Safari oder Chrome identification 87 try: 88 nav = ua[string.find(ua, '('):] 89 nav1=ua[string.find(ua,')'):] 90 nav2=nav1[string.find(nav1,'('):] 91 nav3=nav2[string.find(nav2,')'):] 
92 ie = string.split(nav, "; ")[1] 93 ie1 =string.split(nav1, " ")[2] 94 ie2 =string.split(nav3, " ")[1] 95 ie3 =string.split(nav3, " ")[2] 96 if string.find(ie3, "Safari") >-1: 97 bt['versSafariChrome']=string.split(ie2, "/")[1] 98 except: pass 99 # IE identification 66 100 try: 67 101 nav = ua[string.find(ua, '('):] … … 69 103 if string.find(ie, "MSIE") > -1: 70 104 bt['versIE'] = string.split(ie, " ")[1] 71 except: pass 105 except:pass 106 # Firefox identification 107 try: 108 nav = ua[string.find(ua, '('):] 109 nav1=ua[string.find(ua,')'):] 110 if string.find(ie1, "Firefox") >-1: 111 nav5= string.split(ie1, "/")[1] 112 logging.debug("FIREFOX: %s"%(nav5)) 113 bt['versFirefox']=nav5[0:3] 114 except:pass 115 #Opera identification 116 try: 117 if string.find(ua,"Opera") >-1: 118 nav = ua[string.find(ua, '('):] 119 nav1=nav[string.find(nav,')'):] 120 bt['versOpera']=string.split(nav1,"/")[2] 121 except:pass 72 122 73 123 bt['isMac'] = string.find(ua, 'Macintosh') > -1 … … 128 178 #return None 129 179 130 131 132 180 ## 133 181 ## documentViewer class … … 148 196 toc_figures = PageTemplateFile('zpt/toc_figures', globals()) 149 197 page_main_images = PageTemplateFile('zpt/page_main_images', globals()) 198 page_main_double = PageTemplateFile('zpt/page_main_double', globals()) 150 199 page_main_text = PageTemplateFile('zpt/page_main_text', globals()) 151 200 page_main_text_dict = PageTemplateFile('zpt/page_main_text_dict', globals()) … … 197 246 return self.template.fulltextclient.getTextPage(**args) 198 247 248 def getOrigPages(self, **args): 249 """get page""" 250 return self.template.fulltextclient.getOrigPages(**args) 251 252 def getOrigPagesNorm(self, **args): 253 """get page""" 254 return self.template.fulltextclient.getOrigPagesNorm(**args) 255 199 256 def getQuery(self, **args): 200 """get query """257 """get query in search""" 201 258 return self.template.fulltextclient.getQuery(**args) 202 203 def getQueryResultHits(self, **args): 204 """get query""" 205 
return self.template.fulltextclient.getQueryResultHits(**args) 206 207 def getQueryResultHitsText(self, **args): 208 """get query""" 209 return self.template.fulltextclient.getQueryResultHitsText(**args) 210 211 def getQueryResultHitsFigures(self, **args): 212 """get query""" 213 return self.template.fulltextclient.getQueryResultHitsFigures(**args) 214 215 def getPDF(self, **args): 216 """get query""" 217 return self.template.fulltextclient.getPDF(**args) 218 259 219 260 def getSearch(self, **args): 220 261 """get search""" … … 228 269 """get all gis places """ 229 270 return self.template.fulltextclient.getAllGisPlaces(**args) 230 231 def getOrigPages(self, **args): 232 """get original page number """ 233 return self.template.fulltextclient.getOrigPages(**args) 234 235 def getNumPages(self, docinfo): 236 """get numpages""" 237 return self.template.fulltextclient.getNumPages(docinfo) 238 239 def getNumTextPages(self, docinfo): 240 """get numpages text""" 241 return self.template.fulltextclient.getNumTextPages(docinfo) 242 271 243 272 def getTranslate(self, **args): 244 273 """get translate""" … … 248 277 """get lemma""" 249 278 return self.template.fulltextclient.getLemma(**args) 279 280 def getLemmaQuery(self, **args): 281 """get query""" 282 return self.template.fulltextclient.getLemmaQuery(**args) 283 284 def getLex(self, **args): 285 """get lex""" 286 return self.template.fulltextclient.getLex(**args) 250 287 251 288 def getToc(self, **args): … … 284 321 285 322 if viewMode=="auto": # automodus gewaehlt 286 if docinfo.has_key("textURL") or docinfo. has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert323 if docinfo.has_key("textURL") or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 287 324 viewMode="text" 288 325 else: … … 320 357 321 358 if viewMode=="auto": # automodus gewaehlt 322 if docinfo.has_key('textURL') or docinfo. 
has_key('textURLPath'): #texturl gesetzt und textViewer konfiguriert359 if docinfo.has_key('textURL') or docinfo.get('textURLPath',None): #texturl gesetzt und textViewer konfiguriert 323 360 viewMode="text_dict" 324 361 else: … … 327 364 pageinfo = self.getPageinfo(start=start,current=pn, docinfo=docinfo,viewMode=viewMode,tocMode=tocMode) 328 365 329 pt = getattr(self.template, 'viewer_main') 366 if (docinfo.get('textURLPath',None)): 367 page = self.getTextPage(mode=viewMode, docinfo=docinfo, pageinfo=pageinfo) 368 pageinfo['textPage'] = page 369 tt = getattr(self, 'template') 370 pt = getattr(tt, 'viewer_main') 330 371 return pt(docinfo=docinfo,pageinfo=pageinfo,viewMode=viewMode,mk=self.generateMarks(mk)) 331 372 … … 343 384 def getBrowser(self): 344 385 """getBrowser the version of browser """ 345 names="" 346 names = browserCheck(self) 347 #logging.debug("XXXXXXXXXXXXXXXX: %s"%names) 348 return names 386 bt = browserCheck(self) 387 logging.debug("BROWSER VERSION: %s"%(bt)) 388 return bt 349 389 350 390 def findDigilibUrl(self): … … 365 405 return style 366 406 367 def getLink(self,param=None,val=None): 368 """link to documentviewer with parameter param set to val""" 369 params=self.REQUEST.form.copy() 407 def getLink(self, param=None, val=None, params=None, baseUrl=None, paramSep='&'): 408 """returns URL to documentviewer with parameter param set to val or from dict params""" 409 # copy existing request params 410 urlParams=self.REQUEST.form.copy() 411 # change single param 370 412 if param is not None: 371 413 if val is None: 372 if params.has_key(param):373 del params[param]414 if urlParams.has_key(param): 415 del urlParams[param] 374 416 else: 375 params[param] = str(val)417 urlParams[param] = str(val) 376 418 377 if params.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 378 params["mode"] = "imagepath" 379 params["url"] = getParentDir(params["url"]) 419 # change more params 420 if params is not 
None: 421 for k in params.keys(): 422 v = params[k] 423 if v is None: 424 # val=None removes param 425 if urlParams.has_key(k): 426 del urlParams[k] 427 428 else: 429 urlParams[k] = v 430 431 # FIXME: does this belong here? 432 if urlParams.get("mode", None) == "filepath": #wenn beim erst Aufruf filepath gesetzt wurde aendere das nun zu imagepath 433 urlParams["mode"] = "imagepath" 434 urlParams["url"] = getParentDir(urlParams["url"]) 380 435 381 # quote values and assemble into query string 382 #ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 383 ps = urllib.urlencode(params) 384 url=self.REQUEST['URL1']+"?"+ps 436 # quote values and assemble into query string (not escaping '/') 437 ps = paramSep.join(["%s=%s"%(k,urllib.quote_plus(v,'/')) for (k, v) in urlParams.items()]) 438 #ps = urllib.urlencode(urlParams) 439 if baseUrl is None: 440 baseUrl = self.REQUEST['URL1'] 441 442 url = "%s?%s"%(baseUrl, ps) 385 443 return url 386 444 387 def getLinkAmp(self,param=None,val=None): 445 446 def getLinkAmp(self, param=None, val=None, params=None, baseUrl=None): 388 447 """link to documentviewer with parameter param set to val""" 389 params=self.REQUEST.form.copy() 390 if param is not None: 391 if val is None: 392 if params.has_key(param): 393 del params[param] 394 else: 395 params[param] = str(val) 396 397 # quote values and assemble into query string 398 logging.debug("XYXXXXX: %s"%repr(params.items())) 399 ps = "&".join(["%s=%s"%(k,urllib.quote(v)) for (k, v) in params.items()]) 400 url=self.REQUEST['URL1']+"?"+ps 401 return url 448 return self.getLink(param, val, params, baseUrl, '&') 402 449 403 450 def getInfo_xml(self,url,mode): … … 464 511 raise IOError("Unable to get dir-info from %s"%(infoUrl)) 465 512 466 dom = Parse(txt) 467 sizes=dom.xpath("//dir/size") 468 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) 469 470 if sizes: 471 docinfo['numPages'] = int(getTextFromNode(sizes[0])) 513 dom = ET.fromstring(txt) 514 
#dom = Parse(txt) 515 size=getText(dom.find("size")) 516 #sizes=dom.xpath("//dir/size") 517 logging.debug("documentViewer (getparamfromdigilib) dirInfo:size=%s"%size) 518 519 if size: 520 docinfo['numPages'] = int(size) 472 521 else: 473 522 docinfo['numPages'] = 0 … … 514 563 raise IOError("Unable to read index meta from %s"%(url)) 515 564 516 dom = Parse(txt) 565 dom = ET.fromstring(txt) 566 #dom = Parse(txt) 517 567 return dom 518 568 … … 533 583 raise IOError("Unable to read infoXMLfrom %s"%(url)) 534 584 535 dom = Parse(txt) 585 dom = ET.fromstring(txt) 586 #dom = Parse(txt) 536 587 return dom 537 588 … … 551 602 dom = self.getDomFromIndexMeta(path) 552 603 553 acctype = dom.xpath("//access-conditions/access/@type") 554 if acctype and (len(acctype)>0): 555 access=acctype[0].value 556 if access in ['group', 'institution']: 557 access = getTextFromNode(dom.xpath("//access-conditions/access/name")[0]).lower() 604 acc = dom.find(".//access-conditions/access") 605 if acc is not None: 606 acctype = acc.get('type') 607 #acctype = dom.xpath("//access-conditions/access/@type") 608 if acctype: 609 access=acctype 610 if access in ['group', 'institution']: 611 access = dom.find(".//access-conditions/access/name").text.lower() 558 612 559 613 docinfo['accessType'] = access … … 577 631 logging.debug("documentViewer (getbibinfofromindexmeta cutted) path: %s"%(path)) 578 632 # put in all raw bib fields as dict "bib" 579 bib = dom.xpath("//bib/*") 633 bib = dom.find(".//bib/*") 634 #bib = dom.xpath("//bib/*") 580 635 if bib and len(bib)>0: 581 636 bibinfo = {} … … 586 641 # extract some fields (author, title, year) according to their mapping 587 642 metaData=self.metadata.main.meta.bib 588 bib type=dom.xpath("//bib/@type")589 if bibtype and (len(bibtype)>0):590 bibtype=bibtype[0].value591 else:643 bib = dom.find(".//bib") 644 bibtype=bib.get("type") 645 #bibtype=dom.xpath("//bib/@type") 646 if not bibtype: 592 647 bibtype="generic" 593 648 … … 598 653 
logging.debug("documentViewer (getbibinfofromindexmeta) bibtype:"+repr(bibtype)) 599 654 # if there is no mapping bibmap is empty (mapping sometimes has empty fields) 600 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 :655 if len(bibmap) > 0 and len(bibmap['author'][0]) > 0 or len(bibmap['title'][0]) > 0: 601 656 try: 602 docinfo['author']=getText FromNode(dom.xpath("//bib/%s"%bibmap['author'][0])[0])657 docinfo['author']=getText(bib.find(bibmap['author'][0])) 603 658 except: pass 604 659 try: 605 docinfo['title']=getText FromNode(dom.xpath("//bib/%s"%bibmap['title'][0])[0])660 docinfo['title']=getText(bib.find(bibmap['title'][0])) 606 661 except: pass 607 662 try: 608 docinfo['year']=getText FromNode(dom.xpath("//bib/%s"%bibmap['year'][0])[0])663 docinfo['year']=getText(bib.find(bibmap['year'][0])) 609 664 except: pass 610 logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 611 try: 612 docinfo['lang']=getTextFromNode(dom.xpath("//bib/lang")[0]) 613 except: 614 docinfo['lang']='' 615 665 666 # ROC: why is this here? 
667 # logging.debug("documentViewer (getbibinfofromindexmeta) using mapping for %s"%bibtype) 668 # try: 669 # docinfo['lang']=getTextFromNode(dom.find(".//bib/lang")[0]) 670 # except: 671 # docinfo['lang']='' 672 # try: 673 # docinfo['city']=getTextFromNode(dom.find(".//bib/city")[0]) 674 # except: 675 # docinfo['city']='' 676 # try: 677 # docinfo['number_of_pages']=getTextFromNode(dom.find(".//bib/number_of_pages")[0]) 678 # except: 679 # docinfo['number_of_pages']='' 680 # try: 681 # docinfo['series_volume']=getTextFromNode(dom.find(".//bib/series_volume")[0]) 682 # except: 683 # docinfo['series_volume']='' 684 # try: 685 # docinfo['number_of_volumes']=getTextFromNode(dom.find(".//bib/number_of_volumes")[0]) 686 # except: 687 # docinfo['number_of_volumes']='' 688 # try: 689 # docinfo['translator']=getTextFromNode(dom.find(".//bib/translator")[0]) 690 # except: 691 # docinfo['translator']='' 692 # try: 693 # docinfo['edition']=getTextFromNode(dom.find(".//bib/edition")[0]) 694 # except: 695 # docinfo['edition']='' 696 # try: 697 # docinfo['series_author']=getTextFromNode(dom.find(".//bib/series_author")[0]) 698 # except: 699 # docinfo['series_author']='' 700 # try: 701 # docinfo['publisher']=getTextFromNode(dom.find(".//bib/publisher")[0]) 702 # except: 703 # docinfo['publisher']='' 704 # try: 705 # docinfo['series_title']=getTextFromNode(dom.find(".//bib/series_title")[0]) 706 # except: 707 # docinfo['series_title']='' 708 # try: 709 # docinfo['isbn_issn']=getTextFromNode(dom.find(".//bib/isbn_issn")[0]) 710 # except: 711 # docinfo['isbn_issn']='' 616 712 return docinfo 617 713 … … 627 723 dom = self.getDomFromIndexMeta(path) 628 724 629 docinfo['name']=getText FromNode(dom.xpath("/resource/name")[0])725 docinfo['name']=getText(dom.find("name")) 630 726 logging.debug("documentViewer docinfo[name] %s"%docinfo['name']) 631 727 return docinfo … … 644 740 archiveName = None 645 741 646 archiveNames = dom.xpath("//resource/name") 647 if archiveNames and 
(len(archiveNames) > 0): 648 archiveName = getTextFromNode(archiveNames[0]) 649 else: 742 archiveName = getTextFromNode(dom.find("name")) 743 if not archiveName: 650 744 logging.warning("documentViewer (getdocinfofromtexttool) resource/name missing in: %s" % (url)) 651 745 652 archivePaths = dom.xpath("//resource/archive-path") 653 if archivePaths and (len(archivePaths) > 0): 654 archivePath = getTextFromNode(archivePaths[0]) 746 archivePath = getTextFromNode(dom.find("archive-path")) 747 if archivePath: 655 748 # clean up archive path 656 749 if archivePath[0] != '/': … … 668 761 raise IOError("Missing archive-path (for text-tool) in %s" % (url)) 669 762 670 imageDirs = dom.xpath("//texttool/image") 671 if imageDirs and (len(imageDirs) > 0): 672 imageDir = getTextFromNode(imageDirs[0]) 673 674 else: 763 imageDir = getText(dom.find(".//texttool/image")) 764 765 if not imageDir: 675 766 # we balk with no image tag / not necessary anymore because textmode is now standard 676 767 #raise IOError("No text-tool info in %s"%(url)) … … 689 780 docinfo['imageURL'] = self.digilibBaseUrl + "/servlet/Scaler?fn=" + imageDir 690 781 691 viewerUrls = dom.xpath("//texttool/digiliburlprefix") 692 if viewerUrls and (len(viewerUrls) > 0): 693 viewerUrl = getTextFromNode(viewerUrls[0]) 782 viewerUrl = getText(dom.find(".//texttool/digiliburlprefix")) 783 if viewerUrl: 694 784 docinfo['viewerURL'] = viewerUrl 695 785 696 786 # old style text URL 697 textUrls = dom.xpath("//texttool/text") 698 if textUrls and (len(textUrls) > 0): 699 textUrl = getTextFromNode(textUrls[0]) 787 textUrl = getText(dom.find(".//texttool/text")) 788 if textUrl: 700 789 if urlparse.urlparse(textUrl)[0] == "": #keine url 701 790 textUrl = os.path.join(archivePath, textUrl) … … 707 796 708 797 # new style text-url-path 709 textUrls = dom.xpath("//texttool/text-url-path") 710 if textUrls and (len(textUrls) > 0): 711 textUrl = getTextFromNode(textUrls[0]) 798 textUrl = 
getText(dom.find(".//texttool/text-url-path")) 799 if textUrl: 712 800 docinfo['textURLPath'] = textUrl 713 if not docinfo['imagePath']: 801 textUrlkurz = string.split(textUrl, ".")[0] 802 docinfo['textURLPathkurz'] = textUrlkurz 803 #if not docinfo['imagePath']: 714 804 # text-only, no page images 715 docinfo = self.getNumTextPages(docinfo) 805 #docinfo = self.getNumTextPages(docinfo) 806 716 807 717 presentationUrl s = dom.xpath("//texttool/presentation")808 presentationUrl = getText(dom.find(".//texttool/presentation")) 718 809 docinfo = self.getBibinfoFromIndexMeta(url, docinfo=docinfo, dom=dom) # get info von bib tag 719 810 docinfo = self.getNameFromIndexMeta(url, docinfo=docinfo, dom=dom) 720 811 721 812 722 if presentationUrl s and (len(presentationUrls) > 0): # ueberschreibe diese durch presentation informationen813 if presentationUrl: # ueberschreibe diese durch presentation informationen 723 814 # presentation url ergiebt sich ersetzen von index.meta in der url der fuer die Metadaten 724 815 # durch den relativen Pfad auf die presentation infos 725 presentationPath = getTextFromNode(presentationUrls[0])816 presentationPath = presentationUrl 726 817 if url.endswith("index.meta"): 727 818 presentationUrl = url.replace('index.meta', presentationPath) … … 741 832 dom=self.getPresentationInfoXML(url) 742 833 try: 743 docinfo['author']=getText FromNode(dom.xpath("//author")[0])834 docinfo['author']=getText(dom.find(".//author")) 744 835 except: 745 836 pass 746 837 try: 747 docinfo['title']=getText FromNode(dom.xpath("//title")[0])838 docinfo['title']=getText(dom.find(".//title")) 748 839 except: 749 840 pass 750 841 try: 751 docinfo['year']=getText FromNode(dom.xpath("//date")[0])842 docinfo['year']=getText(dom.find(".//date")) 752 843 except: 753 844 pass … … 797 888 logging.error("documentViewer (getdocinfo) unknown mode: %s!"%mode) 798 889 raise ValueError("Unknown mode %s! 
Has to be one of 'texttool','imagepath','filepath'."%(mode)) 799 890 891 # FIXME: fake texturlpath 892 if not docinfo.has_key('textURLPath'): 893 docinfo['textURLPath'] = None 894 800 895 logging.debug("documentViewer (getdocinfo) docinfo: %s"%docinfo) 801 896 #logging.debug("documentViewer (getdocinfo) docinfo: %s"%) … … 827 922 pageinfo['viewMode'] = viewMode 828 923 pageinfo['tocMode'] = tocMode 829 pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg PlusNorm')830 pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','')924 pageinfo['characterNormalization'] = self.REQUEST.get('characterNormalization','reg') 925 #pageinfo['optionToggle'] = self.REQUEST.get('optionToggle','1') 831 926 pageinfo['query'] = self.REQUEST.get('query','') 832 927 pageinfo['queryType'] = self.REQUEST.get('queryType','') … … 836 931 pageinfo['tocPageSize'] = self.REQUEST.get('tocPageSize', '30') 837 932 pageinfo['queryPageSize'] =self.REQUEST.get('queryPageSize', '10') 838 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 933 pageinfo['tocPN'] = self.REQUEST.get('tocPN', '1') 839 934 toc = int (pageinfo['tocPN']) 840 935 pageinfo['textPages'] =int (toc) 841 842 843 936 844 937 if 'tocSize_%s'%tocMode in docinfo: -
version.txt
r408 r453 1 DocumentViewer 0.6.0 1 DocumentViewer 0.7.0
Note: See TracChangeset
for help on using the changeset viewer.