Changeset 453:beb7ccb92564 in documentViewer for MpdlXmlTextServer.py
- Timestamp: Jul 14, 2011, 5:43:56 PM (13 years ago)
- Branch: elementtree
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
MpdlXmlTextServer.py
r407 r453 3 3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 4 4 from Ft.Xml import EMPTY_NAMESPACE, Parse 5 5 from Ft.Xml.Domlette import NonvalidatingReader 6 import Ft.Xml.Domlette 7 import cStringIO 8 9 import xml.etree.ElementTree as ET 10 11 import md5 6 12 import sys 7 13 import logging 8 14 import urllib 9 15 import documentViewer 10 from documentViewer import getTextFromNode, serializeNode 16 #from documentViewer import getTextFromNode, serializeNode 17 18 def getText(node): 19 """get the cdata content of a node""" 20 if node is None: 21 return "" 22 # ET: 23 text = node.text or "" 24 for e in node: 25 text += gettext(e) 26 if e.tail: 27 text += e.tail 28 29 return text 30 31 def serialize(node): 32 """returns a string containing an XML snippet of node""" 33 s = ET.tostring(node, 'UTF-8') 34 # snip off XML declaration 35 if s.startswith('<?xml'): 36 i = s.find('?>') 37 return s[i+3:] 38 39 return s 40 41 42 def getTextFromNode(node): 43 """get the cdata content of a node""" 44 if node is None: 45 return "" 46 # ET: 47 #text = node.text or "" 48 #for e in node: 49 # text += gettext(e) 50 # if e.tail: 51 # text += e.tail 52 53 # 4Suite: 54 nodelist=node.childNodes 55 text = "" 56 for n in nodelist: 57 if n.nodeType == node.TEXT_NODE: 58 text = text + n.data 59 60 return text 61 62 def serializeNode(node, encoding="utf-8"): 63 """returns a string containing node as XML""" 64 #s = ET.tostring(node) 65 66 # 4Suite: 67 stream = cStringIO.StringIO() 68 Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) 69 s = stream.getvalue() 70 stream.close() 71 72 return s 73 11 74 12 75 class MpdlXmlTextServer(SimpleItem): … … 20 83 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals()) 21 84 22 def __init__(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 85 def 
__init__(self,id,title="",serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40): 86 #def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/", serverName=None, timeout=40): 87 23 88 """constructor""" 24 89 self.id=id … … 39 104 return documentViewer.getHttpData(url,data,timeout=self.timeout) 40 105 41 def getSearch(self, p n=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None, characterNormalization=None, optionToggle=None):106 def getSearch(self, pageinfo=None, docinfo=None): 42 107 """get search list""" 43 108 docpath = docinfo['textURLPath'] 44 109 url = docinfo['url'] 45 110 pagesize = pageinfo['queryPageSize'] 46 pn = pageinfo ['searchPN']111 pn = pageinfo.get('searchPN',1) 47 112 sn = pageinfo['sn'] 48 113 highlightQuery = pageinfo['highlightQuery'] … … 52 117 tocMode = pageinfo['tocMode'] 53 118 characterNormalization = pageinfo['characterNormalization'] 54 optionToggle = pageinfo['optionToggle']119 #optionToggle = pageinfo['optionToggle'] 55 120 tocPN = pageinfo['tocPN'] 56 121 selfurl = self.absolute_url() 57 58 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&optionToggle=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization,optionToggle ,urllib.quote(highlightQuery))) 59 #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) 60 122 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 
'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery))) 61 123 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) 62 124 pagedom = Parse(pagexml) 125 126 """ 127 pagedivs = pagedom.xpath("//div[@class='queryResultHits']") 128 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): 129 if len(pagedivs)>0: 130 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) 131 s = getTextFromNode(pagedivs[0]) 132 s1 = int(s)/10+1 133 try: 134 docinfo['queryResultHits'] = int(s1) 135 logging.debug("SEARCH ENTRIES: %s"%(s1)) 136 except: 137 docinfo['queryResultHits'] = 0 138 """ 63 139 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 64 140 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") … … 72 148 if href.startswith('page-fragment.xql'): 73 149 selfurl = self.absolute_url() 74 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s& optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization))150 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization)) 75 151 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 152 #logging.debug("PUREXML :%s"%(serializeNode(pagenode))) 76 153 return serializeNode(pagenode) 77 154 if (queryType=="fulltextMorph"): … … 86 163 if href.startswith('page-fragment.xql'): 87 164 selfurl = self.absolute_url() 88 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s& 
optionToggle=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,optionToggle,characterNormalization))165 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)) 89 166 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 90 167 if href.startswith('../lt/lemma.xql'): 91 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_ lemma_New'%(selfurl))168 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl)) 92 169 l.setAttributeNS(None, 'target', '_blank') 93 170 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 94 l.setAttributeNS(None, 'on Click', 'popupWin.focus();')171 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 95 172 pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']") 96 173 return serializeNode(pagenode) … … 104 181 if hrefNode: 105 182 href = hrefNode.nodeValue 106 hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s& optionToggle=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,optionToggle,characterNormalization))183 hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) 107 184 if href.startswith('../lt/lex.xql'): 108 hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_ voc'%selfurl)185 hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl) 109 186 l.setAttributeNS(None, 'target', '_blank') 110 187 l.setAttributeNS(None, 'onClick',"popupWin = 
window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 111 l.setAttributeNS(None, 'on Click', 'popupWin.focus();')188 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 112 189 if href.startswith('../lt/lemma.xql'): 113 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'% selfurl)190 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl)) 114 191 l.setAttributeNS(None, 'target', '_blank') 115 192 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 116 l.setAttributeNS(None, 'on Click', 'popupWin.focus();')193 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 117 194 return serializeNode(pagenode) 118 195 return "no text here" 119 120 """def getNumPages(self, docinfo): 121 ""get list of pages from fulltext and put in docinfo"" 122 if 'numPages' in docinfo: 123 # already there 124 return docinfo 125 xquery = '//pb' 126 text = self.getServerData("xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) 127 docinfo['numPages'] = text.count("<pb ") 128 return docinfo 129 """ 130 def getNumTextPages (self, docinfo): 131 """get list of pages from fulltext (texts without images) and put in docinfo""" 132 if 'numPages' in docinfo: 133 # allredy there 134 return docinfo 135 xpath ='/count(//pb)' 136 text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'], xpath)) 137 dom = Parse(text) 138 result= dom.xpath("//result/resultPage") 139 140 docinfo['numPages']=int(getTextFromNode(result[0])) 141 return docinfo 142 196 143 197 def getGisPlaces(self, docinfo=None, pageinfo=None): 144 198 """ Show all Gis Places of whole Page""" 145 199 xpath='//place' 146 docpath = docinfo['textURLPath'] 200 docpath = docinfo.get('textURLPath',None) 201 if not docpath: 202 return None 203 147 204 url = 
docinfo['url'] 148 205 selfurl = self.absolute_url() … … 158 215 hrefList.append(href) 159 216 myList = ",".join(hrefList) 160 logging.debug("getGisPlaces :%s"%(myList))217 #logging.debug("getGisPlaces :%s"%(myList)) 161 218 return myList 162 219 … … 179 236 hrefList.append(href) 180 237 myList = ",".join(hrefList) 181 logging.debug("getALLGisPlaces :%s"%(myList))238 #logging.debug("getALLGisPlaces :%s"%(myList)) 182 239 return myList 183 184 185 def getPDF (self, docinfo=None, pageinfo=None): 186 """Show and Save different Pages as PDF in Options""" 187 selfurl=self.absolute_url() 188 pn=pageinfo['current'] 189 190 viewMode =pageinfo['viewMode'] 191 192 #text = ("page-fragment.xql","document=%s&mode=%s&pn=%s&export=%s"%(docinfo['textURLPath'], 'text', pn,'pdf')) 193 #text = self.getServerData("page-fragment.xql", "document=%s&mode=%s&pn=%s&export=%s"(docinfo['textURLPath'],'text', pn,'pdf')) 194 #logging.debug("text :%s"%(text)) 195 #dom =Parse(text) 196 #logging.debug("text :%s"%(text)) 197 #return text 240 198 241 199 def getOrigPages (self, docinfo=None, pageinfo=None): 200 """Show original page """ 201 docpath = docinfo['textURLPath'] 202 logging.debug ("docinfo['textURLPath']=%s"%(docinfo['textURLPath'])) 203 #url = docinfo['url'] 204 selfurl = self.absolute_url() 205 pn =pageinfo['current'] 206 207 viewMode= pageinfo['viewMode'] 208 text = self.getServerData("page-fragment.xql","document=%s&mode=%s&pn=%s"%(docinfo['textURLPath'], 'text', pn)) 209 dom =Parse(text) 210 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 211 logging.debug("YYYYYYpagedivs :%s"%(pagedivs)) 212 if len(pagedivs)>0: 213 originalPage= getTextFromNode(pagedivs[0]) 214 #return docinfo['originalPage'] 215 return originalPage 216 217 218 def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, viewMode=None, tocMode=None, tocPN=None, characterNormalization="regPlusNorm", highlightQuery=None, sn=None, optionToggle=None): 242 def getTextPage(self, mode="text_dict", pn=1, 
docinfo=None, pageinfo=None): 219 243 """returns single page from fulltext""" 220 244 docpath = docinfo['textURLPath'] 221 245 path = docinfo['textURLPath'] 222 url = docinfo ['url']223 name = docinfo ['name']224 viewMode= pageinfo['viewMode']246 url = docinfo.get('url',None) 247 name = docinfo.get('name',None) 248 pn =pageinfo['current'] 225 249 sn = pageinfo['sn'] 250 #optionToggle =pageinfo ['optionToggle'] 226 251 highlightQuery = pageinfo['highlightQuery'] 227 252 #mode = pageinfo ['viewMode'] 228 253 tocMode = pageinfo['tocMode'] 229 254 characterNormalization=pageinfo['characterNormalization'] … … 234 259 else: 235 260 textmode = mode 236 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery))261 237 262 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization) 238 263 if highlightQuery is not None: 239 264 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn) 240 #logging.debug("documentViewer highlightQuery: %s"%(highlightQuery))265 241 266 pagexml = self.getServerData("page-fragment.xql",textParam) 242 logging.debug("documentViewer highlightQuery: %s"%(highlightQuery)) 243 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False) 244 245 pagedom = Parse(pagexml) 267 dom = ET.fromstring(pagexml) 268 #dom = NonvalidatingReader.parseStream(pagexml) 269 270 #original Pages 271 #pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 272 273 """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 274 if len(pagedivs)>0: 275 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 276 logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig'])) 277 278 #original Pages Norm 279 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 280 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 281 if len(pagedivs)>0: 282 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 283 logging.debug("ORIGINAL 
PAGE NORM: %s"%(docinfo['pageNumberOrigNorm'])) 284 """ 285 #figureEntries 286 # pagedivs = dom.xpath("//div[@class='countFigureEntries']") 287 # if pagedivs == dom.xpath("//div[@class='countFigureEntries']"): 288 # if len(pagedivs)>0: 289 # docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0]) 290 # s = getTextFromNode(pagedivs[0]) 291 # if s=='0': 292 # try: 293 # docinfo['countFigureEntries'] = int(s) 294 # except: 295 # docinfo['countFigureEntries'] = 0 296 # else: 297 # s1 = int(s)/30+1 298 # try: 299 # docinfo['countFigureEntries'] = int(s1) 300 # except: 301 # docinfo['countFigureEntries'] = 0 302 # 303 # #allPlaces 304 # pagedivs = dom.xpath("//div[@class='countPlaces']") 305 # if pagedivs == dom.xpath("//div[@class='countPlaces']"): 306 # if len(pagedivs)>0: 307 # docinfo['countPlaces']= getTextFromNode(pagedivs[0]) 308 # s = getTextFromNode(pagedivs[0]) 309 # try: 310 # docinfo['countPlaces'] = int(s) 311 # except: 312 # docinfo['countPlaces'] = 0 313 # 314 # #tocEntries 315 # pagedivs = dom.xpath("//div[@class='countTocEntries']") 316 # if pagedivs == dom.xpath("//div[@class='countTocEntries']"): 317 # if len(pagedivs)>0: 318 # docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0])) 319 # s = getTextFromNode(pagedivs[0]) 320 # if s=='0': 321 # try: 322 # docinfo['countTocEntries'] = int(s) 323 # except: 324 # docinfo['countTocEntries'] = 0 325 # else: 326 # s1 = int(s)/30+1 327 # try: 328 # docinfo['countTocEntries'] = int(s1) 329 # except: 330 # docinfo['countTocEntries'] = 0 331 332 #numTextPages 333 #pagedivs = dom.xpath("//div[@class='countPages']") 334 alldivs = dom.findall(".//div") 335 pagediv = None 336 for div in alldivs: 337 dc = div.get('class') 338 if dc == 'pageContent': 339 pagediv = div 340 341 if dc == 'countPages': 342 try: 343 np = int(div.text) 344 docinfo['numPages'] = np 345 pageinfo['end'] = min(pageinfo['end'], np) 346 pageinfo['numgroups'] = int(np / pageinfo['groupsize']) 347 if np % pageinfo['groupsize'] > 0: 348 
pageinfo['numgroups'] += 1 349 350 except: 351 docinfo['numPages'] = 0 352 353 break 354 355 # ROC: why? 356 # else: 357 # #no full text -- init to 0 358 # docinfo['pageNumberOrig'] = 0 359 # docinfo['countFigureEntries'] = 0 360 # docinfo['countPlaces'] = 0 361 # docinfo['countTocEntries'] = 0 362 # docinfo['numPages'] = 0 363 # docinfo['pageNumberOrigNorm'] = 0 364 # #return docinfo 365 246 366 # plain text mode 247 367 if mode == "text": 248 # first div contains text 249 pagedivs = pagedom.xpath("/div") 250 if len(pagedivs) > 0: 251 pagenode = pagedivs[0] 252 links = pagenode.xpath("//a") 368 #pagedivs = dom.xpath("/div") 369 if pagediv: 370 links = pagediv.findall(".//a") 253 371 for l in links: 254 hrefNode = l.getAttributeNodeNS(None, u"href") 255 if hrefNode: 256 href= hrefNode.nodeValue 257 if href.startswith('#note-'): 258 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) 259 return serializeNode(pagenode) 372 href = l.get('href') 373 if href and href.startswith('#note-'): 374 href = href.replace('#note-',"?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn)) 375 l.set('href', href) 376 logging.debug("page=%s"%ET.tostring(pagediv, 'UTF-8')) 377 return serialize(pagediv) 378 260 379 if mode == "xml": 261 # first div contains text 262 pagedivs = pagedom.xpath("/div") 263 if len(pagedivs) > 0: 264 pagenode = pagedivs[0] 265 return serializeNode(pagenode) 380 if pagediv: 381 return serialize(pagediv) 382 383 if mode == "pureXml": 384 if pagediv: 385 return serialize(pagediv) 386 266 387 if mode == "gis": 267 # first div contains text 268 pagedivs = pagedom.xpath("/div") 269 if len(pagedivs) > 0: 270 pagenode = pagedivs[0] 271 links =pagenode.xpath("//a") 272 for l in links: 273 hrefNode =l.getAttributeNodeNS(None, u"href") 274 if hrefNode: 275 href=hrefNode.nodeValue 276 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): 277 hrefNode.nodeValue 
=href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name) 278 l.setAttributeNS(None, 'target', '_blank') 279 return serializeNode(pagenode) 388 if pagediv: 389 # check all a-tags 390 links = pagediv.findall(".//a") 391 for l in links: 392 href = l.get('href') 393 if href: 394 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): 395 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)) 396 l.set('target', '_blank') 397 398 return serialize(pagenode) 280 399 281 if mode == "pureXml":282 # first div contains text283 pagedivs = pagedom.xpath("/div")284 if len(pagedivs) > 0:285 pagenode = pagedivs[0]286 return serializeNode(pagenode)287 400 # text-with-links mode 288 401 if mode == "text_dict": 289 # first div contains text 290 pagedivs = pagedom.xpath("/div") 291 if len(pagedivs) > 0: 292 pagenode = pagedivs[0] 402 if pagediv: 293 403 # check all a-tags 294 links = page node.xpath("//a")404 links = pagediv.findall(".//a") 295 405 for l in links: 296 hrefNode = l.getAttributeNodeNS(None, u"href") 297 if hrefNode: 406 href = l.get('href') 407 408 if href: 298 409 # is link with href 299 href = hrefNode.nodeValue 300 if href.startswith('lt/lex.xql'): 410 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'): 301 411 # is pollux link 302 412 selfurl = self.absolute_url() 303 413 # change href 304 hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)414 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl)) 305 415 # add target 306 l.setAttributeNS(None, 'target', '_blank') 307 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") 308 l.setAttributeNS(None, 'onClick', 'popupWin.focus();') 309 if href.startswith('lt/lemma.xql'): 416 l.set('target', '_blank') 417 
418 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'): 310 419 selfurl = self.absolute_url() 311 hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) 312 l.setAttributeNS(None, 'target', '_blank') 313 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") 314 l.setAttributeNS(None, 'onClick', 'popupWin.focus();') 420 l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)) 421 l.set('target', '_blank') 422 l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;") 423 l.set('ondblclick', 'popupWin.focus();') 424 315 425 if href.startswith('#note-'): 316 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) 317 return serializeNode(pagenode) 426 l.set('href', href.replace('#note-',"?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))) 427 428 return serialize(pagediv) 429 318 430 return "no text here" 319 320 def getTranslate(self, query=None, language=None): 431 432 def getOrigPages(self, docinfo=None, pageinfo=None): 433 docpath = docinfo['textURLPath'] 434 pn =pageinfo['current'] 435 selfurl = self.absolute_url() 436 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 437 dom = Parse(pagexml) 438 pagedivs = dom.xpath("//div[@class='pageNumberOrig']") 439 if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"): 440 if len(pagedivs)>0: 441 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0]) 442 return docinfo['pageNumberOrig'] 443 444 def getOrigPagesNorm(self, docinfo=None, pageinfo=None): 445 docpath = docinfo['textURLPath'] 446 pn =pageinfo['current'] 447 selfurl = self.absolute_url() 
448 pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn)) 449 dom = Parse(pagexml) 450 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']") 451 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"): 452 if len(pagedivs)>0: 453 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0]) 454 return docinfo['pageNumberOrigNorm'] 455 456 457 def getTranslate(self, word=None, language=None): 321 458 """translate into another languages""" 322 data = self.getServerData("lt/ lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query))459 data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html") 323 460 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) 324 461 return data … … 326 463 def getLemma(self, lemma=None, language=None): 327 464 """simular words lemma """ 328 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(lemma)) 329 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) 465 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html") 330 466 return data 331 467 332 def getLemma New(self, query=None, language=None):468 def getLemmaQuery(self, query=None, language=None): 333 469 """simular words lemma """ 334 data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+urllib.quote(query)) 335 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) 470 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html") 336 471 return data 337 472 338 def getQuery (self, docinfo=None, pageinfo=None, query=None, 
queryType=None, pn=1, optionToggle=None): 339 """number of""" 473 def getLex(self, query=None, language=None): 474 #simular words lemma 475 data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) 476 return data 477 478 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): 479 #number of 340 480 docpath = docinfo['textURLPath'] 341 481 pagesize = pageinfo['queryPageSize'] … … 347 487 348 488 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 349 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False)350 489 pagedom = Parse(pagexml) 351 490 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 352 491 tocSearch = int(getTextFromNode(numdivs[0])) 353 logging.debug("documentViewer (gettoc) tocSearch: %s"%(tocSearch))354 492 tc=int((tocSearch/10)+1) 355 logging.debug("documentViewer (gettoc) tc: %s"%(tc))356 493 return tc 357 358 def getQueryResultHits(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1, optionsClose=None): 359 360 """number of hits in Search mode""" 361 docpath = docinfo['textURLPath'] 362 pagesize = pageinfo['queryPageSize'] 363 pn = pageinfo['searchPN'] 364 query =pageinfo['query'] 365 queryType =pageinfo['queryType'] 366 tocSearch = 0 367 tocDiv = None 368 369 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 370 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', 
queryType, query, pagesize, pn) ,outputUnicode=False) 371 pagedom = Parse(pagexml) 372 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 373 tocSearch = int(getTextFromNode(numdivs[0])) 374 tc=int((tocSearch/10)+1) 375 return tc 376 377 def getQueryResultHitsText(self, docinfo=None, pageinfo=None): 378 """number of hits in Text of Contents mode""" 379 380 docpath = docinfo['textURLPath'] 381 pagesize = pageinfo['queryPageSize'] 382 pn = pageinfo['searchPN'] 383 query =pageinfo['query'] 384 queryType =pageinfo['queryType'] 385 tocSearch = 0 386 tocDiv = None 387 tocMode = pageinfo['tocMode'] 388 tocPN = pageinfo['tocPN'] 389 pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'toc')) 390 pagedom = Parse(pagexml) 391 logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) 392 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 393 tocSearch = int(getTextFromNode(numdivs[0])) 394 tc=int((tocSearch/30)+1) 395 return tc 396 397 def getQueryResultHitsFigures(self, docinfo=None, pageinfo=None): 398 """number of hits in Text of Figures mode""" 399 400 docpath = docinfo['textURLPath'] 401 pagesize = pageinfo['queryPageSize'] 402 pn = pageinfo['searchPN'] 403 query =pageinfo['query'] 404 queryType =pageinfo['queryType'] 405 tocSearch = 0 406 tocDiv = None 407 tocMode = pageinfo['tocMode'] 408 tocPN = pageinfo['tocPN'] 409 pagexml=self.getServerData("doc-query.xql", "document=%s&queryType=%s"%(docpath,'figures')) 410 pagedom = Parse(pagexml) 411 logging.debug("documentViewer (pagedom) pagedom: %s"%(pagedom)) 412 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 413 tocSearch = int(getTextFromNode(numdivs[0])) 414 tc=int((tocSearch/30)+1) 415 return tc 416 417 494 418 495 def getToc(self, mode="text", docinfo=None): 419 496 """loads table of contents and stores in docinfo""" 420 logging.debug("documentViewer (gettoc) mode: %s"%(mode))421 497 if mode == "none": 422 498 return docinfo … … 438 514 439 515 pagexml = 
self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) 440 #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False)516 441 517 # post-processing downloaded xml 442 518 pagedom = Parse(pagexml) … … 463 539 viewMode= pageinfo['viewMode'] 464 540 characterNormalization = pageinfo ['characterNormalization'] 465 optionToggle =pageinfo ['optionToggle']541 #optionToggle =pageinfo ['optionToggle'] 466 542 tocMode = pageinfo['tocMode'] 467 543 tocPN = pageinfo['tocPN'] 468 544 469 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm &optionToggle=1"%(docpath,queryType, pagesize, pn))470 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s &optionToggle=1'%(selfurl,url, viewMode, tocMode, tocPN))545 data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm"%(docpath,queryType, pagesize, pn)) 546 page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) 471 547 text = page.replace('mode=image','mode=texttool') 472 logging.debug("documentViewer (characterNormalization) characterNormalization: %s"%(characterNormalization))473 #logging.debug("documentViewer (characterNormalization) text: %s"%(text))474 548 return text 475 549 476 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 550 def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 551 #def 
manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None): 477 552 """change settings""" 478 553 self.title=title … … 488 563 return pt() 489 564 490 def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 565 def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None): 566 #def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None): 491 567 """add zogiimage""" 492 568 newObj = MpdlXmlTextServer(id,title,serverUrl,timeout) … … 494 570 if RESPONSE is not None: 495 571 RESPONSE.redirect('manage_main') 496 497 498
Note: See TracChangeset for help on using the changeset viewer.