Annotation of documentViewer/MpdlXmlTextServer.py, revision 1.238.2.2
1.2 casties 1:
2: from OFS.SimpleItem import SimpleItem
3: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
1.238.2.2! casties 4:
1.2 casties 5: from Ft.Xml import EMPTY_NAMESPACE, Parse
1.238 abukhman 6: from Ft.Xml.Domlette import NonvalidatingReader
1.238.2.1 casties 7: import Ft.Xml.Domlette
8: import cStringIO
9:
10: import xml.etree.ElementTree as ET
1.2 casties 11:
1.238.2.2! casties 12: import re
1.2 casties 13: import logging
1.5 casties 14: import urllib
1.2 casties 15: import documentViewer
1.238.2.1 casties 16: #from documentViewer import getTextFromNode, serializeNode
17:
def intOr0(s, default=0):
    """Convert s to an int, falling back to default.

    Returns int(s) if the conversion succeeds. Returns default if s cannot
    be converted (e.g. None, an empty or non-numeric string, infinity).
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # only conversion failures select the default; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed here
        return default
! 24:
def getText(node):
    """Return the concatenated text content of an ElementTree node.

    Collects the node's own text, the text of every child (recursively)
    and the tail text that follows each child, in document order.
    Returns "" if node is None.
    """
    if node is None:
        return ""

    # ET: text before the first child, then each child followed by its tail
    parts = [node.text or ""]
    for child in node:
        # recurse with the correct function name (was the undefined "gettext")
        parts.append(getText(child))
        if child.tail:
            parts.append(child.tail)

    return "".join(parts)
37:
def serialize(node):
    """Return node serialized as a UTF-8 encoded XML snippet.

    The XML declaration that ET.tostring() puts in front of UTF-8 output is
    removed so the result can be embedded in a page.
    """
    s = ET.tostring(node, 'UTF-8')
    # snip off XML declaration; byte literals match the encoded output
    if s.startswith(b'<?xml'):
        end = s.find(b'?>')
        if end >= 0:
            # drop the declaration and whatever whitespace follows it
            # instead of assuming exactly one character after "?>"
            return s[end + 2:].lstrip()

    return s
47:
48:
def getTextFromNode(node):
    """Return the text held in the direct text-node children of a DOM node.

    Only immediate children of type TEXT_NODE contribute; text inside child
    elements is not included. Returns "" if node is None.
    """
    if node is None:
        return ""

    # 4Suite: concatenate the data of all direct text children
    return "".join([child.data
                    for child in node.childNodes
                    if child.nodeType == node.TEXT_NODE])
68:
def serializeNode(node, encoding="utf-8"):
    """Return a string containing the 4Suite DOM node as XML.

    The node is printed with Ft.Xml.Domlette.Print into an in-memory buffer
    using the given encoding and the buffer content is returned.
    """
    # 4Suite:
    buf = cStringIO.StringIO()
    Ft.Xml.Domlette.Print(node, stream=buf, encoding=encoding)
    xml = buf.getvalue()
    buf.close()

    return xml
80:
1.2 casties 81:
82: class MpdlXmlTextServer(SimpleItem):
83: """TextServer implementation for MPDL-XML eXist server"""
84: meta_type="MPDL-XML TextServer"
85:
86: manage_options=(
87: {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
88: )+SimpleItem.manage_options
89:
90: manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
91:
1.238.2.2! casties 92: def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
1.231 abukhman 93:
1.2 casties 94: """constructor"""
95: self.id=id
96: self.title=title
97: self.timeout = timeout
1.3 casties 98: if serverName is None:
99: self.serverUrl = serverUrl
100: else:
101: self.serverUrl = "http://%s/mpdl/interface/"%serverName
1.2 casties 102:
103: def getHttpData(self, url, data=None):
104: """returns result from url+data HTTP request"""
105: return documentViewer.getHttpData(url,data,timeout=self.timeout)
106:
107: def getServerData(self, method, data=None):
108: """returns result from text server for method+data"""
109: url = self.serverUrl+method
110: return documentViewer.getHttpData(url,data,timeout=self.timeout)
111:
1.238.2.2! casties 112: # WTF: what does this really do? can it be integrated in getPage?
1.235 abukhman 113: def getSearch(self, pageinfo=None, docinfo=None):
1.2 casties 114: """get search list"""
1.238.2.2! casties 115: logging.debug("getSearch()")
1.2 casties 116: docpath = docinfo['textURLPath']
117: url = docinfo['url']
118: pagesize = pageinfo['queryPageSize']
1.222 abukhman 119: pn = pageinfo.get('searchPN',1)
1.2 casties 120: sn = pageinfo['sn']
121: highlightQuery = pageinfo['highlightQuery']
1.34 abukhman 122: query =pageinfo['query']
1.2 casties 123: queryType =pageinfo['queryType']
124: viewMode= pageinfo['viewMode']
125: tocMode = pageinfo['tocMode']
1.24 abukhman 126: characterNormalization = pageinfo['characterNormalization']
1.237 abukhman 127: #optionToggle = pageinfo['optionToggle']
1.2 casties 128: tocPN = pageinfo['tocPN']
129: selfurl = self.absolute_url()
1.237 abukhman 130: data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery)))
1.2 casties 131: pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
132: pagedom = Parse(pagexml)
1.222 abukhman 133:
134: """
135: pagedivs = pagedom.xpath("//div[@class='queryResultHits']")
136: if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")):
137: if len(pagedivs)>0:
138: docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0]))
139: s = getTextFromNode(pagedivs[0])
140: s1 = int(s)/10+1
141: try:
142: docinfo['queryResultHits'] = int(s1)
143: logging.debug("SEARCH ENTRIES: %s"%(s1))
144: except:
145: docinfo['queryResultHits'] = 0
146: """
1.2 casties 147: if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):
148: pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
149: if len(pagedivs)>0:
150: pagenode=pagedivs[0]
151: links=pagenode.xpath("//a")
152: for l in links:
153: hrefNode = l.getAttributeNodeNS(None, u"href")
154: if hrefNode:
155: href = hrefNode.nodeValue
156: if href.startswith('page-fragment.xql'):
157: selfurl = self.absolute_url()
1.237 abukhman 158: pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization))
1.2 casties 159: hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
1.230 abukhman 160: #logging.debug("PUREXML :%s"%(serializeNode(pagenode)))
1.2 casties 161: return serializeNode(pagenode)
162: if (queryType=="fulltextMorph"):
163: pagedivs = pagedom.xpath("//div[@class='queryResult']")
164: if len(pagedivs)>0:
165: pagenode=pagedivs[0]
166: links=pagenode.xpath("//a")
167: for l in links:
168: hrefNode = l.getAttributeNodeNS(None, u"href")
169: if hrefNode:
170: href = hrefNode.nodeValue
171: if href.startswith('page-fragment.xql'):
172: selfurl = self.absolute_url()
1.237 abukhman 173: pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization))
1.2 casties 174: hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
175: if href.startswith('../lt/lemma.xql'):
1.235 abukhman 176: hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl))
1.2 casties 177: l.setAttributeNS(None, 'target', '_blank')
178: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
1.235 abukhman 179: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
1.2 casties 180: pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']")
181: return serializeNode(pagenode)
182: if (queryType=="ftIndex")or(queryType=="ftIndexMorph"):
183: pagedivs= pagedom.xpath("//div[@class='queryResultPage']")
184: if len(pagedivs)>0:
185: pagenode=pagedivs[0]
186: links=pagenode.xpath("//a")
187: for l in links:
188: hrefNode = l.getAttributeNodeNS(None, u"href")
189: if hrefNode:
190: href = hrefNode.nodeValue
1.237 abukhman 191: hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization))
1.2 casties 192: if href.startswith('../lt/lex.xql'):
1.235 abukhman 193: hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl)
1.2 casties 194: l.setAttributeNS(None, 'target', '_blank')
195: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
1.235 abukhman 196: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
1.2 casties 197: if href.startswith('../lt/lemma.xql'):
1.235 abukhman 198: hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl))
1.2 casties 199: l.setAttributeNS(None, 'target', '_blank')
200: l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
1.235 abukhman 201: l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
1.2 casties 202: return serializeNode(pagenode)
203: return "no text here"
1.222 abukhman 204:
1.89 abukhman 205: def getGisPlaces(self, docinfo=None, pageinfo=None):
1.58 abukhman 206: """ Show all Gis Places of whole Page"""
1.100 abukhman 207: xpath='//place'
1.214 casties 208: docpath = docinfo.get('textURLPath',None)
209: if not docpath:
210: return None
211:
1.89 abukhman 212: url = docinfo['url']
213: selfurl = self.absolute_url()
1.93 abukhman 214: pn = pageinfo['current']
1.127 abukhman 215: hrefList=[]
1.142 abukhman 216: myList= ""
1.100 abukhman 217: text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn))
1.238.2.2! casties 218: dom = ET.fromstring(text)
! 219: result = dom.findall(".//result/resultPage/place")
1.72 abukhman 220: for l in result:
1.238.2.2! casties 221: href = l.get("id")
1.128 abukhman 222: hrefList.append(href)
1.238.2.2! casties 223: # WTF: what does this do?
1.145 abukhman 224: myList = ",".join(hrefList)
1.230 abukhman 225: #logging.debug("getGisPlaces :%s"%(myList))
1.143 abukhman 226: return myList
227:
228: def getAllGisPlaces (self, docinfo=None, pageinfo=None):
229: """Show all Gis Places of whole Book """
230: xpath ='//echo:place'
231: docpath =docinfo['textURLPath']
232: url = docinfo['url']
233: selfurl =self.absolute_url()
234: pn =pageinfo['current']
235: hrefList=[]
236: myList=""
237: text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath))
1.238.2.2! casties 238: dom = ET.fromstring(text)
! 239: result = dom.findall(".//result/resultPage/place")
1.205 abukhman 240:
1.143 abukhman 241: for l in result:
1.238.2.2! casties 242: href = l.get("id")
1.143 abukhman 243: hrefList.append(href)
1.238.2.2! casties 244: # WTF: what does this do?
1.136 abukhman 245: myList = ",".join(hrefList)
1.230 abukhman 246: #logging.debug("getALLGisPlaces :%s"%(myList))
1.145 abukhman 247: return myList
1.222 abukhman 248:
1.238.2.2! casties 249: def processPageInfo(self, dom, docinfo, pageinfo):
! 250: """processes page info divs from dom and stores in docinfo and pageinfo"""
! 251: # process all toplevel divs
! 252: alldivs = dom.findall(".//div")
! 253: pagediv = None
! 254: for div in alldivs:
! 255: dc = div.get('class')
! 256:
! 257: # page content div
! 258: if dc == 'pageContent':
! 259: pagediv = div
! 260:
! 261: # pageNumberOrig
! 262: elif dc == 'pageNumberOrig':
! 263: pageinfo['pageNumberOrig'] = div.text
! 264:
! 265: # pageNumberOrigNorm
! 266: elif dc == 'pageNumberOrigNorm':
! 267: pageinfo['pageNumberOrigNorm'] = div.text
! 268:
! 269: # pageNumberOrigNorm
! 270: elif dc == 'countFigureEntries':
! 271: docinfo['countFigureEntries'] = intOr0(div.text)
! 272:
! 273: # pageNumberOrigNorm
! 274: elif dc == 'countTocEntries':
! 275: # WTF: s1 = int(s)/30+1
! 276: docinfo['countTocEntries'] = intOr0(div.text)
! 277:
! 278: # numTextPages
! 279: elif dc == 'countPages':
! 280: np = intOr0(div.text)
! 281: if np > 0:
! 282: docinfo['numTextPages'] = np
! 283: if docinfo.get('numPages', 0) == 0:
! 284: # seems to be text-only
! 285: docinfo['numTextPages'] = np
! 286: pageinfo['end'] = min(pageinfo['end'], np)
! 287: pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
! 288: if np % pageinfo['groupsize'] > 0:
! 289: pageinfo['numgroups'] += 1
! 290:
! 291: return
! 292:
1.215 abukhman 293:
1.227 abukhman 294: def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
1.2 casties 295: """returns single page from fulltext"""
1.238.2.2! casties 296: logging.debug("getTextPage mode=%s, pn=%s"%(mode,pn))
! 297: # check for cached text -- but this shouldn't be called twice
! 298: if pageinfo.has_key('textPage'):
! 299: logging.debug("getTextPage: using cached text")
! 300: return pageinfo['textPage']
! 301:
1.2 casties 302: docpath = docinfo['textURLPath']
1.238.2.2! casties 303: # just checking
! 304: if pageinfo['current'] != pn:
! 305: logging.warning("getTextPage: current!=pn!")
! 306:
! 307: # stuff for constructing full urls
! 308: url = docinfo['url']
! 309: urlmode = docinfo['mode']
! 310: sn = pageinfo.get('sn', None)
! 311: highlightQuery = pageinfo.get('highlightQuery', None)
! 312: tocMode = pageinfo.get('tocMode', None)
! 313: tocPN = pageinfo.get('tocPN',None)
! 314: characterNormalization = pageinfo.get('characterNormalization', None)
! 315: selfurl = docinfo['viewerUrl']
! 316:
1.2 casties 317: if mode == "text_dict":
1.238.2.2! casties 318: # text_dict is called textPollux in the backend
1.2 casties 319: textmode = "textPollux"
320: else:
321: textmode = mode
1.222 abukhman 322:
1.237 abukhman 323: textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
1.238.2.2! casties 324: if highlightQuery:
1.196 abukhman 325: textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)
1.222 abukhman 326:
1.238.2.2! casties 327: # fetch the page
1.38 abukhman 328: pagexml = self.getServerData("page-fragment.xql",textParam)
1.238.2.1 casties 329: dom = ET.fromstring(pagexml)
1.238.2.2! casties 330: # extract additional info
! 331: self.processPageInfo(dom, docinfo, pageinfo)
! 332: # page content is in <div class="pageContent">
1.238.2.1 casties 333: pagediv = None
1.238.2.2! casties 334: # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
! 335: alldivs = dom.findall(".//div")
1.238.2.1 casties 336: for div in alldivs:
337: dc = div.get('class')
1.238.2.2! casties 338: # page content div
1.238.2.1 casties 339: if dc == 'pageContent':
340: pagediv = div
341: break
1.2 casties 342:
343: # plain text mode
344: if mode == "text":
1.238.2.1 casties 345: if pagediv:
346: links = pagediv.findall(".//a")
1.2 casties 347: for l in links:
1.238.2.1 casties 348: href = l.get('href')
349: if href and href.startswith('#note-'):
1.238.2.2! casties 350: href = href.replace('#note-',"?mode=%s&url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn))
1.238.2.1 casties 351: l.set('href', href)
1.238.2.2! casties 352:
1.238.2.1 casties 353: return serialize(pagediv)
354:
1.2 casties 355: # text-with-links mode
1.238.2.2! casties 356: elif mode == "text_dict":
1.238.2.1 casties 357: if pagediv:
1.2 casties 358: # check all a-tags
1.238.2.1 casties 359: links = pagediv.findall(".//a")
1.2 casties 360: for l in links:
1.238.2.1 casties 361: href = l.get('href')
1.236 abukhman 362:
1.238.2.1 casties 363: if href:
1.2 casties 364: # is link with href
1.235 abukhman 365: if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
1.2 casties 366: # is pollux link
367: selfurl = self.absolute_url()
368: # change href
1.238.2.1 casties 369: l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl))
1.2 casties 370: # add target
1.238.2.1 casties 371: l.set('target', '_blank')
1.235 abukhman 372:
373: if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
1.2 casties 374: selfurl = self.absolute_url()
1.238.2.1 casties 375: l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
376: l.set('target', '_blank')
377: l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
378: l.set('ondblclick', 'popupWin.focus();')
1.236 abukhman 379:
1.2 casties 380: if href.startswith('#note-'):
1.238.2.2! casties 381: l.set('href', href.replace('#note-',"?mode=%s&url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(urlmode,url,tocMode,tocPN,pn)))
1.236 abukhman 382:
1.238.2.1 casties 383: return serialize(pagediv)
384:
1.238.2.2! casties 385: # xml mode
! 386: elif mode == "xml":
! 387: if pagediv:
! 388: return serialize(pagediv)
! 389:
! 390: # pureXml mode
! 391: elif mode == "pureXml":
! 392: if pagediv:
! 393: return serialize(pagediv)
! 394:
! 395: # gis mode
! 396: elif mode == "gis":
! 397: name = docinfo['name']
! 398: if pagediv:
! 399: # check all a-tags
! 400: links = pagediv.findall(".//a")
! 401: for l in links:
! 402: href = l.get('href')
! 403: if href:
! 404: if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
! 405: l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name))
! 406: l.set('target', '_blank')
! 407:
! 408: return serialize(pagediv)
! 409:
1.2 casties 410: return "no text here"
1.225 abukhman 411:
1.238.2.2! casties 412: # WTF: is this needed?
1.230 abukhman 413: def getOrigPages(self, docinfo=None, pageinfo=None):
1.238.2.2! casties 414: logging.debug("CALLED: getOrigPages!")
! 415: if not pageinfo.has_key('pageNumberOrig'):
! 416: logging.warning("getOrigPages: not in pageinfo!")
! 417: return None
! 418:
! 419: return pageinfo['pageNumberOrig']
1.230 abukhman 420:
1.238.2.2! casties 421: # WTF: is this needed?
1.230 abukhman 422: def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
1.238.2.2! casties 423: logging.debug("CALLED: getOrigPagesNorm!")
! 424: if not pageinfo.has_key('pageNumberOrigNorm'):
! 425: logging.warning("getOrigPagesNorm: not in pageinfo!")
! 426: return None
! 427:
! 428: return pageinfo['pageNumberOrigNorm']
1.230 abukhman 429:
1.238.2.2! casties 430: # TODO: should be getWordInfo
1.235 abukhman 431: def getTranslate(self, word=None, language=None):
1.2 casties 432: """translate into another languages"""
1.235 abukhman 433: data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html")
1.2 casties 434: return data
435:
1.238.2.2! casties 436: # WTF: what does this do?
1.2 casties 437: def getLemma(self, lemma=None, language=None):
438: """simular words lemma """
1.235 abukhman 439: data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html")
1.2 casties 440: return data
441:
1.238.2.2! casties 442: # WTF: what does this do?
1.235 abukhman 443: def getLemmaQuery(self, query=None, language=None):
1.2 casties 444: """simular words lemma """
1.235 abukhman 445: data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html")
446: return data
447:
1.238.2.2! casties 448: # WTF: what does this do?
1.235 abukhman 449: def getLex(self, query=None, language=None):
450: #simular words lemma
451: data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query))
1.2 casties 452: return data
1.238.2.2! casties 453:
! 454: # WTF: what does this do?
1.237 abukhman 455: def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
1.222 abukhman 456: #number of
1.2 casties 457: docpath = docinfo['textURLPath']
458: pagesize = pageinfo['queryPageSize']
459: pn = pageinfo['searchPN']
1.34 abukhman 460: query =pageinfo['query']
1.2 casties 461: queryType =pageinfo['queryType']
462: tocSearch = 0
463: tocDiv = None
464:
1.32 abukhman 465: pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
1.2 casties 466: pagedom = Parse(pagexml)
467: numdivs = pagedom.xpath("//div[@class='queryResultHits']")
468: tocSearch = int(getTextFromNode(numdivs[0]))
469: tc=int((tocSearch/10)+1)
470: return tc
1.222 abukhman 471:
1.2 casties 472: def getToc(self, mode="text", docinfo=None):
1.238.2.2! casties 473: """loads table of contents and stores XML in docinfo"""
! 474: logging.debug("getToc mode=%s"%mode)
1.2 casties 475: if mode == "none":
1.238.2.2! casties 476: return docinfo
! 477:
1.2 casties 478: if 'tocSize_%s'%mode in docinfo:
479: # cached toc
480: return docinfo
481:
482: docpath = docinfo['textURLPath']
483: # we need to set a result set size
484: pagesize = 1000
485: pn = 1
486: if mode == "text":
487: queryType = "toc"
488: else:
489: queryType = mode
490: # number of entries in toc
491: tocSize = 0
492: tocDiv = None
1.238.2.2! casties 493: # fetch full toc
1.2 casties 494: pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
1.238.2.2! casties 495: dom = ET.fromstring(pagexml)
! 496: # page content is in <div class="queryResultPage">
! 497: pagediv = None
! 498: # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
! 499: alldivs = dom.findall("div")
! 500: for div in alldivs:
! 501: dc = div.get('class')
! 502: # page content div
! 503: if dc == 'queryResultPage':
! 504: pagediv = div
! 505:
! 506: elif dc == 'queryResultHits':
! 507: docinfo['tocSize_%s'%mode] = intOr0(div.text)
! 508:
! 509: if pagediv:
! 510: # # split xml in chunks
! 511: # tocs = []
! 512: # tocdivs = pagediv.findall('div')
! 513: # for p in zip(tocdivs[::2], tocdivs[1::2]):
! 514: # toc = serialize(p[0])
! 515: # toc += serialize(p[1])
! 516: # tocs.append(toc)
! 517: # logging.debug("pair: %s"%(toc))
! 518: # store XML in docinfo
! 519: docinfo['tocXML_%s'%mode] = ET.tostring(pagediv, 'UTF-8')
! 520:
1.2 casties 521: return docinfo
522:
523: def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
524: """returns single page from the table of contents"""
1.238.2.2! casties 525: logging.debug("getTocPage mode=%s, pn=%s"%(mode,pn))
1.2 casties 526: if mode == "text":
527: queryType = "toc"
528: else:
529: queryType = mode
1.238.2.2! casties 530:
! 531: # check for cached TOC
! 532: if not docinfo.has_key('tocXML_%s'%mode):
! 533: self.getToc(mode=mode, docinfo=docinfo)
! 534:
! 535: tocxml = docinfo.get('tocXML_%s'%mode, None)
! 536: if not tocxml:
! 537: logging.error("getTocPage: unable to find tocXML")
! 538: return "No ToC"
! 539:
! 540: pagesize = int(pageinfo['tocPageSize'])
1.2 casties 541: url = docinfo['url']
1.238.2.2! casties 542: urlmode = docinfo['mode']
! 543: selfurl = docinfo['viewerUrl']
1.2 casties 544: viewMode= pageinfo['viewMode']
545: tocMode = pageinfo['tocMode']
1.238.2.2! casties 546: tocPN = int(pageinfo['tocPN'])
! 547:
! 548: fulltoc = ET.fromstring(tocxml)
1.2 casties 549:
1.238.2.2! casties 550: if fulltoc:
! 551: # paginate
! 552: #start = (pn - 1) * pagesize * 2
! 553: #end = start + pagesize * 2
! 554: #tocdivs = fulltoc[start:end]
! 555: tocdivs = fulltoc
! 556:
! 557: # check all a-tags
! 558: links = tocdivs.findall(".//a")
! 559: for l in links:
! 560: href = l.get('href')
! 561: if href:
! 562: # take pn from href
! 563: m = re.match(r'page-fragment\.xql.*pn=(\d+)', href)
! 564: if m is not None:
! 565: # and create new url
! 566: l.set('href', '%s?mode=%s&url=%s&viewMode=%s&pn=%s&tocMode=%s&tocPN=%s'%(selfurl, urlmode, url, viewMode, m.group(1), tocMode, tocPN))
! 567: else:
! 568: logging.warning("getTocPage: Problem with link=%s"%href)
! 569:
! 570: return serialize(tocdivs)
! 571:
1.2 casties 572:
1.234 abukhman 573: def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
1.2 casties 574: """change settings"""
575: self.title=title
576: self.timeout = timeout
577: self.serverUrl = serverUrl
578: if RESPONSE is not None:
579: RESPONSE.redirect('manage_main')
580:
581: # management methods
def manage_addMpdlXmlTextServerForm(self):
    """Form for adding

    Renders the page template zpt/manage_addMpdlXmlTextServer in the
    context of self.
    """
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    return template.__of__(self)()
586:
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """add MpdlXmlTextServer

    Creates a new MpdlXmlTextServer with the given id, title, serverUrl and
    timeout, adds it to the destination container and redirects to
    manage_main if a RESPONSE is given.
    """
    # timeout must be passed by keyword: the fourth positional parameter of
    # the constructor is serverName, which would replace serverUrl
    newObj = MpdlXmlTextServer(id, title, serverUrl, timeout=timeout)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
! 594:
! 595:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>