Annotation of documentViewer/MpdlXmlTextServer.py, revision 1.238.2.3
1.2 casties 1:
2: from OFS.SimpleItem import SimpleItem
3: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
1.238.2.2 casties 4:
1.2 casties 5: from Ft.Xml import EMPTY_NAMESPACE, Parse
1.238 abukhman 6: from Ft.Xml.Domlette import NonvalidatingReader
1.238.2.1 casties 7: import Ft.Xml.Domlette
8: import cStringIO
9:
10: import xml.etree.ElementTree as ET
1.2 casties 11:
1.238.2.2 casties 12: import re
1.2 casties 13: import logging
1.5 casties 14: import urllib
1.2 casties 15: import documentViewer
1.238.2.1 casties 16: #from documentViewer import getTextFromNode, serializeNode
17:
def intOr0(s, default=0):
    """convert s to int or return default

    s -- value to convert (string, number or None)
    default -- value returned when s can not be converted
    """
    try:
        return int(s)
    except (TypeError, ValueError, OverflowError):
        # only conversion problems are expected here; anything else
        # (e.g. KeyboardInterrupt) must not be swallowed
        return default
24:
def getText(node):
    """get the text content of an ElementTree node

    Returns the text of node and of all its descendants (including
    the tail text following each descendant) as one string.
    Returns "" if node is None.
    """
    if node is None:
        return ""

    # ET:
    parts = [node.text or ""]
    for e in node:
        # recurse into the child (was a call to the undefined name "gettext")
        parts.append(getText(e))
        if e.tail:
            parts.append(e.tail)

    return "".join(parts)
37:
def serialize(node):
    """returns a string containing an XML snippet of node

    The node is serialized as UTF-8 and a leading XML declaration
    is removed so that the result can be embedded in a page.
    """
    s = ET.tostring(node, 'UTF-8')
    # tostring returns a byte string; use literals of the same type so
    # the comparison also works where byte strings are not str
    if isinstance(s, str):
        declStart, declEnd = '<?xml', '?>'
    else:
        declStart, declEnd = b'<?xml', b'?>'

    # snip off XML declaration
    if s.startswith(declStart):
        i = s.find(declEnd)
        if i >= 0:
            # skip the declaration and whatever whitespace follows it
            # instead of assuming exactly one newline
            return s[i + len(declEnd):].lstrip()

    return s
47:
48:
def getTextFromNode(node):
    """get the text of the direct text children of a DOM node

    Only text nodes that are immediate children of node are used,
    text inside child elements is ignored.
    Returns "" if node is None.
    """
    if node is None:
        return ""

    # 4Suite:
    textType = node.TEXT_NODE
    chunks = [child.data for child in node.childNodes if child.nodeType == textType]
    return "".join(chunks)
68:
def serializeNode(node, encoding="utf-8"):
    """returns a string containing node as XML

    node -- 4Suite DOM node
    encoding -- encoding passed to the 4Suite printer
    """
    # 4Suite:
    stream = cStringIO.StringIO()
    try:
        Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
        return stream.getvalue()
    finally:
        # release the buffer even if printing fails
        stream.close()
80:
1.2 casties 81:
class MpdlXmlTextServer(SimpleItem):
    """TextServer implementation for MPDL-XML eXist server.

    Fetches text pages, tables of contents and search results from the
    text server via HTTP and rewrites the links in the returned XML
    snippets so that they point to the viewer instead of the text server.
    """
    meta_type = "MPDL-XML TextServer"

    manage_options = (
        {'label': 'Config', 'action': 'manage_changeMpdlXmlTextServerForm'},
    ) + SimpleItem.manage_options

    manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())

    # onClick handler used for dictionary links in search results
    _popupOnClick = "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;"

    def __init__(self, id, title="", serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
        """constructor

        id -- id of the object
        title -- title of the object
        serverUrl -- base URL of the text server interface
        serverName -- host name; if given the base URL is built from it
            and serverUrl is ignored
        timeout -- timeout handed to documentViewer.getHttpData
        """
        self.id = id
        self.title = title
        self.timeout = timeout
        if serverName is None:
            self.serverUrl = serverUrl
        else:
            self.serverUrl = "http://%s/mpdl/interface/" % serverName

    def getHttpData(self, url, data=None):
        """returns result from url+data HTTP request"""
        return documentViewer.getHttpData(url, data, timeout=self.timeout)

    def getServerData(self, method, data=None):
        """returns result from text server for method+data"""
        url = self.serverUrl + method
        return documentViewer.getHttpData(url, data, timeout=self.timeout)

    def _openInPopup(self, link):
        """sets the attributes that make a (4Suite) link node open in a popup window"""
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', self._popupOnClick)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    # WTF: what does this really do? can it be integrated in getPage?
    def getSearch(self, pageinfo=None, docinfo=None):
        """get search list

        Sends the query described by pageinfo to the text server and
        returns the result div as XML string with all links rewritten
        to point to this object. Returns "no text here" if the result
        contains no matching div or the queryType is unknown.
        """
        logging.debug("getSearch()")
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo.get('searchPN', 1)
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        characterNormalization = pageinfo['characterNormalization']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        data = self.getServerData("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s" % (docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode, characterNormalization, urllib.quote(highlightQuery)))
        pagexml = data.replace('?document=%s' % str(docpath), '?url=%s' % url)
        pagedom = Parse(pagexml)

        # parameters that replace "mode=text" in links to page fragments
        fragmentParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s' % (viewMode, queryType, urllib.quote(query), pagesize, pn, tocMode, pn, tocPN, characterNormalization)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        if href.startswith('page-fragment.xql'):
                            pagexml = href.replace('mode=text', fragmentParams)
                            hrefNode.nodeValue = pagexml.replace('page-fragment.xql', '%s' % selfurl)

                return serializeNode(pagenode)

        elif queryType == "fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        if href.startswith('page-fragment.xql'):
                            pagexml = href.replace('mode=text', fragmentParams)
                            hrefNode.nodeValue = pagexml.replace('page-fragment.xql', '%s' % selfurl)

                        if href.startswith('../lt/lemma.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_query' % (selfurl))
                            self._openInPopup(l)

                return serializeNode(pagenode)

        elif queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for l in pagenode.xpath("//a"):
                    hrefNode = l.getAttributeNodeNS(None, u"href")
                    if hrefNode:
                        href = hrefNode.nodeValue
                        hrefNode.nodeValue = href.replace('mode=text', 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s' % (viewMode, tocMode, tocPN, pn, characterNormalization))
                        # dictionary links are rebuilt from the original href
                        if href.startswith('../lt/lex.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lex.xql', '%s/template/head_main_lex' % selfurl)
                            self._openInPopup(l)

                        if href.startswith('../lt/lemma.xql'):
                            hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma' % (selfurl))
                            self._openInPopup(l)

                return serializeNode(pagenode)

        return "no text here"

    def _getPlaceIds(self, text):
        """returns the id attributes of all place elements in an xpath.xql result as comma separated string"""
        dom = ET.fromstring(text)
        ids = []
        for place in dom.findall(".//result/resultPage/place"):
            placeId = place.get("id")
            # places without id would break the join
            if placeId is not None:
                ids.append(placeId)

        return ",".join(ids)

    def getGisPlaces(self, docinfo=None, pageinfo=None):
        """Show all Gis Places of whole Page

        Returns the ids of the places on the current page as comma
        separated string or None if the document has no text path.
        """
        docpath = docinfo.get('textURLPath', None)
        if not docpath:
            return None

        pn = pageinfo['current']
        text = self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s" % (docpath, '//place', pn))
        #logging.debug("getGisPlaces :%s"%(myList))
        return self._getPlaceIds(text)

    def getAllGisPlaces(self, docinfo=None, pageinfo=None):
        """Show all Gis Places of whole Book

        Returns the ids of all places in the document as comma
        separated string or None if the document has no text path.
        """
        # same guard as in getGisPlaces
        docpath = docinfo.get('textURLPath', None)
        if not docpath:
            return None

        text = self.getServerData("xpath.xql", "document=%s&xpath=%s" % (docpath, '//echo:place'))
        #logging.debug("getALLGisPlaces :%s"%(myList))
        return self._getPlaceIds(text)

    def processPageInfo(self, dom, docinfo, pageinfo):
        """processes page info divs from dom and stores in docinfo and pageinfo

        dom -- ElementTree element of the page fragment
        docinfo -- dict, receives countFigureEntries, countTocEntries,
            numTextPages (and numPages for text-only documents)
        pageinfo -- dict, receives pageNumberOrig, pageNumberOrigNorm
            and for text-only documents updated end and numgroups
        """
        # process all divs
        for div in dom.findall(".//div"):
            dc = div.get('class')

            # pageNumberOrig
            if dc == 'pageNumberOrig':
                pageinfo['pageNumberOrig'] = div.text

            # pageNumberOrigNorm
            elif dc == 'pageNumberOrigNorm':
                pageinfo['pageNumberOrigNorm'] = div.text

            # countFigureEntries
            elif dc == 'countFigureEntries':
                docinfo['countFigureEntries'] = intOr0(div.text)

            # countTocEntries
            elif dc == 'countTocEntries':
                # WTF: s1 = int(s)/30+1
                docinfo['countTocEntries'] = intOr0(div.text)

            # numTextPages
            elif dc == 'countPages':
                np = intOr0(div.text)
                if np > 0:
                    docinfo['numTextPages'] = np
                    if docinfo.get('numPages', 0) == 0:
                        # seems to be text-only - use the text page count
                        # (this assigned numTextPages a second time before)
                        docinfo['numPages'] = np
                        pageinfo['end'] = min(pageinfo['end'], np)
                        groupsize = pageinfo['groupsize']
                        pageinfo['numgroups'] = int(np / groupsize)
                        if np % groupsize > 0:
                            # partially filled last group
                            pageinfo['numgroups'] += 1

        return

    def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
        """returns single page from fulltext

        mode -- one of text, text_dict, xml, pureXml, gis
        pn -- page number
        docinfo, pageinfo -- info dicts of the viewer; both are updated
            with the page info found in the server response

        Returns the page content div as XML string with rewritten links
        or "no text here" if there is no content div or mode is unknown.
        """
        logging.debug("getTextPage mode=%s, pn=%s", mode, pn)
        # check for cached text -- but this shouldn't be called twice
        if 'textPage' in pageinfo:
            logging.debug("getTextPage: using cached text")
            return pageinfo['textPage']

        docpath = docinfo['textURLPath']
        # just checking
        if pageinfo['current'] != pn:
            logging.warning("getTextPage: current!=pn!")

        # stuff for constructing full urls
        url = docinfo['url']
        urlmode = docinfo['mode']
        sn = pageinfo.get('sn', None)
        highlightQuery = pageinfo.get('highlightQuery', None)
        tocMode = pageinfo.get('tocMode', None)
        tocPN = pageinfo.get('tocPN', None)
        characterNormalization = pageinfo.get('characterNormalization', None)

        if mode == "text_dict":
            # text_dict is called textPollux in the backend
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s" % (docpath, textmode, pn, characterNormalization)
        if highlightQuery:
            textParam += "&highlightQuery=%s&sn=%s" % (urllib.quote(highlightQuery), sn)

        # fetch the page
        pagexml = self.getServerData("page-fragment.xql", textParam)
        dom = ET.fromstring(pagexml)
        # extract additional info
        self.processPageInfo(dom, docinfo, pageinfo)
        # page content is in <div class="pageContent">
        pagediv = None
        # ElementTree 1.2 in Python 2.6 can't do div[@class='pageContent']
        for div in dom.findall(".//div"):
            if div.get('class') == 'pageContent':
                pagediv = div
                break

        # an Element without children is false, so test for None explicitly
        # (otherwise a page that contains only text counts as missing)
        if pagediv is None:
            return "no text here"

        # plain text mode
        if mode == "text":
            for l in pagediv.findall(".//a"):
                href = l.get('href')
                if href and href.startswith('#note-'):
                    href = href.replace('#note-', "?mode=%s&url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-" % (urlmode, url, tocMode, tocPN, pn))
                    l.set('href', href)

            return serialize(pagediv)

        # text-with-links mode
        elif mode == "text_dict":
            selfurl = self.absolute_url()
            # check all a-tags
            for l in pagediv.findall(".//a"):
                href = l.get('href')
                if not href:
                    continue

                # is link with href
                if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
                    # is pollux link: change href and add target
                    l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql', '%s/head_main_voc' % selfurl))
                    l.set('target', '_blank')

                if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
                    l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql', '%s/head_main_lemma' % selfurl))
                    l.set('target', '_blank')
                    l.set('onclick', "popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
                    l.set('ondblclick', 'popupWin.focus();')

                if href.startswith('#note-'):
                    l.set('href', href.replace('#note-', "?mode=%s&url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-" % (urlmode, url, tocMode, tocPN, pn)))

            return serialize(pagediv)

        # xml mode and pureXml mode
        elif mode == "xml" or mode == "pureXml":
            return serialize(pagediv)

        # gis mode
        elif mode == "gis":
            name = docinfo['name']
            # check all a-tags
            for l in pagediv.findall(".//a"):
                href = l.get('href')
                if href and href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
                    l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl', 'chinagis/REST/db/mpdl/%s' % name))
                    l.set('target', '_blank')

            return serialize(pagediv)

        return "no text here"

    # WTF: is this needed?
    # returns pageNumberOrig from pageinfo or None if it is not set.
    # NOTE(review): no docstring added here because Zope's publisher
    # treats methods with docstring as publishable - confirm before adding.
    def getOrigPages(self, docinfo=None, pageinfo=None):
        logging.debug("CALLED: getOrigPages!")
        if 'pageNumberOrig' not in pageinfo:
            logging.warning("getOrigPages: not in pageinfo!")
            return None

        return pageinfo['pageNumberOrig']

    # WTF: is this needed?
    # returns pageNumberOrigNorm from pageinfo or None if it is not set.
    # NOTE(review): no docstring added, see getOrigPages.
    def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
        logging.debug("CALLED: getOrigPagesNorm!")
        if 'pageNumberOrigNorm' not in pageinfo:
            logging.warning("getOrigPagesNorm: not in pageinfo!")
            return None

        return pageinfo['pageNumberOrigNorm']

    # TODO: should be getWordInfo
    def getTranslate(self, word=None, language=None):
        """translate into another languages

        Returns the HTML output of lt/wordInfo.xql for word and language.
        """
        params = "language=" + str(language) + "&word=" + urllib.quote(word) + "&output=html"
        return self.getServerData("lt/wordInfo.xql", params)

    # WTF: what does this do?
    def getLemma(self, lemma=None, language=None):
        """similar words lemma

        Returns the HTML output of lt/lemma.xql for lemma and language.
        """
        params = "language=" + str(language) + "&lemma=" + urllib.quote(lemma) + "&output=html"
        return self.getServerData("lt/lemma.xql", params)

    # WTF: what does this do?
    def getLemmaQuery(self, query=None, language=None):
        """similar words lemma

        Returns the HTML output of lt/lemma.xql for query and language.
        """
        params = "language=" + str(language) + "&query=" + urllib.quote(query) + "&output=html"
        return self.getServerData("lt/lemma.xql", params)

    # WTF: what does this do?
    # returns the output of lt/lex.xql for query and language.
    # NOTE(review): no docstring added, see getOrigPages.
    def getLex(self, query=None, language=None):
        #similar words lemma
        params = "document=&language=" + str(language) + "&query=" + urllib.quote(query)
        return self.getServerData("lt/lex.xql", params)

    # WTF: what does this do?
    # returns number of hits / 10 + 1 for the query stored in pageinfo.
    # NOTE(review): the query, queryType and pn arguments are ignored,
    # the values from pageinfo are used instead - confirm with callers.
    # NOTE(review): no docstring added, see getOrigPages.
    def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        #number of
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']

        pagexml = self.getServerData("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s" % (docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        hits = int(getTextFromNode(numdivs[0]))
        return hits // 10 + 1

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores XML in docinfo

        Stores the number of entries as tocSize_<mode> and the XML of
        the result div as tocXML_<mode> in docinfo. Returns docinfo.
        """
        logging.debug("getToc mode=%s", mode)
        if mode == "none":
            return docinfo

        if 'tocSize_%s' % mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode

        # fetch full toc
        pagexml = self.getServerData("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s" % (docpath, queryType, pagesize, pn))
        dom = ET.fromstring(pagexml)
        # page content is in <div class="queryResultPage">
        pagediv = None
        # ElementTree 1.2 in Python 2.6 can't do div[@class='queryResultPage']
        for div in dom.findall("div"):
            dc = div.get('class')
            # page content div
            if dc == 'queryResultPage':
                pagediv = div

            elif dc == 'queryResultHits':
                docinfo['tocSize_%s' % mode] = intOr0(div.text)

        # only a div that has child elements is stored (as before, but
        # spelled out instead of relying on the truth value of the Element)
        if pagediv is not None and len(pagediv) > 0:
            # store XML in docinfo
            docinfo['tocXML_%s' % mode] = ET.tostring(pagediv, 'UTF-8')

        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The page is cut out of the full ToC that getToc stores in
        docinfo. The page number is taken from pageinfo['tocPN'],
        the pn argument is not used. Returns "No ToC" if there is no
        ToC XML for this mode.
        """
        logging.debug("getTocPage mode=%s, pn=%s", mode, pn)

        # check for cached TOC
        if 'tocXML_%s' % mode not in docinfo:
            self.getToc(mode=mode, docinfo=docinfo)

        tocxml = docinfo.get('tocXML_%s' % mode, None)
        if not tocxml:
            logging.error("getTocPage: unable to find tocXML")
            return "No ToC"

        pagesize = int(pageinfo['tocPageSize'])
        url = docinfo['url']
        urlmode = docinfo['mode']
        selfurl = docinfo['viewerUrl']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = int(pageinfo['tocPN'])
        pn = tocPN

        fulltoc = ET.fromstring(tocxml)
        if len(fulltoc) == 0:
            # nothing to paginate
            return None

        # paginate: "pagesize * 2" because every entry seems to consist
        # of two child elements -- TODO confirm against server output
        start = (pn - 1) * pagesize * 2
        count = pagesize * 2
        del fulltoc[:start]
        del fulltoc[count:]
        tocdivs = fulltoc

        # check all a-tags
        fragmentLink = re.compile(r'page-fragment\.xql.*pn=(\d+)')
        for l in tocdivs.findall(".//a"):
            href = l.get('href')
            if href:
                # take pn from href
                m = fragmentLink.match(href)
                if m is not None:
                    # and create new url
                    l.set('href', '%s?mode=%s&url=%s&viewMode=%s&pn=%s&tocMode=%s&tocPN=%s' % (selfurl, urlmode, url, viewMode, m.group(1), tocMode, tocPN))
                else:
                    logging.warning("getTocPage: Problem with link=%s", href)

        return serialize(tocdivs)

    def manage_changeMpdlXmlTextServer(self, title="", serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40, RESPONSE=None):
        """change settings"""
        self.title = title
        self.timeout = timeout
        self.serverUrl = serverUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
582:
583: # management methods
def manage_addMpdlXmlTextServerForm(self):
    """Form for adding"""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    # wrap the template in the context of self before rendering it
    form = template.__of__(self)
    return form()
588:
def manage_addMpdlXmlTextServer(self, id, title="", serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40, RESPONSE=None):
    """add MpdlXmlTextServer

    Creates a new MpdlXmlTextServer with the given id, title, serverUrl
    and timeout in the destination folder and redirects to manage_main
    if RESPONSE is given.
    """
    # timeout has to be passed by keyword: the fourth positional
    # parameter of the constructor is serverName, not timeout
    newObj = MpdlXmlTextServer(id, title, serverUrl, timeout=timeout)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
596:
597:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>