Annotation of documentViewer/MpdlXmlTextServer.py, revision 1.1.2.1
1.1.2.1 ! casties 1:
! 2: from OFS.SimpleItem import SimpleItem
! 3: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
! 4:
! 5: from Ft.Xml import EMPTY_NAMESPACE, Parse
! 6:
! 7: import sys
! 8: import logging
! 9: import documentViewer
! 10: from documentViewer import getTextFromNode, serializeNode
! 11:
! 12:
class MpdlXmlTextServer(SimpleItem):
    """TextServer implementation for MPDL-XML eXist server.

    Sends requests to the scripts below serverUrl (doc-query.xql,
    page-fragment.xql, xquery.xql, lt/lex.xql, lt/lemma.xql) and rewrites
    the links in the returned fragments so that they point back to this
    object's own URL.
    """
    meta_type="MPDL-XML TextServer"

    manage_options=(
        {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
        )+SimpleItem.manage_options

    manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())

    # onClick handler used for dictionary/lemma links; %s is the "left"
    # screen position of the popup window (400 in search results, 700 in
    # text pages)
    _POPUP_ONCLICK = "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;"

    def __init__(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40):
        """constructor

        id        -- id of the new object
        title     -- title of the new object
        serverUrl -- base URL that the script names are appended to
        timeout   -- timeout value passed on to documentViewer.getHttpData
        """
        self.id=id
        self.title=title
        self.timeout = timeout
        self.serverUrl = serverUrl

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------

    def _urlQuote(self, value):
        """return str(value) percent-encoded for use in a query string"""
        # Imported locally: the module itself imports no quoting function
        # (the previous code called an undefined name, url_quote).
        try:
            from urllib import quote
        except ImportError:
            # location of the same function on Python 3
            from urllib.parse import quote
        return quote(str(value))

    def _linksWithHref(self, node):
        """return a list of (link, hrefNode, href) for every a-tag that has a href attribute"""
        # NOTE(review): "//a" is an absolute XPath expression, so it
        # presumably matches all a-tags of the document that node belongs
        # to, not only the descendants of node -- kept as in the original.
        found = []
        for link in node.xpath("//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if hrefNode:
                found.append((link, hrefNode, hrefNode.nodeValue))
        return found

    def _openInPopup(self, link, left):
        """set target/onClick/onDblclick on link so that it opens in a popup window"""
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', self._POPUP_ONCLICK % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    # ------------------------------------------------------------------
    # server access
    # ------------------------------------------------------------------

    def getHttpData(self, url, data=None):
        """returns result from url+data HTTP request (using the configured timeout)"""
        return documentViewer.getHttpData(url,data,timeout=self.timeout)

    def getServerData(self, method, data=None):
        """returns result from text server for method+data

        method is the script name that is appended to serverUrl.
        """
        return self.getHttpData(self.serverUrl+method, data)

    # ------------------------------------------------------------------
    # search
    # ------------------------------------------------------------------

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """get search list

        Runs doc-query.xql for the query described in pageinfo and returns
        the serialized result div with its links rewritten, or the string
        "no text here" when the query type is unknown or the expected div
        is missing.

        The arguments pn, query and queryType are overwritten by
        pageinfo['searchPN'], pageinfo['query'] and pageinfo['queryType'];
        lemma is not used.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))

        # make the links refer to the document by its url instead of its path
        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
        pagedom = Parse(pagexml)

        # replacement for "mode=text" in links that lead to a text page
        pageLinkParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link, hrefNode, href in self._linksWithHref(pagenode):
                    if href.startswith('page-fragment.xql'):
                        pagelink = href.replace('mode=text', pageLinkParams)
                        hrefNode.nodeValue = pagelink.replace('page-fragment.xql','%s'%selfurl)
                return serializeNode(pagenode)

        if queryType == "fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link, hrefNode, href in self._linksWithHref(pagenode):
                    if href.startswith('page-fragment.xql'):
                        pagelink = href.replace('mode=text', pageLinkParams)
                        hrefNode.nodeValue = pagelink.replace('page-fragment.xql','%s'%selfurl)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))
                        self._openInPopup(link, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link, hrefNode, href in self._linksWithHref(pagenode):
                    hrefNode.nodeValue = href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn))
                    # dictionary and lemma links are rebuilt from the
                    # unmodified href, replacing the value set above
                    if href.startswith('../lt/lex.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        self._openInPopup(link, 400)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        self._openInPopup(link, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """number of result pages for the query in pageinfo

        Reads the hit count from the 'queryResultHits' div of the
        doc-query.xql result and returns hits//10 + 1. A missing div is
        treated as zero hits (result 1).

        The arguments query, queryType and pn are overwritten by
        pageinfo['query'], pageinfo['queryType'] and pageinfo['searchPN'].
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        tocSearch = 0

        pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        # same guard as in getToc: do not fail when the div is missing
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        # explicit integer division
        tc = tocSearch // 10 + 1
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    # ------------------------------------------------------------------
    # text pages
    # ------------------------------------------------------------------

    def getNumPages(self,docinfo=None):
        """get list of pages from fulltext and put in docinfo

        Stores the number of "<pb " occurrences in the result of the
        xquery '//pb' as docinfo['numPages'] unless that key already
        exists. Returns docinfo.
        """
        if 'numPages' in docinfo:
            # already there
            return docinfo

        xquery = '//pb'
        text = self.getServerData("xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
        """returns single page from fulltext

        mode is one of "text", "text_dict", "xml" or "pureXml"; any other
        mode (or a result without a top-level div) yields the string
        "no text here". In "text_dict" mode the server is asked for mode
        "textPollux".

        The arguments viewMode, tocMode and tocPN are overwritten by the
        values of the same name in pageinfo.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
        if highlightQuery is not None:
            textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)

        pagexml = self.getServerData("page-fragment.xql",textParam)
        pagedom = Parse(pagexml)

        if mode in ("text", "xml", "pureXml", "text_dict"):
            # first div contains text
            pagedivs = pagedom.xpath("/div")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                # "xml" and "pureXml" are returned without link rewriting
                if mode in ("text", "text_dict"):
                    # replacement that turns a local note anchor into a
                    # full link to the current page
                    noteLink = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)
                    for link, hrefNode, href in self._linksWithHref(pagenode):
                        if mode == "text_dict":
                            if href.startswith('lt/lex.xql'):
                                # is pollux link: change href and open in popup
                                hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                                self._openInPopup(link, 700)
                            if href.startswith('lt/lemma.xql'):
                                hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                                self._openInPopup(link, 700)
                        if href.startswith('#note-'):
                            hrefNode.nodeValue = href.replace('#note-', noteLink)
                return serializeNode(pagenode)

        return "no text here"

    # ------------------------------------------------------------------
    # language technology
    # ------------------------------------------------------------------

    def getTranslate(self, query=None, language=None):
        """translate into another languages (returns the result of lt/lex.xql)"""
        data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+self._urlQuote(query))
        return data

    def getLemma(self, lemma=None, language=None):
        """similar words for lemma (returns the result of lt/lemma.xql)"""
        data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+self._urlQuote(lemma))
        return data

    def getLemmaNew(self, query=None, language=None):
        """similar words for query (returns the result of lt/lemma.xql with lemma=query)"""
        data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+self._urlQuote(query))
        return data

    # ------------------------------------------------------------------
    # table of contents
    # ------------------------------------------------------------------

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores in docinfo

        Stores the hit count of the toc query as docinfo['tocSize_<mode>']
        (0 when the result has no 'queryResultHits' div). Does nothing for
        mode "none" or when the key already exists. Returns docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        # number of entries in toc
        tocSize = 0

        pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # get number of entries
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The page links in the server result are redirected to this
        object's URL and 'mode=image' is replaced by 'mode=texttool'.
        The argument pn is overwritten by pageinfo['tocPN'].
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        pn = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']

        data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))

        page = data.replace('page-fragment.xql?document=%s'%str(docpath),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image','mode=texttool')
        return text

    # ------------------------------------------------------------------
    # configuration
    # ------------------------------------------------------------------

    def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
        """change settings

        Stores title, serverUrl and timeout on the object and redirects
        to 'manage_main' when a RESPONSE is given.
        """
        self.title=title
        self.timeout = timeout
        self.serverUrl = serverUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
! 313:
! 314: # management methods
def manage_addMpdlXmlTextServerForm(self):
    """Return the rendered form for adding a MpdlXmlTextServer.

    The form is the page template zpt/manage_addMpdlXmlTextServer.
    """
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    # wrap the template in the context of self before calling it
    form = template.__of__(self)
    return form()
! 319:
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Add a new MpdlXmlTextServer.

    Creates the object from id, title, serverUrl and timeout, stores it
    under id in self.Destination() and redirects to 'manage_main' when a
    RESPONSE is given.
    """
    server = MpdlXmlTextServer(id, title, serverUrl, timeout)
    container = self.Destination()
    container._setObject(id, server)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
! 326:
! 327:
! 328:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>