Annotation of documentViewer/MpdlXmlTextServer.py, revision 1.2.2.2
1.2.2.2 ! casties 1:
! 2: from OFS.SimpleItem import SimpleItem
! 3: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
! 4:
! 5: from Ft.Xml import EMPTY_NAMESPACE, Parse
! 6:
! 7: import sys
! 8: import logging
! 9: import documentViewer
! 10: from documentViewer import getTextFromNode, serializeNode
! 11:
! 12:
class MpdlXmlTextServer(SimpleItem):
    """TextServer implementation for MPDL-XML eXist server.

    Every public method sends a request to a script below ``self.serverUrl``
    (through ``documentViewer.getHttpData``) and, where needed, rewrites the
    links in the returned XML so that they point back at this object.
    """
    meta_type = "MPDL-XML TextServer"

    manage_options = (
        {'label': 'Config', 'action': 'manage_changeMpdlXmlTextServerForm'},
    ) + SimpleItem.manage_options

    manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())

    def __init__(self, id, title="", serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
        """Store id, title, timeout and the base URL of the text server.

        If serverName is given, the base URL is built from it and serverUrl
        is ignored; otherwise serverUrl is used unchanged.
        """
        self.id = id
        self.title = title
        self.timeout = timeout
        if serverName is None:
            self.serverUrl = serverUrl
        else:
            self.serverUrl = "http://%s/mpdl/interface/" % serverName

    def _urlQuote(self, value):
        """Return str(value) percent-encoded for use in a request parameter."""
        # Imported locally: the module never imported the `url_quote` name the
        # lexicon methods relied on, so they failed with NameError.
        try:
            from urllib import quote  # Python 2
        except ImportError:
            from urllib.parse import quote  # Python 3
        return quote(str(value))

    def _setPopupAttributes(self, link, left):
        """Make the anchor node open its target in a popup window.

        left is the horizontal position inserted into the window.open() call.
        """
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;" % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def getHttpData(self, url, data=None):
        """returns result from url+data HTTP request"""
        return documentViewer.getHttpData(url, data, timeout=self.timeout)

    def getServerData(self, method, data=None):
        """returns result from text server for method+data"""
        return self.getHttpData(self.serverUrl + method, data)

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """Return one page of search results as serialized XML.

        The search settings are read from pageinfo; the pn, query and
        queryType arguments are overwritten by pageinfo['searchPN'],
        pageinfo['query'] and pageinfo['queryType'], and lemma is not used.
        Links in the result are rewritten to point at this object.
        Returns "no text here" for an unknown queryType or when the response
        holds no result div.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s" % (docpath))
        logging.debug("documentViewer (gettoc) url: %s" % (url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        data = self.getServerData("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s" % (docpath, 'text', queryType, query, pagesize, pn, sn, viewMode, highlightQuery))

        pagexml = data.replace('?document=%s' % str(docpath), '?url=%s' % url)
        pagedom = Parse(pagexml)

        indexTypes = ("ftIndex", "ftIndexMorph")
        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma") or queryType in indexTypes:
            divClass = "queryResultPage"
        elif queryType == "fulltextMorph":
            divClass = "queryResult"
        else:
            return "no text here"

        pagedivs = pagedom.xpath("//div[@class='%s']" % divClass)
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        # replacement parameter strings are the same for every link
        searchParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s' % (viewMode, queryType, query, pagesize, pn, tocMode, pn, tocPN)
        indexParams = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s' % (viewMode, tocMode, tocPN, pn)

        # NOTE(review): "//a" is an absolute XPath, so this visits every anchor
        # of the document, not only those below pagenode. Kept as in the
        # original; only pagenode is serialized below.
        for link in pagenode.xpath("//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if not hrefNode:
                continue
            href = hrefNode.nodeValue
            if queryType in indexTypes:
                hrefNode.nodeValue = href.replace('mode=text', indexParams)
                # lexicon links are rebuilt from the original href
                if href.startswith('../lt/lex.xql'):
                    hrefNode.nodeValue = href.replace('../lt/lex.xql', '%s/template/head_main_voc' % selfurl)
                    self._setPopupAttributes(link, 400)
                if href.startswith('../lt/lemma.xql'):
                    hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma' % selfurl)
                    self._setPopupAttributes(link, 400)
            else:
                if href.startswith('page-fragment.xql'):
                    rewritten = href.replace('mode=text', searchParams)
                    hrefNode.nodeValue = rewritten.replace('page-fragment.xql', '%s' % selfurl)
                if queryType == "fulltextMorph" and href.startswith('../lt/lemma.xql'):
                    hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma_New' % (selfurl))
                    self._setPopupAttributes(link, 400)
        return serializeNode(pagenode)

    def getNumPages(self, docinfo=None):
        """get list of pages from fulltext and put in docinfo

        Counts the "<pb " occurrences in the server's answer to the xquery
        '//pb' and stores the count in docinfo['numPages']. Returns docinfo
        unchanged if 'numPages' is already set.
        """
        if 'numPages' in docinfo:
            # already there
            return docinfo

        xquery = '//pb'
        text = self.getServerData("xquery.xql", "document=%s&xquery=%s" % (docinfo['textURLPath'], xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None, sn=None, viewMode=None, tocMode=None, tocPN=None):
        """Return page pn of the fulltext as serialized XML.

        mode is one of "text", "xml", "pureXml" or "text_dict"; "text_dict"
        is requested from the server as "textPollux" and gets its lexicon
        links turned into popup links. The viewMode, tocMode and tocPN
        arguments are overwritten by the values in pageinfo.
        Returns "no text here" for any other mode or when the response has
        no root div.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s" % (docpath, textmode, pn)
        if highlightQuery is not None:
            textParam += "&highlightQuery=%s&sn=%s" % (highlightQuery, sn)

        pagexml = self.getServerData("page-fragment.xql", textParam)
        pagedom = Parse(pagexml)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"

        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        # "xml" and "pureXml" are returned without touching any link
        if mode == "text" or mode == "text_dict":
            selfurl = self.absolute_url()
            noteTarget = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-" % (url, viewMode, tocMode, tocPN, pn)
            # check all a-tags
            for link in pagenode.xpath("//a"):
                hrefNode = link.getAttributeNodeNS(None, u"href")
                if not hrefNode:
                    continue
                # is link with href
                href = hrefNode.nodeValue
                if mode == "text_dict":
                    if href.startswith('lt/lex.xql'):
                        # is pollux link
                        hrefNode.nodeValue = href.replace('lt/lex.xql', '%s/template/head_main_voc' % selfurl)
                        self._setPopupAttributes(link, 700)
                    if href.startswith('lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('lt/lemma.xql', '%s/template/head_main_lemma' % selfurl)
                        self._setPopupAttributes(link, 700)
                if href.startswith('#note-'):
                    hrefNode.nodeValue = href.replace('#note-', noteTarget)
        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """translate into another languages

        Returns the server's answer from lt/lex.xql for query and language.
        """
        return self.getServerData("lt/lex.xql", "document=&language=" + str(language) + "&query=" + self._urlQuote(query))

    def getLemma(self, lemma=None, language=None):
        """similar words lemma

        Returns the server's answer from lt/lemma.xql for lemma and language.
        """
        return self.getServerData("lt/lemma.xql", "document=&language=" + str(language) + "&lemma=" + self._urlQuote(lemma))

    def getLemmaNew(self, query=None, language=None):
        """similar words lemma

        Same request as getLemma, with the lemma passed as query.
        """
        return self.getServerData("lt/lemma.xql", "document=&language=" + str(language) + "&lemma=" + self._urlQuote(query))

    def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """Return the number of result pages for the search in pageinfo.

        The query, queryType and pn arguments are overwritten by the values
        in pageinfo. The result is (number of hits // 10) + 1; a response
        without a hit count is treated as 0 hits.
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        tocSearch = 0

        pagexml = self.getServerData("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s" % (docpath, 'text', queryType, query, pagesize, pn))
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        # guard as getToc does: without a hits div keep the default of 0
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        # NOTE(review): divides by a fixed 10, not by pagesize -- confirm.
        tc = int((tocSearch // 10) + 1)
        logging.debug("documentViewer (gettoc) tc: %s" % (tc))
        return tc

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores in docinfo

        Stores the number of entries in docinfo['tocSize_<mode>']. Returns
        docinfo unchanged for mode "none" or when that key is already set.
        """
        logging.debug("documentViewer (gettoc) mode: %s" % (mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s' % mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        # number of entries in toc
        tocSize = 0

        pagexml = self.getServerData("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s" % (docpath, queryType, pagesize, pn))
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # get number of entries
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s' % mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The pn argument is overwritten by pageinfo['tocPN']. Links to
        page-fragment.xql are redirected to this object and 'mode=image' is
        replaced by 'mode=texttool' in the returned text.
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        pn = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']

        data = self.getServerData("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s" % (docpath, queryType, pagesize, pn))

        page = data.replace('page-fragment.xql?document=%s' % str(docpath), '%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s' % (selfurl, url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image', 'mode=texttool')
        return text

    def manage_changeMpdlXmlTextServer(self, title="", serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40, RESPONSE=None):
        """change settings

        Stores title, timeout and serverUrl on the object and, if RESPONSE
        is given, redirects to 'manage_main'.
        """
        self.title = title
        self.timeout = timeout
        self.serverUrl = serverUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
! 316:
! 317: # management methods
def manage_addMpdlXmlTextServerForm(self):
    """Render the form for adding an MpdlXmlTextServer."""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    # wrap the template in the context of the calling object before rendering
    return template.__of__(self)()
! 322:
def manage_addMpdlXmlTextServer(self, id, title="", serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40, RESPONSE=None):
    """Create an MpdlXmlTextServer and add it to the destination container.

    If RESPONSE is given, redirects to 'manage_main' afterwards.
    """
    # timeout must be passed by keyword: the fourth positional parameter of
    # MpdlXmlTextServer.__init__ is serverName, so a positional timeout was
    # used as the host name and the requested timeout was dropped.
    newObj = MpdlXmlTextServer(id, title, serverUrl, timeout=timeout)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
! 329:
! 330:
! 331:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>