Annotation of documentViewer/extraFunction.py, revision 1.1.2.1
1.1.2.1 ! abukhman 1:
! 2: from OFS.Folder import Folder
! 3: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
! 4: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
! 5: from Products.PythonScripts.standard import url_quote
! 6:
! 7:
! 8: from Ft.Xml.Domlette import NonvalidatingReader
! 9: from Ft.Xml.Domlette import PrettyPrint, Print
! 10: from Ft.Xml import EMPTY_NAMESPACE, Parse
! 11:
! 12: from xml.dom.minidom import parse, parseString
! 13:
! 14: import Ft.Xml.XPath
! 15: import cStringIO
! 16: import xmlrpclib
! 17: import os.path
! 18: import sys
! 19: import cgi
! 20: import urllib
! 21: import logging
! 22: import math
! 23: import documentViewer
! 24:
! 25: import urlparse
! 26: from types import *
! 27:
def getTextFromNode(nodename):
    """Return the concatenated data of a node's direct text children.

    Only immediate children whose ``nodeType`` equals ``TEXT_NODE``
    contribute; text nested in child elements is ignored.  Passing
    ``None`` yields the empty string.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)
! 38:
def serializeNode(node, encoding='utf-8'):
    """Serialize *node* to XML and return the result as a string.

    The node is written with ``Print`` into an in-memory buffer using
    the requested *encoding*; the buffer content is returned after the
    buffer has been closed.
    """
    stream = cStringIO.StringIO()
    Print(node, stream=stream, encoding=encoding)
    xml_text = stream.getvalue()
    stream.close()
    return xml_text
! 46:
! 47:
class extraFunction(Folder):
    """Folder bundling the full-text helper methods of the document viewer.

    All public methods send a request to the full-text backend through
    ``self.template.fulltextclient.eval(path, querystring)`` and either
    return the answer unchanged or rewrite the links it contains so that
    they point back at this object (``self.absolute_url()``).

    NOTE(review): values placed into the backend query strings are not
    URL-encoded here (except the search term in getTranslate/getLemma*);
    presumably callers pass already-encoded values -- confirm.
    """

    # onClick handler used for dictionary/lemma links; %s is the "left"
    # screen offset of the popup window.
    _popupOnClick = ("popupWin = window.open(this.href, 'contacts', "
                     "'location,width=500,height=600,top=180, left=%s, scrollbars=1'); "
                     "return false;")

    def __init__(self,id, title=""):
        """Store the Zope id and title of this object."""
        self.id=id
        self.title=title

    # ------------------------------------------------------------------
    # private helpers (leading underscore: not publishable through Zope)
    # ------------------------------------------------------------------

    def _linksWithHref(self, pagenode):
        """Yield (link, hrefNode, href) for every <a> below pagenode that has an href."""
        # ".//a" restricts the search to the subtree that is serialized
        # afterwards; the former "//a" walked the whole document.
        for link in pagenode.xpath(".//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if hrefNode:
                yield link, hrefNode, hrefNode.nodeValue

    def _openInPopup(self, link, left):
        """Make link open in a popup window positioned `left` pixels from the left."""
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', self._popupOnClick % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def _toByteString(self, value):
        """Return str(value), falling back to UTF-8 for non-ASCII unicode."""
        try:
            return str(value)
        except UnicodeEncodeError:
            # str() of a non-ASCII unicode object fails under Python 2.
            # NOTE(review): assumes the backend expects UTF-8 -- confirm.
            return value.encode('utf-8')

    def _evalLemma(self, term, language):
        """Send a lemma lookup for term/language to the backend and return the answer."""
        params = ("document=&language=" + str(language)
                  + "&lemma=" + url_quote(self._toByteString(term)))
        return self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql", params)

    # ------------------------------------------------------------------
    # public API
    # ------------------------------------------------------------------

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """Return one page of search results as serialized XML.

        The search parameters are read from `pageinfo` (keys
        queryPageSize, searchPN, sn, highlightQuery, query, queryType,
        viewMode, tocMode, tocPN) and the document from `docinfo` (keys
        textURLPath, url).  The `pn`, `query` and `queryType` arguments
        are overwritten by the pageinfo values and `lemma` is unused;
        they are kept for interface compatibility.

        Links in the result are rewritten to point at this object.
        Returns the string "no text here" when the query type is unknown
        or the expected result div is missing.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (getSearch) docpath: %s", docpath)
        logging.debug("documentViewer (getSearch) url: %s", url)
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        queryParams = ("document=%s&mode=%s&queryType=%s&query=%s"
                       "&queryResultPageSize=%s&queryResultPN=%s"
                       "&sn=%s&viewMode=%s&highlightQuery=%s"
                       % (docpath, 'text', queryType, query, pagesize, pn, sn, viewMode, highlightQuery))
        page = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", queryParams, outputUnicode=False)
        pagexml = page.replace('?document=%s'%str(docpath),'?url=%s'%url)
        pagedom = Parse(pagexml)

        def fragmentHref(href):
            # turn a backend page-fragment link into a link to this viewer
            rewritten = href.replace(
                'mode=text',
                'mode=texttool&viewMode=%s&queryType=%s&query=%s'
                '&queryResultPageSize=%s&queryResultPN=%s'
                '&tocMode=%s&searchPN=%s&tocPN=%s'
                % (viewMode, queryType, query, pagesize, pn, tocMode, pn, tocPN))
            return rewritten.replace('page-fragment.xql','%s'%selfurl)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs)>0:
                pagenode = pagedivs[0]
                for link, hrefNode, href in self._linksWithHref(pagenode):
                    if href.startswith('page-fragment.xql'):
                        hrefNode.nodeValue = fragmentHref(href)
                return serializeNode(pagenode)

        if queryType == "fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs)>0:
                pagenode = pagedivs[0]
                for link, hrefNode, href in self._linksWithHref(pagenode):
                    if href.startswith('page-fragment.xql'):
                        hrefNode.nodeValue = fragmentHref(href)
                    elif href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))
                        self._openInPopup(link, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs)>0:
                pagenode = pagedivs[0]
                for link, hrefNode, href in self._linksWithHref(pagenode):
                    hrefNode.nodeValue = href.replace(
                        'mode=text',
                        'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn))
                    # dictionary/lemma links are rebuilt from the ORIGINAL
                    # href, replacing the value assigned just above
                    if href.startswith('../lt/lex.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        self._openInPopup(link, 400)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        self._openInPopup(link, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getNumPages(self,docinfo=None):
        """Count the page breaks of the document and store the count in docinfo.

        Runs the xquery ``//pb`` against ``docinfo['textURLPath']``,
        counts the occurrences of ``<pb `` in the answer, stores the
        number as ``docinfo['numPages']`` and returns docinfo.
        """
        xquery = '//pb'
        params = "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)
        text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", params)
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
        """Return a single page of the full text as serialized XML.

        `mode` is one of "text", "xml", "pureXml" or "text_dict" (the
        latter is requested from the backend as "textPollux").  The
        document is taken from `docinfo` (keys textURLPath, url).  The
        `viewMode`, `tocMode` and `tocPN` arguments are overwritten by
        the values of the same name in `pageinfo`; they are kept for
        interface compatibility.  `highlightQuery` and `sn` are only sent
        to the backend when `highlightQuery` is not None.

        Returns the string "no text here" for any other mode or when the
        answer has no root div.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
        if highlightQuery is not None:
            textParam += "&highlightQuery=%s&sn=%s"%(highlightQuery,sn)
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)
        pagedom = Parse(pagexml)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"
        # the root div contains the text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        # "xml" and "pureXml" are returned untouched; the text modes get
        # their links rewritten first
        if mode in ("text", "text_dict"):
            for link, hrefNode, href in self._linksWithHref(pagenode):
                if mode == "text_dict":
                    if href.startswith('lt/lex.xql'):
                        # dictionary (pollux) link
                        hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        self._openInPopup(link, 700)
                    if href.startswith('lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        self._openInPopup(link, 700)
                if href.startswith('#note-'):
                    # footnote anchors need the full viewer query string
                    hrefNode.nodeValue = href.replace(
                        '#note-',
                        "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn))
        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """Look up `query` in the backend dictionary for `language`; return the raw answer."""
        params = ("document=&language=" + str(language)
                  + "&query=" + url_quote(self._toByteString(query)))
        return self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql", params)

    def getLemma(self, lemma=None, language=None):
        """Look up `lemma` in the backend lemma service for `language`; return the raw answer."""
        return self._evalLemma(lemma, language)

    def getLemmaNew(self, query=None, language=None):
        """Same lookup as getLemma, with the search term passed as `query`."""
        return self._evalLemma(query, language)

    def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """Run the search described by pageinfo and return hits // 10 + 1.

        The search parameters are read from `pageinfo` (keys
        queryPageSize, searchPN, query, queryType); the `query`,
        `queryType` and `pn` arguments are overwritten by them and are
        kept for interface compatibility.  The hit count is read from the
        first div of class "queryResultHits"; when that div is missing
        the count is taken as 0 (result 1).

        NOTE(review): the divisor is the constant 10, not
        pageinfo['queryPageSize'], and an exact multiple of 10 still
        gets +1 -- confirm this is what callers expect.
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        tocSearch = 0
        queryParams = ("document=%s&mode=%s&queryType=%s&query=%s"
                       "&queryResultPageSize=%s&queryResultPN=%s"
                       % (docpath, 'text', queryType, query, pagesize, pn))
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", queryParams, outputUnicode=False)
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        # guard against a missing hits div, as getToc does
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        tc = tocSearch // 10 + 1
        logging.debug("documentViewer (getQuery) tc: %s", tc)
        return tc

    def getToc(self, mode="text", docinfo=None):
        """Store the number of table-of-contents entries in docinfo and return it.

        For mode "none", or when ``docinfo['tocSize_<mode>']`` already
        exists, docinfo is returned unchanged.  Otherwise the backend is
        queried (query type "toc" for mode "text", else the mode itself)
        and the number found in the first div of class "queryResultHits"
        (0 if there is none) is stored as ``docinfo['tocSize_<mode>']``.
        """
        logging.debug("documentViewer (gettoc) mode: %s", mode)
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        # number of entries in toc
        tocSize = 0
        queryParams = ("document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"
                       % (docpath, queryType, pagesize, pn))
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", queryParams, outputUnicode=False)
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """Return one page of the table of contents as delivered by the backend.

        Page size and page number come from `pageinfo` (keys tocPageSize,
        tocPN); the `pn` argument is overwritten and kept only for
        interface compatibility.  In the answer, backend page-fragment
        links are replaced by links to this object and "mode=image" is
        replaced by "mode=texttool".
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        tocPN = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        queryParams = ("document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"
                       % (docpath, queryType, pagesize, tocPN))
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", queryParams, outputUnicode=False)
        page = pagexml.replace(
            'page-fragment.xql?document=%s'%str(docpath),
            '%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
        return page.replace('mode=image','mode=texttool')
! 299:
! 300:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>