documentViewer/extraFunction.py - view

File: [Repository] / documentViewer / Attic / extraFunction.py
Revision 1.1.2.1: download - view: text, annotated - select for diffs - revision graph
Mon Jun 14 10:50:06 2010 UTC (15 years ago) by abukhman
Branches: modularisierung

New Version

from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PythonScripts.standard import url_quote


from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE, Parse

from xml.dom.minidom import parse, parseString

import Ft.Xml.XPath
import cStringIO
import xmlrpclib
import os.path
import sys
import cgi
import urllib
import logging
import math
import documentViewer

import urlparse
from types import *


def getTextFromNode(nodename):
    """Return the concatenated data of the direct text-node children of a node.

    Returns "" when nodename is None. Only direct children whose nodeType
    equals TEXT_NODE contribute; child elements are not descended into.
    """
    if nodename is None:
        return ""
    return "".join([node.data for node in nodename.childNodes
                    if node.nodeType == node.TEXT_NODE])


def serializeNode(node, encoding='utf-8'):
    """Return a string containing node serialized as XML in the given encoding."""
    buf = cStringIO.StringIO()
    try:
        Print(node, stream=buf, encoding=encoding)
        return buf.getvalue()
    finally:
        # release the buffer even if serialization fails
        buf.close()


def _openInPopup(link, left):
    """Set the attributes that make an <a> element open its href in a popup window.

    left is inserted as the horizontal position in the window.open() feature string.
    Kept at module level (not as a method) so it is not reachable through the class.
    """
    link.setAttributeNS(None, 'target', '_blank')
    link.setAttributeNS(None, 'onClick', "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;" % left)
    link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')


class extraFunction(Folder):

    def __init__(self, id, title=""):
        self.id = id
        self.title = title

    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """Return the search result list as serialized XML.

        The pn, query and queryType arguments are overwritten with
        pageinfo['searchPN'], pageinfo['query'] and pageinfo['queryType'];
        lemma is not used. docinfo must provide 'textURLPath' and 'url'.

        The result of doc-query.xql is parsed, the links inside the result div
        are rewritten to point at this object (and at the lexicon/lemma
        templates, which open in a popup), and the div is returned serialized.
        Returns "no text here" when queryType is not handled or no result div
        is found.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s", docpath)
        logging.debug("documentViewer (gettoc) url: %s", url)
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        # NOTE(review): query is interpolated into the parameter string as-is;
        # confirm that callers pass it already URL-quoted.
        page = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode, highlightQuery), outputUnicode=False)
        pagexml = page.replace('?document=%s'%str(docpath), '?url=%s'%url)
        pagedom = Parse(pagexml)

        # replacement for 'mode=text' in page-fragment links; the same for every link
        fragmentParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode, queryType, query, pagesize, pn, tocMode, pn, tocPN)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link in pagenode.xpath("//a"):
                    hrefNode = link.getAttributeNodeNS(None, u"href")
                    if not hrefNode:
                        continue
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        hrefNode.nodeValue = href.replace('mode=text', fragmentParams).replace('page-fragment.xql', selfurl)
                return serializeNode(pagenode)

        if queryType == "fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                for link in pagenode.xpath("//a"):
                    hrefNode = link.getAttributeNodeNS(None, u"href")
                    if not hrefNode:
                        continue
                    href = hrefNode.nodeValue
                    if href.startswith('page-fragment.xql'):
                        hrefNode.nodeValue = href.replace('mode=text', fragmentParams).replace('page-fragment.xql', selfurl)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma_New'%(selfurl))
                        _openInPopup(link, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs) > 0:
                pagenode = pagedivs[0]
                indexParams = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode, tocMode, tocPN, pn)
                for link in pagenode.xpath("//a"):
                    hrefNode = link.getAttributeNodeNS(None, u"href")
                    if not hrefNode:
                        continue
                    href = hrefNode.nodeValue
                    hrefNode.nodeValue = href.replace('mode=text', indexParams)
                    # lexicon/lemma links are rebuilt from the original href,
                    # replacing the value assigned just above
                    if href.startswith('../lt/lex.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lex.xql', '%s/template/head_main_voc'%selfurl)
                        _openInPopup(link, 400)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma'%selfurl)
                        _openInPopup(link, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getNumPages(self, docinfo=None):
        """Count the pages of the fulltext and store the count in docinfo['numPages'].

        Runs the xquery '//pb' against docinfo['textURLPath'] and counts the
        occurrences of "<pb " in the returned text. Returns docinfo.
        """
        xquery = '//pb'
        text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'], xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None, sn=None, viewMode=None, tocMode=None, tocPN=None):
        """Return a single page of the fulltext as serialized XML.

        mode selects the post-processing: "text" rewrites note links,
        "text_dict" (requested from the backend as "textPollux") additionally
        rewrites lexicon and lemma links so they open in a popup, "xml" and
        "pureXml" return the first div unchanged. For any other mode, or when
        the result has no top-level div, "no text here" is returned.

        The viewMode, tocMode and tocPN arguments are overwritten with the
        values from pageinfo. highlightQuery and sn are only sent to the
        backend when highlightQuery is not None.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath, textmode, pn)
        if highlightQuery is not None:
            textParam += "&highlightQuery=%s&sn=%s"%(highlightQuery, sn)
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)
        pagedom = Parse(pagexml)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"

        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        if mode in ("xml", "pureXml"):
            return serializeNode(pagenode)

        # "text" and "text_dict": check all a-tags
        noteTarget = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url, viewMode, tocMode, tocPN, pn)
        withDictLinks = (mode == "text_dict")
        for link in pagenode.xpath("//a"):
            hrefNode = link.getAttributeNodeNS(None, u"href")
            if not hrefNode:
                continue
            # is link with href
            href = hrefNode.nodeValue
            if withDictLinks:
                if href.startswith('lt/lex.xql'):
                    # is pollux link: change href and open in popup
                    hrefNode.nodeValue = href.replace('lt/lex.xql', '%s/template/head_main_voc'%selfurl)
                    _openInPopup(link, 700)
                if href.startswith('lt/lemma.xql'):
                    hrefNode.nodeValue = href.replace('lt/lemma.xql', '%s/template/head_main_lemma'%selfurl)
                    _openInPopup(link, 700)
            if href.startswith('#note-'):
                hrefNode.nodeValue = href.replace('#note-', noteTarget)
        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """Return the result of lt/lex.xql for query in the given language."""
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql", "document=&language="+str(language)+"&query="+url_quote(str(query)))
        return pagexml

    def getLemma(self, lemma=None, language=None):
        """Return the result of lt/lemma.xql (similar words) for lemma in the given language."""
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql", "document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
        return pagexml

    def getLemmaNew(self, query=None, language=None):
        """Return the result of lt/lemma.xql (similar words), passing query as the lemma."""
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql", "document=&language="+str(language)+"&lemma="+url_quote(str(query)))
        return pagexml

    def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """Return the number of result pages for the current search.

        The pn, query and queryType arguments are overwritten with
        pageinfo['searchPN'], pageinfo['query'] and pageinfo['queryType'].
        The hit count is read from the first div with class 'queryResultHits';
        when the result contains no such div the hit count is taken as 0.
        Returns hits // 10 + 1.
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        tocSearch = 0
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn), outputUnicode=False)
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        # guard against a result without a hit count (same check as in getToc)
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        # NOTE(review): the divisor is a fixed 10, not pagesize, and an exact
        # multiple of 10 yields one extra page - confirm this is intended.
        tc = tocSearch // 10 + 1
        logging.debug("documentViewer (gettoc) tc: %s", tc)
        return tc

    def getToc(self, mode="text", docinfo=None):
        """Load the size of the table of contents and store it in docinfo.

        Returns docinfo unchanged when mode is "none" or when
        docinfo already has a 'tocSize_<mode>' entry. Otherwise queries
        doc-query.xql (queryType "toc" for mode "text", else mode itself) and
        stores the number from the 'queryResultHits' div, or 0 when that div is
        missing, in docinfo['tocSize_<mode>'].
        """
        logging.debug("documentViewer (gettoc) mode: %s", mode)
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        # number of entries in toc
        tocSize = 0
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, queryType, pagesize, pn), outputUnicode=False)
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # get number of entries
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """Return a single page of the table of contents as text.

        The pn argument is overwritten with pageinfo['tocPN']. The result of
        doc-query.xql is post-processed by plain string replacement: links to
        page-fragment.xql for this document are pointed at this object, and
        'mode=image' is replaced by 'mode=texttool'.
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        pn = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        pagexml = self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, queryType, pagesize, pn), outputUnicode=False)
        page = pagexml.replace('page-fragment.xql?document=%s'%str(docpath), '%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl, url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image', 'mode=texttool')
        return text