# view extraFunction.py @ 3:3ba8479c7aba modularisierung
#
# new function
# author abukhman
# date Wed, 16 Jun 2010 11:16:02 +0200
# parents 8cc283757c39
# children e9085ba2bb51
# line wrap: on
# line source


from OFS.Folder import Folder
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
from Products.PythonScripts.standard import url_quote


from Ft.Xml.Domlette import NonvalidatingReader
from Ft.Xml.Domlette import PrettyPrint, Print
from Ft.Xml import EMPTY_NAMESPACE, Parse

from xml.dom.minidom import parse, parseString

import Ft.Xml.XPath
import cStringIO
import xmlrpclib
import os.path
import sys
import cgi
import urllib
import logging
import math
import documentViewer
import urllib2
import urllib
import urlparse 
from types import *

def getTextFromNode(nodename):
    """Return the concatenated data of the text-node children of a node.

    Only direct children whose nodeType equals TEXT_NODE contribute; text
    nested inside child elements is ignored. Returns "" when nodename is
    None or when the node has no text children.
    """
    if nodename is None:
        return ""
    pieces = [child.data
              for child in nodename.childNodes
              if child.nodeType == child.TEXT_NODE]
    return "".join(pieces)

def serializeNode(node, encoding='utf-8'):
    """Serialize node as XML and return the result as a string.

    The node is written with Print into an in-memory buffer using the
    requested encoding (default 'utf-8'); the buffer content is returned.
    """
    stream = cStringIO.StringIO()
    Print(node, stream=stream, encoding=encoding)
    xml = stream.getvalue()
    stream.close()
    return xml


class extraFunction(Folder):
    # Zope folder that bundles the HTTP calls to the MPDL full-text interface
    # (search, page text, table of contents, dictionary and lemma lookups)
    # and rewrites the links in the returned HTML fragments so that they
    # point back at this object.
    #
    # NOTE(review): described with comments rather than a class docstring on
    # purpose -- Zope's publisher treats docstrings as a publishing marker, so
    # adding one here could change what is reachable by URL; confirm before
    # converting this into a docstring.

    # Base URL of the MPDL interface scripts. Every request below posts its
    # parameters to one of the scripts underneath this URL.
    _mpdlInterfaceUrl = "http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/"

    def __init__(self,id, title=""):
        self.id=id
        self.title=title

    def _fetch(self, script, params):
        """Post params to an interface script and return the response body.

        The response is always closed, also when reading fails.
        """
        response = urllib2.urlopen(self._mpdlInterfaceUrl + script, params)
        try:
            return response.read()
        finally:
            response.close()

    def _fetchDom(self, script, params):
        """Post params to an interface script and return the parsed document.

        The response stream is handed to Parse directly and closed afterwards,
        also when parsing fails.
        """
        response = urllib2.urlopen(self._mpdlInterfaceUrl + script, params)
        try:
            return Parse(response)
        finally:
            response.close()

    def _makePopupLink(self, link, left):
        """Make an a-element open its href in the 'contacts' popup window.

        left is the horizontal position passed to window.open.
        """
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;" % left)
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    def getSearch(self, pn=1, pageinfo=None,  docinfo=None, query=None, queryType=None, lemma=None):
        """get search list

        Runs the query described by pageinfo against the document in docinfo
        and returns the serialized result div with its links rewritten to
        point at this object. Returns "no text here" when the query type is
        not handled or the expected result div is missing.

        The pn, query and queryType arguments are ignored: the values are
        always taken from pageinfo ('searchPN', 'query', 'queryType').
        lemma is unused.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        # NOTE(review): the values are interpolated into the form body without
        # URL-quoting (unlike getTranslate/getLemma); confirm whether callers
        # pre-quote the query before changing this.
        data = self._fetch("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode, highlightQuery))

        # links in the result refer to the document path; the viewer addresses
        # documents by url instead
        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
        pagedom = Parse(pagexml)

        # the class of the div holding the results depends on the query type
        isIndexQuery = queryType in ("ftIndex", "ftIndexMorph")
        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            divClass = "queryResultPage"
        elif queryType == "fulltextMorph":
            divClass = "queryResult"
        elif isIndexQuery:
            divClass = "queryResultPage"
        else:
            return "no text here"

        pagedivs = pagedom.xpath("//div[@class='%s']"%divClass)
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        for l in pagenode.xpath("//a"):
            hrefNode = l.getAttributeNodeNS(None, u"href")
            if not hrefNode:
                continue
            href = hrefNode.nodeValue

            if isIndexQuery:
                hrefNode.nodeValue = href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn))
                # the dictionary/lemma rewrites start from the original href
                # again, so for those links they replace the rewrite above
                if href.startswith('../lt/lex.xql'):
                    hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)
                    self._makePopupLink(l, 400)
                if href.startswith('../lt/lemma.xql'):
                    hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                    self._makePopupLink(l, 400)
                continue

            if href.startswith('page-fragment.xql'):
                # page links go to this object and carry the search state along
                pagexml = href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN))
                hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)
            if queryType == "fulltextMorph" and href.startswith('../lt/lemma.xql'):
                hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))
                self._makePopupLink(l, 400)

        return serializeNode(pagenode)

    def getNumPages(self,docinfo=None):
        """get number of pages from fulltext and put in docinfo

        Queries the document in docinfo['textURLPath'] for '//pb', stores the
        number of "<pb " occurrences in the response as docinfo['numPages']
        and returns docinfo.
        """
        xquery = '//pb'
        # the response body has to be read before it can be searched; the
        # response object itself has no count()
        text = self._fetch("xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
        """returns single page from fulltext

        mode selects the representation: "text", "xml", "pureXml" or
        "text_dict" (requested from the server as "textPollux"). The
        serialized root div of the response is returned; in "text" and
        "text_dict" mode the links inside it are rewritten first. Returns
        "no text here" for any other mode or when the response has no root
        div.

        The viewMode, tocMode and tocPN arguments are ignored: the values are
        always taken from pageinfo. highlightQuery and sn are only sent when
        highlightQuery is not None.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
        if highlightQuery is not None:
            textParam += "&highlightQuery=%s&sn=%s"%(highlightQuery,sn)

        pagedom = self._fetchDom("page-fragment.xql", textParam)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"
        # the root div contains the text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"
        pagenode = pagedivs[0]

        # "xml" and "pureXml" are returned untouched; the text modes get their
        # links rewritten
        if mode in ("text", "text_dict"):
            for l in pagenode.xpath("//a"):
                hrefNode = l.getAttributeNodeNS(None, u"href")
                if not hrefNode:
                    continue
                href = hrefNode.nodeValue
                if mode == "text_dict":
                    if href.startswith('lt/lex.xql'):
                        # dictionary link: open the vocabulary template in a popup
                        hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        self._makePopupLink(l, 700)
                    if href.startswith('lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        self._makePopupLink(l, 700)
                if href.startswith('#note-'):
                    # keep the current viewer state when jumping to a note
                    hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn))
        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """translate into another language

        Posts the URL-quoted query and the language to lt/lex.xql and returns
        the raw response body.
        """
        return self._fetch("lt/lex.xql", "document=&language="+str(language)+"&query="+url_quote(str(query)))

    def getLemma(self, lemma=None, language=None):
        """similar words lemma

        Posts the URL-quoted lemma and the language to lt/lemma.xql and
        returns the raw response body.
        """
        return self._fetch("lt/lemma.xql", "document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))

    def getLemmaNew(self, query=None, language=None):
        """similar words lemma

        Same request as getLemma, with the lemma passed as query.
        """
        return self.getLemma(lemma=query, language=language)

    def getQuery (self,  docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """number of result pages of a search

        Runs the query described by pageinfo, reads the hit count from the
        'queryResultHits' div (0 when the div is missing) and returns
        int(hits/10)+1.

        The query, queryType and pn arguments are ignored: the values are
        always taken from pageinfo ('query', 'queryType', 'searchPN').
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']

        pagedom = self._fetchDom("doc-query.xql", "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        # a response without hits div counts as zero hits, as in getToc
        tocSearch = 0
        if len(numdivs) > 0:
            tocSearch = int(getTextFromNode(numdivs[0]))
        # NOTE(review): the divisor is fixed at 10 and does not follow
        # pageinfo['queryPageSize'] -- confirm that this is intended
        tc = int((tocSearch/10)+1)
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents size and stores it in docinfo

        The number of entries is stored as docinfo['tocSize_<mode>'] (0 when
        the response has no 'queryResultHits' div) and docinfo is returned.
        Nothing is requested when mode is "none" or the size is already in
        docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode

        pagedom = self._fetchDom("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
        # get number of entries
        tocSize = 0
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The response is returned as text with its page-fragment links
        rewritten to point at this object and 'mode=image' replaced by
        'mode=texttool'.

        The pn argument is ignored: the page number is always taken from
        pageinfo['tocPN'].
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        tocPN = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']

        data = self._fetch("doc-query.xql", "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, tocPN))

        page = data.replace('page-fragment.xql?document=%s'%str(docpath),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image','mode=texttool')
        return text