documentViewer/MpdlXmlTextServer.py - annotate

Return to MpdlXmlTextServer.py CVS log
Up to [Repository] / documentViewer
Annotation of documentViewer/MpdlXmlTextServer.py, revision 1.238.2.1

1.2       casties     1: 
                      2: from OFS.SimpleItem import SimpleItem
                      3: from Products.PageTemplates.PageTemplateFile import PageTemplateFile 
                      4: from Ft.Xml import EMPTY_NAMESPACE, Parse
1.238     abukhman    5: from Ft.Xml.Domlette import NonvalidatingReader
1.238.2.1! casties     6: import Ft.Xml.Domlette
        !             7: import cStringIO
        !             8: 
        !             9: import xml.etree.ElementTree as ET
1.2       casties    10: 
1.224     abukhman   11: import md5
1.2       casties    12: import sys
                     13: import logging
1.5       casties    14: import urllib
1.2       casties    15: import documentViewer
1.238.2.1! casties    16: #from documentViewer import getTextFromNode, serializeNode
        !            17: 
        !            18: def getText(node):
        !            19:     """get the cdata content of a node"""
        !            20:     if node is None:
        !            21:         return ""
        !            22:     # ET:
        !            23:     text = node.text or ""
        !            24:     for e in node:
        !            25:         text += gettext(e)
        !            26:         if e.tail:
        !            27:             text += e.tail
        !            28: 
        !            29:     return text
        !            30: 
        !            31: def serialize(node):
        !            32:     """returns a string containing an XML snippet of node"""
        !            33:     s = ET.tostring(node, 'UTF-8')
        !            34:     # snip off XML declaration
        !            35:     if s.startswith('<?xml'):
        !            36:         i = s.find('?>')
        !            37:         return s[i+3:]
        !            38: 
        !            39:     return s
        !            40: 
        !            41: 
        !            42: def getTextFromNode(node):
        !            43:     """get the cdata content of a node"""
        !            44:     if node is None:
        !            45:         return ""
        !            46:     # ET:
        !            47:     #text = node.text or ""
        !            48:     #for e in node:
        !            49:     #    text += gettext(e)
        !            50:     #    if e.tail:
        !            51:     #        text += e.tail
        !            52: 
        !            53:     # 4Suite:
        !            54:     nodelist=node.childNodes
        !            55:     text = ""
        !            56:     for n in nodelist:
        !            57:         if n.nodeType == node.TEXT_NODE:
        !            58:            text = text + n.data
        !            59:     
        !            60:     return text
        !            61: 
        !            62: def serializeNode(node, encoding="utf-8"):
        !            63:     """returns a string containing node as XML"""
        !            64:     #s = ET.tostring(node)
        !            65:     
        !            66:     # 4Suite:
        !            67:     stream = cStringIO.StringIO()
        !            68:     Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)
        !            69:     s = stream.getvalue()
        !            70:     stream.close()
        !            71: 
        !            72:     return s
        !            73: 
1.2       casties    74: 
class MpdlXmlTextServer(SimpleItem):
    """TextServer implementation for MPDL-XML eXist server"""
    # Type label of this item class.
    meta_type="MPDL-XML TextServer"

    # Management options: a "Config" entry placed in front of the options
    # inherited from SimpleItem.
    manage_options=(
        {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
       )+SimpleItem.manage_options
    
    # Page template that the "Config" action above refers to by name.
    manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
                     84:         
1.236     abukhman   85:     def __init__(self,id,title="",serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
1.234     abukhman   86:     #def __init__(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/", serverName=None, timeout=40):    
1.231     abukhman   87:         
1.2       casties    88:         """constructor"""
                     89:         self.id=id
                     90:         self.title=title
                     91:         self.timeout = timeout
1.3       casties    92:         if serverName is None:
                     93:             self.serverUrl = serverUrl
                     94:         else:
                     95:             self.serverUrl = "http://%s/mpdl/interface/"%serverName
1.2       casties    96:         
                     97:     def getHttpData(self, url, data=None):
                     98:         """returns result from url+data HTTP request"""
                     99:         return documentViewer.getHttpData(url,data,timeout=self.timeout)
                    100:     
                    101:     def getServerData(self, method, data=None):
                    102:         """returns result from text server for method+data"""
                    103:         url = self.serverUrl+method
                    104:         return documentViewer.getHttpData(url,data,timeout=self.timeout)
                    105: 
1.235     abukhman  106:     def getSearch(self, pageinfo=None,  docinfo=None):
1.2       casties   107:         """get search list"""
                    108:         docpath = docinfo['textURLPath'] 
                    109:         url = docinfo['url']
                    110:         pagesize = pageinfo['queryPageSize']
1.222     abukhman  111:         pn = pageinfo.get('searchPN',1)
1.2       casties   112:         sn = pageinfo['sn']
                    113:         highlightQuery = pageinfo['highlightQuery']
1.34      abukhman  114:         query =pageinfo['query']
1.2       casties   115:         queryType =pageinfo['queryType']
                    116:         viewMode=  pageinfo['viewMode']
                    117:         tocMode = pageinfo['tocMode']
1.24      abukhman  118:         characterNormalization = pageinfo['characterNormalization']
1.237     abukhman  119:         #optionToggle = pageinfo['optionToggle']
1.2       casties   120:         tocPN = pageinfo['tocPN']
                    121:         selfurl = self.absolute_url()
1.237     abukhman  122:         data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery)))
1.2       casties   123:         pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
                    124:         pagedom = Parse(pagexml)
1.222     abukhman  125:         
                    126:         """
                    127:         pagedivs = pagedom.xpath("//div[@class='queryResultHits']") 
                    128:         if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")):
                    129:             if len(pagedivs)>0:
                    130:                 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0]))
                    131:                 s = getTextFromNode(pagedivs[0])
                    132:                 s1 = int(s)/10+1
                    133:                 try:
                    134:                     docinfo['queryResultHits'] = int(s1)
                    135:                     logging.debug("SEARCH ENTRIES: %s"%(s1))
                    136:                 except:
                    137:                     docinfo['queryResultHits'] = 0
                    138:         """
1.2       casties   139:         if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"):   
                    140:             pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
                    141:             if len(pagedivs)>0:
                    142:                 pagenode=pagedivs[0]
                    143:                 links=pagenode.xpath("//a")
                    144:                 for l in links:
                    145:                     hrefNode = l.getAttributeNodeNS(None, u"href")
                    146:                     if hrefNode:
                    147:                         href = hrefNode.nodeValue
                    148:                         if href.startswith('page-fragment.xql'):
                    149:                             selfurl = self.absolute_url()            
1.237     abukhman  150:                             pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization))
1.2       casties   151:                             hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)                                           
1.230     abukhman  152:                 #logging.debug("PUREXML :%s"%(serializeNode(pagenode)))
1.2       casties   153:                 return serializeNode(pagenode)        
                    154:         if (queryType=="fulltextMorph"):
                    155:             pagedivs = pagedom.xpath("//div[@class='queryResult']")
                    156:             if len(pagedivs)>0:
                    157:                 pagenode=pagedivs[0]
                    158:                 links=pagenode.xpath("//a")
                    159:                 for l in links:
                    160:                     hrefNode = l.getAttributeNodeNS(None, u"href")
                    161:                     if hrefNode:
                    162:                         href = hrefNode.nodeValue
                    163:                         if href.startswith('page-fragment.xql'):
                    164:                             selfurl = self.absolute_url()       
1.237     abukhman  165:                             pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization))
1.2       casties   166:                             hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl)  
                    167:                         if href.startswith('../lt/lemma.xql'):
1.235     abukhman  168:                             hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl))        
1.2       casties   169:                             l.setAttributeNS(None, 'target', '_blank')
                    170:                             l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
1.235     abukhman  171:                             l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')  
1.2       casties   172:                 pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']")                
                    173:                 return serializeNode(pagenode)        
                    174:         if (queryType=="ftIndex")or(queryType=="ftIndexMorph"):
                    175:             pagedivs= pagedom.xpath("//div[@class='queryResultPage']")
                    176:             if len(pagedivs)>0:
                    177:                 pagenode=pagedivs[0]
                    178:                 links=pagenode.xpath("//a")
                    179:                 for l in links:
                    180:                     hrefNode = l.getAttributeNodeNS(None, u"href")
                    181:                     if hrefNode:
                    182:                         href = hrefNode.nodeValue
1.237     abukhman  183:                         hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization))             
1.2       casties   184:                         if href.startswith('../lt/lex.xql'):
1.235     abukhman  185:                             hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl)         
1.2       casties   186:                             l.setAttributeNS(None, 'target', '_blank')
                    187:                             l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
1.235     abukhman  188:                             l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
1.2       casties   189:                         if href.startswith('../lt/lemma.xql'):
1.235     abukhman  190:                             hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl))        
1.2       casties   191:                             l.setAttributeNS(None, 'target', '_blank')
                    192:                             l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
1.235     abukhman  193:                             l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')
1.2       casties   194:                 return serializeNode(pagenode)      
                    195:         return "no text here"   
1.222     abukhman  196:            
1.89      abukhman  197:     def getGisPlaces(self, docinfo=None, pageinfo=None):
1.58      abukhman  198:         """ Show all Gis Places of whole Page"""
1.100     abukhman  199:         xpath='//place'
1.214     casties   200:         docpath = docinfo.get('textURLPath',None)
                    201:         if not docpath:
                    202:             return None
                    203: 
1.89      abukhman  204:         url = docinfo['url']
                    205:         selfurl = self.absolute_url()
1.93      abukhman  206:         pn = pageinfo['current']
1.127     abukhman  207:         hrefList=[]
1.142     abukhman  208:         myList= ""
1.100     abukhman  209:         text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn))
                    210:         dom = Parse(text)
1.101     abukhman  211:         result = dom.xpath("//result/resultPage/place")
1.72      abukhman  212:         for l in result:
1.86      abukhman  213:             hrefNode= l.getAttributeNodeNS(None, u"id")
1.108     abukhman  214:             href= hrefNode.nodeValue
1.128     abukhman  215:             hrefList.append(href)
1.145     abukhman  216:             myList = ",".join(hrefList)
1.230     abukhman  217:         #logging.debug("getGisPlaces :%s"%(myList))                             
1.143     abukhman  218:         return myList
                    219:     
                    220:     def getAllGisPlaces (self, docinfo=None, pageinfo=None):
                    221:         """Show all Gis Places of whole Book """
                    222:         xpath ='//echo:place'
                    223:         docpath =docinfo['textURLPath']
                    224:         url = docinfo['url']
                    225:         selfurl =self.absolute_url()
                    226:         pn =pageinfo['current']
                    227:         hrefList=[]
                    228:         myList=""
                    229:         text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath))
                    230:         dom =Parse(text)
                    231:         result = dom.xpath("//result/resultPage/place")
1.205     abukhman  232:         
1.143     abukhman  233:         for l in result:
                    234:             hrefNode = l.getAttributeNodeNS(None, u"id")
                    235:             href= hrefNode.nodeValue
                    236:             hrefList.append(href)
1.136     abukhman  237:             myList = ",".join(hrefList)
1.230     abukhman  238:             #logging.debug("getALLGisPlaces :%s"%(myList))
1.145     abukhman  239:         return myList
1.222     abukhman  240:           
1.215     abukhman  241:            
1.227     abukhman  242:     def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
1.2       casties   243:         """returns single page from fulltext"""
                    244:         docpath = docinfo['textURLPath']
                    245:         path = docinfo['textURLPath']
1.222     abukhman  246:         url = docinfo.get('url',None)
                    247:         name = docinfo.get('name',None)
                    248:         pn =pageinfo['current']
1.196     abukhman  249:         sn = pageinfo['sn']
1.237     abukhman  250:         #optionToggle =pageinfo ['optionToggle']
1.187     abukhman  251:         highlightQuery = pageinfo['highlightQuery']
1.225     abukhman  252:         #mode = pageinfo ['viewMode']
1.2       casties   253:         tocMode = pageinfo['tocMode']
1.20      abukhman  254:         characterNormalization=pageinfo['characterNormalization']
1.2       casties   255:         tocPN = pageinfo['tocPN']
                    256:         selfurl = self.absolute_url()   
                    257:         if mode == "text_dict":
                    258:             textmode = "textPollux"
                    259:         else:
                    260:             textmode = mode
1.222     abukhman  261:         
1.237     abukhman  262:         textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
1.190     abukhman  263:         if highlightQuery is not None:
1.196     abukhman  264:             textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)           
1.222     abukhman  265:         
1.38      abukhman  266:         pagexml = self.getServerData("page-fragment.xql",textParam)
1.238.2.1! casties   267:         dom = ET.fromstring(pagexml)
1.238     abukhman  268:         #dom = NonvalidatingReader.parseStream(pagexml)
1.236     abukhman  269:         
1.222     abukhman  270:         #original Pages
1.238.2.1! casties   271:         #pagedivs = dom.xpath("//div[@class='pageNumberOrig']")       
1.236     abukhman  272:         
1.230     abukhman  273:         """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
1.222     abukhman  274:             if len(pagedivs)>0:
                    275:                 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])
1.230     abukhman  276:                 logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig']))
1.226     abukhman  277:         
                    278:         #original Pages Norm
                    279:         pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
                    280:         if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"):
                    281:             if len(pagedivs)>0:
                    282:                 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0])
1.230     abukhman  283:                 logging.debug("ORIGINAL PAGE NORM: %s"%(docinfo['pageNumberOrigNorm']))
                    284:         """
1.222     abukhman  285:         #figureEntries
1.238.2.1! casties   286: #        pagedivs = dom.xpath("//div[@class='countFigureEntries']")
        !           287: #        if pagedivs == dom.xpath("//div[@class='countFigureEntries']"):
        !           288: #            if len(pagedivs)>0:
        !           289: #                docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0])
        !           290: #                s = getTextFromNode(pagedivs[0])
        !           291: #                if s=='0':
        !           292: #                    try:
        !           293: #                        docinfo['countFigureEntries'] = int(s)
        !           294: #                    except:
        !           295: #                        docinfo['countFigureEntries'] = 0
        !           296: #                else:
        !           297: #                    s1 = int(s)/30+1
        !           298: #                    try:
        !           299: #                        docinfo['countFigureEntries'] = int(s1)
        !           300: #                    except:
        !           301: #                        docinfo['countFigureEntries'] = 0      
        !           302: #        
        !           303: #        #allPlaces
        !           304: #        pagedivs = dom.xpath("//div[@class='countPlaces']")
        !           305: #        if pagedivs == dom.xpath("//div[@class='countPlaces']"):
        !           306: #            if len(pagedivs)>0:
        !           307: #                docinfo['countPlaces']= getTextFromNode(pagedivs[0])
        !           308: #                s = getTextFromNode(pagedivs[0])
        !           309: #                try:
        !           310: #                    docinfo['countPlaces'] = int(s)
        !           311: #                except:
        !           312: #                    docinfo['countPlaces'] = 0
        !           313: #        
        !           314: #        #tocEntries
        !           315: #        pagedivs = dom.xpath("//div[@class='countTocEntries']")
        !           316: #        if pagedivs == dom.xpath("//div[@class='countTocEntries']"):
        !           317: #            if len(pagedivs)>0:
        !           318: #                docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0]))
        !           319: #                s = getTextFromNode(pagedivs[0])
        !           320: #                if s=='0':
        !           321: #                    try:
        !           322: #                        docinfo['countTocEntries'] = int(s)
        !           323: #                    except:
        !           324: #                        docinfo['countTocEntries'] = 0
        !           325: #                else:
        !           326: #                    s1 = int(s)/30+1
        !           327: #                    try:
        !           328: #                        docinfo['countTocEntries'] = int(s1)
        !           329: #                    except:
        !           330: #                        docinfo['countTocEntries'] = 0
1.222     abukhman  331:         
                    332:         #numTextPages
1.238.2.1! casties   333:         #pagedivs = dom.xpath("//div[@class='countPages']")
        !           334:         alldivs = dom.findall(".//div")
        !           335:         pagediv = None
        !           336:         for div in alldivs:
        !           337:             dc = div.get('class')
        !           338:             if dc == 'pageContent':
        !           339:                 pagediv = div
1.222     abukhman  340:                 
1.238.2.1! casties   341:             if dc == 'countPages':
1.222     abukhman  342:                 try:
1.238.2.1! casties   343:                     np = int(div.text)                    
        !           344:                     docinfo['numPages'] = np
1.222     abukhman  345:                     pageinfo['end'] = min(pageinfo['end'], np)
                    346:                     pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
                    347:                     if np % pageinfo['groupsize'] > 0:
1.238.2.1! casties   348:                         pageinfo['numgroups'] += 1
        !           349:                                
1.222     abukhman  350:                 except:
                    351:                     docinfo['numPages'] = 0
1.238.2.1! casties   352: 
        !           353:                 break
1.222     abukhman  354:                   
1.238.2.1! casties   355: # ROC: why?                  
        !           356: #            else:
        !           357: #                #no full text -- init to 0
        !           358: #                docinfo['pageNumberOrig'] = 0
        !           359: #                docinfo['countFigureEntries'] = 0
        !           360: #                docinfo['countPlaces'] = 0
        !           361: #                docinfo['countTocEntries'] = 0
        !           362: #                docinfo['numPages'] = 0
        !           363: #                docinfo['pageNumberOrigNorm'] = 0
        !           364: #                #return docinfo
1.2       casties   365:         
                    366:         # plain text mode
                    367:         if mode == "text":
1.238.2.1! casties   368:             #pagedivs = dom.xpath("/div")
        !           369:             if pagediv:
        !           370:                 links = pagediv.findall(".//a")
1.2       casties   371:                 for l in links:
1.238.2.1! casties   372:                     href = l.get('href')
        !           373:                     if href and href.startswith('#note-'):
        !           374:                         href = href.replace('#note-',"?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))
        !           375:                         l.set('href', href)
        !           376:                 logging.debug("page=%s"%ET.tostring(pagediv, 'UTF-8'))
        !           377:                 return serialize(pagediv)
        !           378:             
1.2       casties   379:         if mode == "xml":
1.238.2.1! casties   380:             if pagediv:
        !           381:                 return serialize(pagediv)
        !           382:             
        !           383:         if mode == "pureXml":
        !           384:             if pagediv:
        !           385:                 return serialize(pagediv)
        !           386:                   
1.7       abukhman  387:         if mode == "gis":
1.238.2.1! casties   388:             if pagediv:
        !           389:                 # check all a-tags
        !           390:                 links = pagediv.findall(".//a")
        !           391:                 for l in links:
        !           392:                     href = l.get('href')
        !           393:                     if href:
        !           394:                         if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
        !           395:                             l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name))
        !           396:                             l.set('target', '_blank') 
        !           397:                             
        !           398:                 return serialize(pagenode)
1.7       abukhman  399:                     
1.2       casties   400:         # text-with-links mode
                    401:         if mode == "text_dict":
1.238.2.1! casties   402:             if pagediv:
1.2       casties   403:                 # check all a-tags
1.238.2.1! casties   404:                 links = pagediv.findall(".//a")
1.2       casties   405:                 for l in links:
1.238.2.1! casties   406:                     href = l.get('href')
1.236     abukhman  407:                     
1.238.2.1! casties   408:                     if href:
1.2       casties   409:                         # is link with href
1.235     abukhman  410:                         if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
1.2       casties   411:                             # is pollux link
                    412:                             selfurl = self.absolute_url()
                    413:                             # change href
1.238.2.1! casties   414:                             l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl))
1.2       casties   415:                             # add target
1.238.2.1! casties   416:                             l.set('target', '_blank')
1.235     abukhman  417:                                                           
                    418:                         if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):    
1.2       casties   419:                             selfurl = self.absolute_url()
1.238.2.1! casties   420:                             l.set('href', href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl))
        !           421:                             l.set('target', '_blank')
        !           422:                             l.set('onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
        !           423:                             l.set('ondblclick', 'popupWin.focus();')   
1.236     abukhman  424:                     
1.2       casties   425:                         if href.startswith('#note-'):
1.238.2.1! casties   426:                             l.set('href', href.replace('#note-',"?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn)))
1.236     abukhman  427:                               
1.238.2.1! casties   428:                 return serialize(pagediv)
        !           429:             
1.2       casties   430:         return "no text here"
1.225     abukhman  431:     
1.230     abukhman  432:     def getOrigPages(self, docinfo=None, pageinfo=None):
                    433:         docpath = docinfo['textURLPath']
                    434:         pn =pageinfo['current']
                    435:         selfurl = self.absolute_url()   
                    436:         pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn))
                    437:         dom = Parse(pagexml)
                    438:         pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
                    439:         if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
                    440:             if len(pagedivs)>0:
                    441:                 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])        
                    442:                 return docinfo['pageNumberOrig']
                    443:     
                    444:     def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
                    445:         docpath = docinfo['textURLPath']
                    446:         pn =pageinfo['current']
                    447:         selfurl = self.absolute_url()   
                    448:         pagexml = self.getServerData("page-fragment.xql","document=%s&pn=%s"%(docpath, pn))
                    449:         dom = Parse(pagexml)
                    450:         pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
                    451:         if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"):
                    452:             if len(pagedivs)>0:
                    453:                 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0])        
                    454:                 return docinfo['pageNumberOrigNorm']
                    455: 
                    456:                 
1.235     abukhman  457:     def getTranslate(self, word=None, language=None):
1.2       casties   458:         """translate into another languages"""
1.235     abukhman  459:         data = self.getServerData("lt/wordInfo.xql","language="+str(language)+"&word="+urllib.quote(word)+"&output=html")
1.2       casties   460:         #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
                    461:         return data
                    462:     
                    463:     def getLemma(self, lemma=None, language=None):
                    464:         """simular words lemma """
1.235     abukhman  465:         data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html")
1.2       casties   466:         return data
                    467:     
1.235     abukhman  468:     def getLemmaQuery(self, query=None, language=None):
1.2       casties   469:         """simular words lemma """
1.235     abukhman  470:         data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html")
                    471:         return data
                    472:     
                    473:     def getLex(self, query=None, language=None):
                    474:         #simular words lemma
                    475:         data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query))
1.2       casties   476:         return data
1.28      abukhman  477:     
1.237     abukhman  478:     def getQuery (self,  docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
1.222     abukhman  479:          #number of
1.2       casties   480:          docpath = docinfo['textURLPath'] 
                    481:          pagesize = pageinfo['queryPageSize']
                    482:          pn = pageinfo['searchPN']
1.34      abukhman  483:          query =pageinfo['query']
1.2       casties   484:          queryType =pageinfo['queryType']
                    485:          tocSearch = 0
                    486:          tocDiv = None
                    487:          
1.32      abukhman  488:          pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn))
1.2       casties   489:          pagedom = Parse(pagexml)
                    490:          numdivs = pagedom.xpath("//div[@class='queryResultHits']")
                    491:          tocSearch = int(getTextFromNode(numdivs[0]))
                    492:          tc=int((tocSearch/10)+1)
                    493:          return tc
1.222     abukhman  494:     
1.2       casties   495:     def getToc(self, mode="text", docinfo=None):
                    496:         """loads table of contents and stores in docinfo"""
                    497:         if mode == "none":
                    498:             return docinfo        
                    499:         if 'tocSize_%s'%mode in docinfo:
                    500:             # cached toc
                    501:             return docinfo
                    502:         
                    503:         docpath = docinfo['textURLPath']
                    504:         # we need to set a result set size
                    505:         pagesize = 1000
                    506:         pn = 1
                    507:         if mode == "text":
                    508:             queryType = "toc"
                    509:         else:
                    510:             queryType = mode
                    511:         # number of entries in toc
                    512:         tocSize = 0
                    513:         tocDiv = None
                    514:         
                    515:         pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
1.222     abukhman  516:         
1.2       casties   517:         # post-processing downloaded xml
                    518:         pagedom = Parse(pagexml)
                    519:         # get number of entries
                    520:         numdivs = pagedom.xpath("//div[@class='queryResultHits']")
                    521:         if len(numdivs) > 0:
                    522:             tocSize = int(getTextFromNode(numdivs[0]))
                    523:         docinfo['tocSize_%s'%mode] = tocSize
                    524:         return docinfo
                    525:     
                    526:     def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
                    527:         """returns single page from the table of contents"""
                    528:         # TODO: this should use the cached TOC
                    529:         if mode == "text":
                    530:             queryType = "toc"
                    531:         else:
                    532:             queryType = mode
                    533:         docpath = docinfo['textURLPath']
                    534:         path = docinfo['textURLPath']       
                    535:         pagesize = pageinfo['tocPageSize']
                    536:         pn = pageinfo['tocPN']
                    537:         url = docinfo['url']
                    538:         selfurl = self.absolute_url()  
                    539:         viewMode=  pageinfo['viewMode']
1.26      abukhman  540:         characterNormalization = pageinfo ['characterNormalization']
1.237     abukhman  541:         #optionToggle =pageinfo ['optionToggle']
1.2       casties   542:         tocMode = pageinfo['tocMode']
                    543:         tocPN = pageinfo['tocPN']  
                    544:         
1.237     abukhman  545:         data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm"%(docpath,queryType, pagesize, pn))  
                    546:         page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
1.2       casties   547:         text = page.replace('mode=image','mode=texttool')
                    548:         return text
                    549:     
1.234     abukhman  550:     def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
                    551:     #def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/",timeout=40,RESPONSE=None):
1.2       casties   552:         """change settings"""
                    553:         self.title=title
                    554:         self.timeout = timeout
                    555:         self.serverUrl = serverUrl
                    556:         if RESPONSE is not None:
                    557:             RESPONSE.redirect('manage_main')
                    558:         
                    559: # management methods
                    560: def manage_addMpdlXmlTextServerForm(self):
                    561:     """Form for adding"""
                    562:     pt = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals()).__of__(self)
                    563:     return pt()
                    564: 
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create an MpdlXmlTextServer and add it to self.Destination().

    Redirects to 'manage_main' when a RESPONSE object is given.
    """
    # alternative server URL kept for reference:
    # http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    textServer = MpdlXmlTextServer(id,title,serverUrl,timeout)
    self.Destination()._setObject(id, textServer)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>