Changeset 506:67014399894d in documentViewer for MpdlXmlTextServer.py
- Timestamp: Feb 21, 2012, 6:16:28 PM (12 years ago)
- Branch: elementtree
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
MpdlXmlTextServer.py
r501 r506 1 1 from OFS.SimpleItem import SimpleItem 2 2 from Products.PageTemplates.PageTemplateFile import PageTemplateFile 3 4 from Ft.Xml import EMPTY_NAMESPACE, Parse5 from Ft.Xml.Domlette import NonvalidatingReader6 import Ft.Xml.Domlette7 import cStringIO8 3 9 4 import xml.etree.ElementTree as ET … … 12 7 import logging 13 8 import urllib 9 import base64 14 10 15 11 from SrvTxtUtils import getInt, getText, getHttpData … … 22 18 i = s.find('?>') 23 19 return s[i+3:] 24 25 return s26 27 28 def getTextFromNode(node):29 """get the cdata content of a node"""30 if node is None:31 return ""32 33 # 4Suite:34 nodelist=node.childNodes35 text = ""36 for n in nodelist:37 if n.nodeType == node.TEXT_NODE:38 text = text + n.data39 40 return text41 42 def serializeNode(node, encoding="utf-8"):43 """returns a string containing node as XML"""44 #s = ET.tostring(node)45 46 # 4Suite:47 stream = cStringIO.StringIO()48 Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding)49 s = stream.getvalue()50 stream.close()51 20 52 21 return s … … 82 51 return getHttpData(url,data,timeout=self.timeout) 83 52 84 # WTF: what does this really do? can it be integrated in getPage? 85 def getSearch(self, pageinfo=None, docinfo=None): 86 """get search list""" 87 logging.debug("getSearch()") 88 docpath = docinfo['textURLPath'] 89 url = docinfo['url'] 90 pagesize = pageinfo['queryPageSize'] 91 pn = pageinfo.get('searchPN',1) 92 sn = pageinfo.get('sn',None) #TODO: is this s now? 
93 highlightQuery = pageinfo['highlightQuery'] 94 query =pageinfo['query'] 95 queryType =pageinfo['queryType'] 96 viewMode= pageinfo['viewMode'] 97 tocMode = pageinfo['tocMode'] 98 characterNormalization = pageinfo['characterNormalization'] 99 #optionToggle = pageinfo['optionToggle'] 100 tocPN = pageinfo['tocPN'] 101 selfurl = self.absolute_url() 102 data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery))) 103 pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) 104 pagedom = Parse(pagexml) 105 106 """ 107 pagedivs = pagedom.xpath("//div[@class='queryResultHits']") 108 if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): 109 if len(pagedivs)>0: 110 docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) 111 s = getTextFromNode(pagedivs[0]) 112 s1 = int(s)/10+1 113 try: 114 docinfo['queryResultHits'] = int(s1) 115 logging.debug("SEARCH ENTRIES: %s"%(s1)) 116 except: 117 docinfo['queryResultHits'] = 0 118 """ 119 if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): 120 pagedivs = pagedom.xpath("//div[@class='queryResultPage']") 121 if len(pagedivs)>0: 122 pagenode=pagedivs[0] 123 links=pagenode.xpath("//a") 124 for l in links: 125 hrefNode = l.getAttributeNodeNS(None, u"href") 126 if hrefNode: 127 href = hrefNode.nodeValue 128 if href.startswith('page-fragment.xql'): 129 selfurl = self.absolute_url() 130 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization)) 131 hrefNode.nodeValue = 
pagexml.replace('page-fragment.xql','%s'%selfurl) 132 #logging.debug("PUREXML :%s"%(serializeNode(pagenode))) 133 return serializeNode(pagenode) 134 if (queryType=="fulltextMorph"): 135 pagedivs = pagedom.xpath("//div[@class='queryResult']") 136 if len(pagedivs)>0: 137 pagenode=pagedivs[0] 138 links=pagenode.xpath("//a") 139 for l in links: 140 hrefNode = l.getAttributeNodeNS(None, u"href") 141 if hrefNode: 142 href = hrefNode.nodeValue 143 if href.startswith('page-fragment.xql'): 144 selfurl = self.absolute_url() 145 pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)) 146 hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) 147 if href.startswith('../lt/lemma.xql'): 148 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl)) 149 l.setAttributeNS(None, 'target', '_blank') 150 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 151 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 152 pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']") 153 return serializeNode(pagenode) 154 if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): 155 pagedivs= pagedom.xpath("//div[@class='queryResultPage']") 156 if len(pagedivs)>0: 157 pagenode=pagedivs[0] 158 links=pagenode.xpath("//a") 159 for l in links: 160 hrefNode = l.getAttributeNodeNS(None, u"href") 161 if hrefNode: 162 href = hrefNode.nodeValue 163 hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) 164 if href.startswith('../lt/lex.xql'): 165 hrefNode.nodeValue = 
href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl) 166 l.setAttributeNS(None, 'target', '_blank') 167 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 168 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 169 if href.startswith('../lt/lemma.xql'): 170 hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl)) 171 l.setAttributeNS(None, 'target', '_blank') 172 l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") 173 l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') 174 return serializeNode(pagenode) 175 return "no text here" 176 177 def getGisPlaces(self, docinfo=None, pageinfo=None): 178 """ Show all Gis Places of whole Page""" 179 xpath='//place' 53 54 def getPlacesOnPage(self, docinfo=None, pn=None): 55 """Returns list of GIS places of page pn""" 180 56 docpath = docinfo.get('textURLPath',None) 181 57 if not docpath: 182 58 return None 183 59 184 pn = pageinfo['current'] 185 hrefList=[] 186 myList= "" 187 text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn)) 60 places=[] 61 text=self.getServerData("xpath.xql", "document=%s&xpath=//place&pn=%s"%(docpath,pn)) 188 62 dom = ET.fromstring(text) 189 result = dom.findall(".//result /resultPage/place")63 result = dom.findall(".//resultPage/place") 190 64 for l in result: 191 href = l.get("id") 192 hrefList.append(href) 193 # WTF: what does this do? 
194 myList = ",".join(hrefList) 195 #logging.debug("getGisPlaces :%s"%(myList)) 196 return myList 197 198 def getAllGisPlaces (self, docinfo=None, pageinfo=None): 199 """Show all Gis Places of whole Book """ 200 xpath ='//echo:place' 201 hrefList=[] 202 myList="" 203 text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath)) 204 dom = ET.fromstring(text) 205 result = dom.findall(".//result/resultPage/place") 206 207 for l in result: 208 href = l.get("id") 209 hrefList.append(href) 210 # WTF: what does this do? 211 myList = ",".join(hrefList) 212 #logging.debug("getALLGisPlaces :%s"%(myList)) 213 return myList 65 id = l.get("id") 66 name = l.text 67 place = {'id': id, 'name': name} 68 places.append(place) 69 70 return places 71 214 72 215 73 def processPageInfo(self, dom, docinfo, pageinfo): … … 380 238 # gis mode 381 239 elif mode == "gis": 382 name = docinfo['name']383 240 if pagediv is not None: 384 241 # check all a-tags 385 242 links = pagediv.findall(".//a") 243 # add our URL as backlink 244 selfurl = self.getLink() 245 doc = base64.b64encode(selfurl) 386 246 for l in links: 387 247 href = l.get('href') 388 248 if href: 389 if href.startswith('http:// chinagis.mpiwg-berlin.mpg.de'):390 l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name))391 l.set('target', '_blank') 249 if href.startswith('http://mappit.mpiwg-berlin.mpg.de'): 250 l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) 251 l.set('target', '_blank') 392 252 393 253 return serialize(pagediv) … … 395 255 return None 396 256 397 # TODO: should be getWordInfo 398 def getWordInfo(self, word='', language='', display=''): 399 """show information (like dictionaries) about word""" 400 data = self.getServerData("lt/wordInfo.xql","language=%s&word=%s&display=%s&output=html"%(language,urllib.quote(word),urllib.quote(display))) 401 return data 402 403 # WTF: what does this do? 
404 def getLemma(self, lemma=None, language=None): 405 """simular words lemma """ 406 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html") 407 return data 408 409 # WTF: what does this do? 410 def getLemmaQuery(self, query=None, language=None): 411 """simular words lemma """ 412 data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html") 413 return data 414 415 # WTF: what does this do? 416 def getLex(self, query=None, language=None): 417 #simular words lemma 418 data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) 419 return data 420 421 # WTF: what does this do? 422 def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): 423 #number of 424 docpath = docinfo['textURLPath'] 425 pagesize = pageinfo['queryPageSize'] 426 pn = pageinfo['searchPN'] 427 query =pageinfo['query'] 428 queryType =pageinfo['queryType'] 429 tocSearch = 0 430 tocDiv = None 431 432 pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) 433 pagedom = Parse(pagexml) 434 numdivs = pagedom.xpath("//div[@class='queryResultHits']") 435 tocSearch = int(getTextFromNode(numdivs[0])) 436 tc=int((tocSearch/10)+1) 437 return tc 438 257 439 258 def getToc(self, mode="text", docinfo=None): 440 259 """loads table of contents and stores XML in docinfo"""
Note: See TracChangeset
for help on using the changeset viewer.