Mercurial > hg > documentViewer
changeset 506:67014399894d elementtree
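Cleaned out all 4Suite (Ft.Xml) code and the weird ("WTF"-flagged) methods: getSearch, getQuery, getWordInfo, getLemma, getLemmaQuery, getLex and getAllGisPlaces.
Fixed GIS places: getGisPlaces is replaced by getPlacesOnPage, which returns a list of {'id', 'name'} dicts for one page, and the GIS links now point to mappit.mpiwg-berlin.mpg.de.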
author | casties |
---|---|
date | Tue, 21 Feb 2012 19:16:28 +0100 |
parents | 29f2172db368 |
children | 3c01e8f4e72b |
files | MpdlXmlTextServer.py documentViewer.py zpt/viewer_text.zpt |
diffstat | 3 files changed, 52 insertions(+), 272 deletions(-) [+] |
line wrap: on
line diff
--- a/MpdlXmlTextServer.py Fri Feb 17 10:46:03 2012 +0100 +++ b/MpdlXmlTextServer.py Tue Feb 21 19:16:28 2012 +0100 @@ -1,16 +1,12 @@ from OFS.SimpleItem import SimpleItem from Products.PageTemplates.PageTemplateFile import PageTemplateFile -from Ft.Xml import EMPTY_NAMESPACE, Parse -from Ft.Xml.Domlette import NonvalidatingReader -import Ft.Xml.Domlette -import cStringIO - import xml.etree.ElementTree as ET import re import logging import urllib +import base64 from SrvTxtUtils import getInt, getText, getHttpData @@ -25,33 +21,6 @@ return s -def getTextFromNode(node): - """get the cdata content of a node""" - if node is None: - return "" - - # 4Suite: - nodelist=node.childNodes - text = "" - for n in nodelist: - if n.nodeType == node.TEXT_NODE: - text = text + n.data - - return text - -def serializeNode(node, encoding="utf-8"): - """returns a string containing node as XML""" - #s = ET.tostring(node) - - # 4Suite: - stream = cStringIO.StringIO() - Ft.Xml.Domlette.Print(node, stream=stream, encoding=encoding) - s = stream.getvalue() - stream.close() - - return s - - class MpdlXmlTextServer(SimpleItem): """TextServer implementation for MPDL-XML eXist server""" meta_type="MPDL-XML TextServer" @@ -81,136 +50,25 @@ url = self.serverUrl+method return getHttpData(url,data,timeout=self.timeout) - # WTF: what does this really do? can it be integrated in getPage? - def getSearch(self, pageinfo=None, docinfo=None): - """get search list""" - logging.debug("getSearch()") - docpath = docinfo['textURLPath'] - url = docinfo['url'] - pagesize = pageinfo['queryPageSize'] - pn = pageinfo.get('searchPN',1) - sn = pageinfo.get('sn',None) #TODO: is this s now? 
- highlightQuery = pageinfo['highlightQuery'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - viewMode= pageinfo['viewMode'] - tocMode = pageinfo['tocMode'] - characterNormalization = pageinfo['characterNormalization'] - #optionToggle = pageinfo['optionToggle'] - tocPN = pageinfo['tocPN'] - selfurl = self.absolute_url() - data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn, viewMode,characterNormalization, urllib.quote(highlightQuery))) - pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) - pagedom = Parse(pagexml) - - """ - pagedivs = pagedom.xpath("//div[@class='queryResultHits']") - if (pagedivs == pagedom.xpath("//div[@class='queryResultHits']")): - if len(pagedivs)>0: - docinfo['queryResultHits'] = int(getTextFromNode(pagedivs[0])) - s = getTextFromNode(pagedivs[0]) - s1 = int(s)/10+1 - try: - docinfo['queryResultHits'] = int(s1) - logging.debug("SEARCH ENTRIES: %s"%(s1)) - except: - docinfo['queryResultHits'] = 0 - """ - if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): - pagedivs = pagedom.xpath("//div[@class='queryResultPage']") - if len(pagedivs)>0: - pagenode=pagedivs[0] - links=pagenode.xpath("//a") - for l in links: - hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - href = hrefNode.nodeValue - if href.startswith('page-fragment.xql'): - selfurl = self.absolute_url() - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN, characterNormalization)) - hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) - #logging.debug("PUREXML 
:%s"%(serializeNode(pagenode))) - return serializeNode(pagenode) - if (queryType=="fulltextMorph"): - pagedivs = pagedom.xpath("//div[@class='queryResult']") - if len(pagedivs)>0: - pagenode=pagedivs[0] - links=pagenode.xpath("//a") - for l in links: - hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - href = hrefNode.nodeValue - if href.startswith('page-fragment.xql'): - selfurl = self.absolute_url() - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s'%(viewMode,queryType,urllib.quote(query),pagesize,pn,tocMode,pn,tocPN,characterNormalization)) - hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) - if href.startswith('../lt/lemma.xql'): - hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_query'%(selfurl)) - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']") - return serializeNode(pagenode) - if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): - pagedivs= pagedom.xpath("//div[@class='queryResultPage']") - if len(pagedivs)>0: - pagenode=pagedivs[0] - links=pagenode.xpath("//a") - for l in links: - hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - href = hrefNode.nodeValue - hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s'%(viewMode,tocMode,tocPN,pn,characterNormalization)) - if href.startswith('../lt/lex.xql'): - hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_lex'%selfurl) - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = 
window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - if href.startswith('../lt/lemma.xql'): - hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%(selfurl)) - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - return serializeNode(pagenode) - return "no text here" - - def getGisPlaces(self, docinfo=None, pageinfo=None): - """ Show all Gis Places of whole Page""" - xpath='//place' + + def getPlacesOnPage(self, docinfo=None, pn=None): + """Returns list of GIS places of page pn""" docpath = docinfo.get('textURLPath',None) if not docpath: return None - pn = pageinfo['current'] - hrefList=[] - myList= "" - text=self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s"%(docinfo['textURLPath'],xpath,pn)) + places=[] + text=self.getServerData("xpath.xql", "document=%s&xpath=//place&pn=%s"%(docpath,pn)) dom = ET.fromstring(text) - result = dom.findall(".//result/resultPage/place") + result = dom.findall(".//resultPage/place") for l in result: - href = l.get("id") - hrefList.append(href) - # WTF: what does this do? 
- myList = ",".join(hrefList) - #logging.debug("getGisPlaces :%s"%(myList)) - return myList + id = l.get("id") + name = l.text + place = {'id': id, 'name': name} + places.append(place) + + return places - def getAllGisPlaces (self, docinfo=None, pageinfo=None): - """Show all Gis Places of whole Book """ - xpath ='//echo:place' - hrefList=[] - myList="" - text=self.getServerData("xpath.xql", "document=%s&xpath=%s"%(docinfo['textURLPath'],xpath)) - dom = ET.fromstring(text) - result = dom.findall(".//result/resultPage/place") - - for l in result: - href = l.get("id") - hrefList.append(href) - # WTF: what does this do? - myList = ",".join(hrefList) - #logging.debug("getALLGisPlaces :%s"%(myList)) - return myList def processPageInfo(self, dom, docinfo, pageinfo): """processes page info divs from dom and stores in docinfo and pageinfo""" @@ -379,63 +237,24 @@ # gis mode elif mode == "gis": - name = docinfo['name'] if pagediv is not None: # check all a-tags links = pagediv.findall(".//a") + # add our URL as backlink + selfurl = self.getLink() + doc = base64.b64encode(selfurl) for l in links: href = l.get('href') if href: - if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'): - l.set('href', href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)) - l.set('target', '_blank') + if href.startswith('http://mappit.mpiwg-berlin.mpg.de'): + l.set('href', re.sub(r'doc=[\w+/=]+', 'doc=%s'%doc, href)) + l.set('target', '_blank') return serialize(pagediv) return None - # TODO: should be getWordInfo - def getWordInfo(self, word='', language='', display=''): - """show information (like dictionaries) about word""" - data = self.getServerData("lt/wordInfo.xql","language=%s&word=%s&display=%s&output=html"%(language,urllib.quote(word),urllib.quote(display))) - return data - - # WTF: what does this do? 
- def getLemma(self, lemma=None, language=None): - """simular words lemma """ - data = self.getServerData("lt/lemma.xql","language="+str(language)+"&lemma="+urllib.quote(lemma)+"&output=html") - return data - - # WTF: what does this do? - def getLemmaQuery(self, query=None, language=None): - """simular words lemma """ - data = self.getServerData("lt/lemma.xql","language="+str(language)+"&query="+urllib.quote(query)+"&output=html") - return data - - # WTF: what does this do? - def getLex(self, query=None, language=None): - #simular words lemma - data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+urllib.quote(query)) - return data - # WTF: what does this do? - def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): - #number of - docpath = docinfo['textURLPath'] - pagesize = pageinfo['queryPageSize'] - pn = pageinfo['searchPN'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - tocSearch = 0 - tocDiv = None - - pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, urllib.quote(query), pagesize, pn)) - pagedom = Parse(pagexml) - numdivs = pagedom.xpath("//div[@class='queryResultHits']") - tocSearch = int(getTextFromNode(numdivs[0])) - tc=int((tocSearch/10)+1) - return tc - def getToc(self, mode="text", docinfo=None): """loads table of contents and stores XML in docinfo""" logging.debug("getToc mode=%s"%mode)
--- a/documentViewer.py Fri Feb 17 10:46:03 2012 +0100 +++ b/documentViewer.py Tue Feb 21 19:16:28 2012 +0100 @@ -182,47 +182,11 @@ """returns one page of the table of contents""" return self.template.fulltextclient.getTocPage(**args) - #WTF? - def getQuery(self, **args): - """get query in search""" - return self.template.fulltextclient.getQuery(**args) - - #WTF? - def getSearch(self, **args): - """get search""" - return self.template.fulltextclient.getSearch(**args) - - #WTF? - def getGisPlaces(self, **args): - """get gis places""" - return self.template.fulltextclient.getGisPlaces(**args) + def getPlacesOnPage(self, **args): + """get list of gis places on one page""" + return self.template.fulltextclient.getPlacesOnPage(**args) #WTF? - def getAllGisPlaces(self, **args): - """get all gis places """ - return self.template.fulltextclient.getAllGisPlaces(**args) - - #WTF? - def getWordInfo(self, **args): - """get translate""" - return self.template.fulltextclient.getWordInfo(**args) - - #WTF? - def getLemma(self, **args): - """get lemma""" - return self.template.fulltextclient.getLemma(**args) - - #WTF? - def getLemmaQuery(self, **args): - """get query""" - return self.template.fulltextclient.getLemmaQuery(**args) - - #WTF? - def getLex(self, **args): - """get lex""" - return self.template.fulltextclient.getLex(**args) - - #WTF? thumbs_main_rss = PageTemplateFile('zpt/thumbs_main_rss', globals()) security.declareProtected('View','thumbs_rss') def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): @@ -315,6 +279,7 @@ # and execute with parameters return pt(docinfo=docinfo, pageinfo=pageinfo) + #WTF? def generateMarks(self,mk): ret="" if mk is None:
--- a/zpt/viewer_text.zpt Fri Feb 17 10:46:03 2012 +0100 +++ b/zpt/viewer_text.zpt Tue Feb 21 19:16:28 2012 +0100 @@ -26,9 +26,9 @@ <body tal:condition="numPages"> <tal:block tal:define="docpath docinfo/textURLPath; - pn pageinfo/pn; - flowLtr python:pageinfo.get('pageFlow','ltr')!='rtl'; - textPage python:here.getTextPage(mode=viewLayer, pn=pn, docinfo=docinfo, pageinfo=pageinfo) or '[no text here]';"> + pn pageinfo/pn; + flowLtr python:pageinfo.get('pageFlow','ltr')!='rtl'; + textPage python:here.getTextPage(mode=viewLayer, pn=pn, docinfo=docinfo, pageinfo=pageinfo) or '[no text here]';"> <!-- header --> <div class="page-head"> <metal:block metal:use-macro="here/template/common_template/macros/head" /> @@ -68,20 +68,18 @@ <input type="hidden" tal:define="params python:here.getParams(params={'viewLayer':None,'viewMode':None})" tal:repeat="param params" - tal:attributes="name param; value python:params[param]" /> - <input class="autosubmit" type="radio" name="viewMode" value="text" - tal:attributes="checked python:viewMode=='text'" /> Text<br /> - <span class="optionsText"> - <input type="checkbox" class="autosubmit" name="viewLayer" - value="dict" tal:attributes="checked python:viewLayer=='dict'" /> Dictionary<br /> - </span> - <span class="optionsText" tal:condition="python:docinfo.get('numPlaces',0)"> + tal:attributes="name param; value python:params[param]" /> <input + class="autosubmit" type="radio" name="viewMode" value="text" + tal:attributes="checked python:viewMode=='text'" /> Text<br /> <span + class="optionsText"> <input type="checkbox" + class="autosubmit" name="viewLayer" value="dict" + tal:attributes="checked python:viewLayer=='dict'" /> Dictionary<br /> + </span> <span class="optionsText" tal:condition="python:docinfo.get('numPlaces',0)"> <input type="checkbox" class="autosubmit" name="viewLayer" value="gis" tal:attributes="checked python:viewLayer=='gis'" /> Places<br /> - </span> - <input type="radio" class="autosubmit" name="viewMode" value="xml" 
- tal:attributes="checked python:viewMode=='xml'" /> XML<br /> - <input type="submit" value="Go!" /> + </span> <input type="radio" class="autosubmit" name="viewMode" value="xml" + tal:attributes="checked python:viewMode=='xml'" /> XML<br /> <input + type="submit" value="Go!" /> </form> </div> <!--"END TEXT DISPLAY"--> @@ -128,24 +126,22 @@ <!--"BEGIN PLACES"--> <div class="options" tal:condition="python:viewLayer=='gis'"> - <tal:block - tal:define="gisPlaces python:here.getGisPlaces(docinfo=docinfo, pageinfo=pageinfo); gisAllPlaces python:here.getAllGisPlaces(docinfo=docinfo, pageinfo=pageinfo);"> - <span><b>Places</b></span> - <br />(Link to extern:)<br /> - <span>Page</span> - <span style="float: right"> <a - tal:attributes="href python:'http://chinagis.mpiwg-berlin.mpg.de/chinagis/REST/db/mpdl/%s?id=%s&format=%s'%(name,gisPlaces,'gis')" - target="_blank"> <img src="images/arrow.png" alt="" /> - </a> - </span> - <br /> - <span>Book</span> - <span style="float: right"> <a - tal:attributes="href python:'http://chinagis.mpiwg-berlin.mpg.de/chinagis/REST/db/mpdl/%s?format=%s'%(name,'gis')" - target="_blank"> <img src="images/arrow.png" alt="" /> - </a> - </span> - <br /> + <tal:block tal:define=" + name docinfo/documentName; + places python:here.getPlacesOnPage(docinfo=docinfo, pn=pn); + pidlist python:','.join([p['id'] for p in places]);"> + <h4>Places</h4> + <ul> + <li><a + tal:attributes="href python:'http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?id=%s&format=gis'%(name,pidlist)" + target="_blank">on this page</a> + </li> + <li> + <a + tal:attributes="href python:'http://mappit.mpiwg-berlin.mpg.de/db/RESTdb/db/mpdl/%s?format=gis'%(name)" + target="_blank">in whole document</a> + </li> + </ul> </tal:block> </div> <!--"END PLACES"-->