documentViewer: MpdlXmlTextServer

annotate MpdlXmlTextServer_old.py @ 467:8b75d55582e8 elementtree

test new getdata

author	casties
date	Tue, 02 Aug 2011 12:35:05 +0200
parents	0a53fea83df7
children

rev	line source
455 0a53fea83df7 more work renovating casties parents: diff changeset	1
0a53fea83df7 more work renovating casties parents: diff changeset	2 from OFS.SimpleItem import SimpleItem
0a53fea83df7 more work renovating casties parents: diff changeset	3 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
0a53fea83df7 more work renovating casties parents: diff changeset	4 from Ft.Xml import EMPTY_NAMESPACE, Parse
0a53fea83df7 more work renovating casties parents: diff changeset	5 from Ft.Xml.Domlette import NonvalidatingReader
0a53fea83df7 more work renovating casties parents: diff changeset	6
0a53fea83df7 more work renovating casties parents: diff changeset	7 import md5
0a53fea83df7 more work renovating casties parents: diff changeset	8 import sys
0a53fea83df7 more work renovating casties parents: diff changeset	9 import logging
0a53fea83df7 more work renovating casties parents: diff changeset	10 import urllib
0a53fea83df7 more work renovating casties parents: diff changeset	11 import documentViewer
0a53fea83df7 more work renovating casties parents: diff changeset	12 from documentViewer import getTextFromNode, serializeNode
0a53fea83df7 more work renovating casties parents: diff changeset	13
0a53fea83df7 more work renovating casties parents: diff changeset	14 class MpdlXmlTextServer(SimpleItem):
0a53fea83df7 more work renovating casties parents: diff changeset	15 """TextServer implementation for MPDL-XML eXist server"""
0a53fea83df7 more work renovating casties parents: diff changeset	16 meta_type="MPDL-XML TextServer"
0a53fea83df7 more work renovating casties parents: diff changeset	17
0a53fea83df7 more work renovating casties parents: diff changeset	18 manage_options=(
0a53fea83df7 more work renovating casties parents: diff changeset	19 {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
0a53fea83df7 more work renovating casties parents: diff changeset	20 )+SimpleItem.manage_options
0a53fea83df7 more work renovating casties parents: diff changeset	21
0a53fea83df7 more work renovating casties parents: diff changeset	22 manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())
0a53fea83df7 more work renovating casties parents: diff changeset	23
def __init__(self, id, title="", serverUrl="http://mpdl-system.mpiwg-berlin.mpg.de/mpdl/interface/", serverName=None, timeout=40):
    """Initialise the text server object.

    id, title and timeout are stored unchanged on the instance.
    If serverName is given, the interface URL is built from it and
    serverUrl is ignored; otherwise serverUrl is stored as passed.
    """
    # alternative default for serverUrl that was used before:
    # http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/
    self.id = id
    self.title = title
    self.timeout = timeout
    if serverName is not None:
        serverUrl = "http://%s/mpdl/interface/" % serverName
    self.serverUrl = serverUrl
0a53fea83df7 more work renovating casties parents: diff changeset	35
def getHttpData(self, url, data=None):
    """Return what documentViewer.getHttpData yields for url and data.

    The request uses the timeout stored on this object.
    """
    fetch = documentViewer.getHttpData
    return fetch(url, data, timeout=self.timeout)
0a53fea83df7 more work renovating casties parents: diff changeset	39
def getServerData(self, method, data=None):
    """Return the text server's answer for method called with data.

    method is appended to this object's serverUrl to form the request URL;
    the request uses the timeout stored on this object.
    """
    return documentViewer.getHttpData(self.serverUrl + method, data, timeout=self.timeout)
0a53fea83df7 more work renovating casties parents: diff changeset	44
def getSearch(self, pageinfo=None, docinfo=None):
    """Run a document query on the text server and return the result markup.

    Reads the document path and URL from docinfo and the query settings
    (query, queryType, highlightQuery, paging, view and toc state) from
    pageinfo, requests "doc-query.xql" and rewrites the links inside the
    result node so that they point back to this object (absolute_url()).

    Returns the serialized result node, or the string "no text here" when
    the query type is not handled or the expected result div is missing.
    Raises KeyError when a required docinfo/pageinfo key is absent.
    """
    docpath = docinfo['textURLPath']
    url = docinfo['url']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo.get('searchPN', 1)
    sn = pageinfo['sn']
    highlightQuery = pageinfo['highlightQuery']
    query = pageinfo['query']
    queryType = pageinfo['queryType']
    viewMode = pageinfo['viewMode']
    tocMode = pageinfo['tocMode']
    characterNormalization = pageinfo['characterNormalization']
    tocPN = pageinfo['tocPN']
    # loop-invariant, so it is computed once instead of once per link
    selfurl = self.absolute_url()

    # urllib.quote() fails on None, so a missing highlight query is sent as
    # an empty value instead of aborting the whole search
    queryParams = "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&characterNormalization=%s&highlightQuery=%s" % (
        docpath, 'text', queryType, urllib.quote(query), pagesize, pn, sn,
        viewMode, characterNormalization, urllib.quote(highlightQuery or ''))
    data = self.getServerData("doc-query.xql", queryParams)
    pagexml = data.replace('?document=%s' % str(docpath), '?url=%s' % url)
    pagedom = Parse(pagexml)

    # NOTE: an earlier, disabled step that stored a 'queryResultHits' count
    # in docinfo was removed here; it never ran.

    # parameters that replace 'mode=text' in links to result pages
    searchLinkParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s&characterNormalization=%s' % (
        viewMode, queryType, urllib.quote(query), pagesize, pn, tocMode, pn, tocPN, characterNormalization)
    # parameters that replace 'mode=text' in links of the index views
    indexLinkParams = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s&characterNormalization=%s' % (
        viewMode, tocMode, tocPN, pn, characterNormalization)

    def toViewerLink(href):
        # turn a server-relative page-fragment link into a link to this object
        rewritten = href.replace('mode=text', searchLinkParams)
        return rewritten.replace('page-fragment.xql', '%s' % selfurl)

    def openInPopup(link):
        # make the link open in a separate popup window
        link.setAttributeNS(None, 'target', '_blank')
        link.setAttributeNS(None, 'onClick', "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;")
        link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')

    if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
        pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
        if len(pagedivs) > 0:
            pagenode = pagedivs[0]
            # ".//a" limits the search to links below the result node; the
            # absolute "//a" walked every link in the document although only
            # pagenode is serialized
            for link in pagenode.xpath(".//a"):
                hrefNode = link.getAttributeNodeNS(None, u"href")
                if not hrefNode:
                    continue
                href = hrefNode.nodeValue
                if href.startswith('page-fragment.xql'):
                    hrefNode.nodeValue = toViewerLink(href)
            return serializeNode(pagenode)

    if queryType == "fulltextMorph":
        pagedivs = pagedom.xpath("//div[@class='queryResult']")
        if len(pagedivs) > 0:
            pagenode = pagedivs[0]
            for link in pagenode.xpath(".//a"):
                hrefNode = link.getAttributeNodeNS(None, u"href")
                if not hrefNode:
                    continue
                href = hrefNode.nodeValue
                if href.startswith('page-fragment.xql'):
                    hrefNode.nodeValue = toViewerLink(href)
                elif href.startswith('../lt/lemma.xql'):
                    hrefNode.nodeValue = href.replace('../lt/lemma.xql', '%s/template/head_main_query' % (selfurl))
                    openInPopup(link)
            return serializeNode(pagenode)

    if queryType in ("ftIndex", "ftIndexMorph"):
        pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
        if len(pagedivs) > 0:
            pagenode = pagedivs[0]
            for link in pagenode.xpath(".//a"):
                hrefNode = link.getAttributeNodeNS(None, u"href")
                if not hrefNode:
                    continue
                href = hrefNode.nodeValue
                newHref = href.replace('mode=text', indexLinkParams)
                # as before, dictionary links are rebuilt from the original
                # href, so the 'mode=text' replacement does not apply to them
                if href.startswith('../lt/lex.xql'):
                    newHref = href.replace('../lt/lex.xql', '%s/template/head_main_lex' % selfurl)
                    openInPopup(link)
                elif href.startswith('../lt/lemma.xql'):
                    newHref = href.replace('../lt/lemma.xql', '%s/template/head_main_lemma' % (selfurl))
                    openInPopup(link)
                hrefNode.nodeValue = newHref
            return serializeNode(pagenode)

    return "no text here"
0a53fea83df7 more work renovating casties parents: diff changeset	135
def getGisPlaces(self, docinfo=None, pageinfo=None):
    """Return the ids of all GIS places on the current page.

    Asks "xpath.xql" on the text server for the '//place' elements on page
    pageinfo['current'] of the document docinfo['textURLPath'].

    Returns the "id" attribute values of the places found, joined with
    commas (an empty string when there are none), or None when docinfo is
    missing or carries no text path. Places without an "id" attribute are
    skipped.
    """
    xpath = '//place'
    # docinfo defaults to None, so guard before calling .get() on it
    docpath = docinfo.get('textURLPath', None) if docinfo else None
    if not docpath:
        return None

    pn = pageinfo['current']
    text = self.getServerData("xpath.xql", "document=%s&xpath=%s&pn=%s" % (docpath, xpath, pn))
    dom = Parse(text)
    placeIds = []
    for place in dom.xpath("//result/resultPage/place"):
        idNode = place.getAttributeNodeNS(None, u"id")
        # getAttributeNodeNS gives None for a missing attribute; skip such
        # places instead of failing on None.nodeValue
        if idNode is not None:
            placeIds.append(idNode.nodeValue)
    return ",".join(placeIds)
0a53fea83df7 more work renovating casties parents: diff changeset	158
def getAllGisPlaces(self, docinfo=None, pageinfo=None):
    """Return the ids of all GIS places in the whole document.

    Asks "xpath.xql" on the text server for the '//echo:place' elements of
    the document docinfo['textURLPath']; no page number is sent, and
    pageinfo is accepted for signature compatibility but not used.

    Returns the "id" attribute values of the places found, joined with
    commas (an empty string when there are none), or None when docinfo is
    missing or carries no text path. Places without an "id" attribute are
    skipped.
    """
    xpath = '//echo:place'
    # same guard as in getGisPlaces: no document path means nothing to query
    docpath = docinfo.get('textURLPath', None) if docinfo else None
    if not docpath:
        return None

    text = self.getServerData("xpath.xql", "document=%s&xpath=%s" % (docpath, xpath))
    dom = Parse(text)
    placeIds = []
    for place in dom.xpath("//result/resultPage/place"):
        idNode = place.getAttributeNodeNS(None, u"id")
        # getAttributeNodeNS gives None for a missing attribute; skip such
        # places instead of failing on None.nodeValue
        if idNode is not None:
            placeIds.append(idNode.nodeValue)
    return ",".join(placeIds)
0a53fea83df7 more work renovating casties parents: diff changeset	179
0a53fea83df7 more work renovating casties parents: diff changeset	180
0a53fea83df7 more work renovating casties parents: diff changeset	181 def getTextPage(self, mode="text_dict", pn=1, docinfo=None, pageinfo=None):
0a53fea83df7 more work renovating casties parents: diff changeset	182 """returns single page from fulltext"""
0a53fea83df7 more work renovating casties parents: diff changeset	183 docpath = docinfo['textURLPath']
0a53fea83df7 more work renovating casties parents: diff changeset	184 path = docinfo['textURLPath']
0a53fea83df7 more work renovating casties parents: diff changeset	185 url = docinfo.get('url',None)
0a53fea83df7 more work renovating casties parents: diff changeset	186 name = docinfo.get('name',None)
0a53fea83df7 more work renovating casties parents: diff changeset	187 pn =pageinfo['current']
0a53fea83df7 more work renovating casties parents: diff changeset	188 sn = pageinfo['sn']
0a53fea83df7 more work renovating casties parents: diff changeset	189 #optionToggle =pageinfo ['optionToggle']
0a53fea83df7 more work renovating casties parents: diff changeset	190 highlightQuery = pageinfo['highlightQuery']
0a53fea83df7 more work renovating casties parents: diff changeset	191 #mode = pageinfo ['viewMode']
0a53fea83df7 more work renovating casties parents: diff changeset	192 tocMode = pageinfo['tocMode']
0a53fea83df7 more work renovating casties parents: diff changeset	193 characterNormalization=pageinfo['characterNormalization']
0a53fea83df7 more work renovating casties parents: diff changeset	194 tocPN = pageinfo['tocPN']
0a53fea83df7 more work renovating casties parents: diff changeset	195 selfurl = self.absolute_url()
0a53fea83df7 more work renovating casties parents: diff changeset	196 if mode == "text_dict":
0a53fea83df7 more work renovating casties parents: diff changeset	197 textmode = "textPollux"
0a53fea83df7 more work renovating casties parents: diff changeset	198 else:
0a53fea83df7 more work renovating casties parents: diff changeset	199 textmode = mode
0a53fea83df7 more work renovating casties parents: diff changeset	200
0a53fea83df7 more work renovating casties parents: diff changeset	201 textParam = "document=%s&mode=%s&pn=%s&characterNormalization=%s"%(docpath,textmode,pn,characterNormalization)
0a53fea83df7 more work renovating casties parents: diff changeset	202 if highlightQuery is not None:
0a53fea83df7 more work renovating casties parents: diff changeset	203 textParam +="&highlightQuery=%s&sn=%s"%(urllib.quote(highlightQuery),sn)
0a53fea83df7 more work renovating casties parents: diff changeset	204
0a53fea83df7 more work renovating casties parents: diff changeset	205 pagexml = self.getServerData("page-fragment.xql",textParam)
0a53fea83df7 more work renovating casties parents: diff changeset	206 dom = Parse(pagexml)
0a53fea83df7 more work renovating casties parents: diff changeset	207 #dom = NonvalidatingReader.parseStream(pagexml)
0a53fea83df7 more work renovating casties parents: diff changeset	208
0a53fea83df7 more work renovating casties parents: diff changeset	209 #original Pages
0a53fea83df7 more work renovating casties parents: diff changeset	210 pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
0a53fea83df7 more work renovating casties parents: diff changeset	211
0a53fea83df7 more work renovating casties parents: diff changeset	212 """if pagedivs == dom.xpath("//div[@class='pageNumberOrig']"):
0a53fea83df7 more work renovating casties parents: diff changeset	213 if len(pagedivs)>0:
0a53fea83df7 more work renovating casties parents: diff changeset	214 docinfo['pageNumberOrig']= getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	215 logging.debug("ORIGINAL PAGE: %s"%(docinfo['pageNumberOrig']))
0a53fea83df7 more work renovating casties parents: diff changeset	216
0a53fea83df7 more work renovating casties parents: diff changeset	217 #original Pages Norm
0a53fea83df7 more work renovating casties parents: diff changeset	218 pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
0a53fea83df7 more work renovating casties parents: diff changeset	219 if pagedivs == dom.xpath("//div[@class='pageNumberOrigNorm']"):
0a53fea83df7 more work renovating casties parents: diff changeset	220 if len(pagedivs)>0:
0a53fea83df7 more work renovating casties parents: diff changeset	221 docinfo['pageNumberOrigNorm']= getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	222 logging.debug("ORIGINAL PAGE NORM: %s"%(docinfo['pageNumberOrigNorm']))
0a53fea83df7 more work renovating casties parents: diff changeset	223 """
0a53fea83df7 more work renovating casties parents: diff changeset	224 #figureEntries
0a53fea83df7 more work renovating casties parents: diff changeset	225 pagedivs = dom.xpath("//div[@class='countFigureEntries']")
0a53fea83df7 more work renovating casties parents: diff changeset	226 if pagedivs == dom.xpath("//div[@class='countFigureEntries']"):
0a53fea83df7 more work renovating casties parents: diff changeset	227 if len(pagedivs)>0:
0a53fea83df7 more work renovating casties parents: diff changeset	228 docinfo['countFigureEntries'] = getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	229 s = getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	230 if s=='0':
0a53fea83df7 more work renovating casties parents: diff changeset	231 try:
0a53fea83df7 more work renovating casties parents: diff changeset	232 docinfo['countFigureEntries'] = int(s)
0a53fea83df7 more work renovating casties parents: diff changeset	233 except:
0a53fea83df7 more work renovating casties parents: diff changeset	234 docinfo['countFigureEntries'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	235 else:
0a53fea83df7 more work renovating casties parents: diff changeset	236 s1 = int(s)/30+1
0a53fea83df7 more work renovating casties parents: diff changeset	237 try:
0a53fea83df7 more work renovating casties parents: diff changeset	238 docinfo['countFigureEntries'] = int(s1)
0a53fea83df7 more work renovating casties parents: diff changeset	239 except:
0a53fea83df7 more work renovating casties parents: diff changeset	240 docinfo['countFigureEntries'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	241
0a53fea83df7 more work renovating casties parents: diff changeset	242 #allPlaces
0a53fea83df7 more work renovating casties parents: diff changeset	243 pagedivs = dom.xpath("//div[@class='countPlaces']")
0a53fea83df7 more work renovating casties parents: diff changeset	244 if pagedivs == dom.xpath("//div[@class='countPlaces']"):
0a53fea83df7 more work renovating casties parents: diff changeset	245 if len(pagedivs)>0:
0a53fea83df7 more work renovating casties parents: diff changeset	246 docinfo['countPlaces']= getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	247 s = getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	248 try:
0a53fea83df7 more work renovating casties parents: diff changeset	249 docinfo['countPlaces'] = int(s)
0a53fea83df7 more work renovating casties parents: diff changeset	250 except:
0a53fea83df7 more work renovating casties parents: diff changeset	251 docinfo['countPlaces'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	252
0a53fea83df7 more work renovating casties parents: diff changeset	253 #tocEntries
0a53fea83df7 more work renovating casties parents: diff changeset	254 pagedivs = dom.xpath("//div[@class='countTocEntries']")
0a53fea83df7 more work renovating casties parents: diff changeset	255 if pagedivs == dom.xpath("//div[@class='countTocEntries']"):
0a53fea83df7 more work renovating casties parents: diff changeset	256 if len(pagedivs)>0:
0a53fea83df7 more work renovating casties parents: diff changeset	257 docinfo['countTocEntries'] = int(getTextFromNode(pagedivs[0]))
0a53fea83df7 more work renovating casties parents: diff changeset	258 s = getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	259 if s=='0':
0a53fea83df7 more work renovating casties parents: diff changeset	260 try:
0a53fea83df7 more work renovating casties parents: diff changeset	261 docinfo['countTocEntries'] = int(s)
0a53fea83df7 more work renovating casties parents: diff changeset	262 except:
0a53fea83df7 more work renovating casties parents: diff changeset	263 docinfo['countTocEntries'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	264 else:
0a53fea83df7 more work renovating casties parents: diff changeset	265 s1 = int(s)/30+1
0a53fea83df7 more work renovating casties parents: diff changeset	266 try:
0a53fea83df7 more work renovating casties parents: diff changeset	267 docinfo['countTocEntries'] = int(s1)
0a53fea83df7 more work renovating casties parents: diff changeset	268 except:
0a53fea83df7 more work renovating casties parents: diff changeset	269 docinfo['countTocEntries'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	270
0a53fea83df7 more work renovating casties parents: diff changeset	271 #numTextPages
0a53fea83df7 more work renovating casties parents: diff changeset	272 pagedivs = dom.xpath("//div[@class='countPages']")
0a53fea83df7 more work renovating casties parents: diff changeset	273 if pagedivs == dom.xpath("//div[@class='countPages']"):
0a53fea83df7 more work renovating casties parents: diff changeset	274 if len(pagedivs)>0:
0a53fea83df7 more work renovating casties parents: diff changeset	275 docinfo['numPages'] = getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	276 s = getTextFromNode(pagedivs[0])
0a53fea83df7 more work renovating casties parents: diff changeset	277
0a53fea83df7 more work renovating casties parents: diff changeset	278 try:
0a53fea83df7 more work renovating casties parents: diff changeset	279 docinfo['numPages'] = int(s)
0a53fea83df7 more work renovating casties parents: diff changeset	280 #logging.debug("PAGE NUMBER: %s"%(s))
0a53fea83df7 more work renovating casties parents: diff changeset	281
0a53fea83df7 more work renovating casties parents: diff changeset	282 np = docinfo['numPages']
0a53fea83df7 more work renovating casties parents: diff changeset	283 pageinfo['end'] = min(pageinfo['end'], np)
0a53fea83df7 more work renovating casties parents: diff changeset	284 pageinfo['numgroups'] = int(np / pageinfo['groupsize'])
0a53fea83df7 more work renovating casties parents: diff changeset	285 if np % pageinfo['groupsize'] > 0:
0a53fea83df7 more work renovating casties parents: diff changeset	286 pageinfo['numgroups'] += 1
0a53fea83df7 more work renovating casties parents: diff changeset	287 except:
0a53fea83df7 more work renovating casties parents: diff changeset	288 docinfo['numPages'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	289
0a53fea83df7 more work renovating casties parents: diff changeset	290 else:
0a53fea83df7 more work renovating casties parents: diff changeset	291 #no full text -- init to 0
0a53fea83df7 more work renovating casties parents: diff changeset	292 docinfo['pageNumberOrig'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	293 docinfo['countFigureEntries'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	294 docinfo['countPlaces'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	295 docinfo['countTocEntries'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	296 docinfo['numPages'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	297 docinfo['pageNumberOrigNorm'] = 0
0a53fea83df7 more work renovating casties parents: diff changeset	298 #return docinfo
0a53fea83df7 more work renovating casties parents: diff changeset	299
0a53fea83df7 more work renovating casties parents: diff changeset	300 # plain text mode
0a53fea83df7 more work renovating casties parents: diff changeset	301 if mode == "text":
0a53fea83df7 more work renovating casties parents: diff changeset	302 # first div contains text
0a53fea83df7 more work renovating casties parents: diff changeset	303 pagedivs = dom.xpath("/div")
0a53fea83df7 more work renovating casties parents: diff changeset	304 if len(pagedivs) > 0:
0a53fea83df7 more work renovating casties parents: diff changeset	305 pagenode = pagedivs[0]
0a53fea83df7 more work renovating casties parents: diff changeset	306 links = pagenode.xpath("//a")
0a53fea83df7 more work renovating casties parents: diff changeset	307 for l in links:
0a53fea83df7 more work renovating casties parents: diff changeset	308 hrefNode = l.getAttributeNodeNS(None, u"href")
0a53fea83df7 more work renovating casties parents: diff changeset	309 if hrefNode:
0a53fea83df7 more work renovating casties parents: diff changeset	310 href= hrefNode.nodeValue
0a53fea83df7 more work renovating casties parents: diff changeset	311 if href.startswith('#note-'):
0a53fea83df7 more work renovating casties parents: diff changeset	312 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=text&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))
0a53fea83df7 more work renovating casties parents: diff changeset	313 return serializeNode(pagenode)
0a53fea83df7 more work renovating casties parents: diff changeset	314 if mode == "xml":
0a53fea83df7 more work renovating casties parents: diff changeset	315 # first div contains text
0a53fea83df7 more work renovating casties parents: diff changeset	316 pagedivs = dom.xpath("/div")
0a53fea83df7 more work renovating casties parents: diff changeset	317 if len(pagedivs) > 0:
0a53fea83df7 more work renovating casties parents: diff changeset	318 pagenode = pagedivs[0]
0a53fea83df7 more work renovating casties parents: diff changeset	319 return serializeNode(pagenode)
0a53fea83df7 more work renovating casties parents: diff changeset	320 if mode == "gis":
0a53fea83df7 more work renovating casties parents: diff changeset	321 # first div contains text
0a53fea83df7 more work renovating casties parents: diff changeset	322 pagedivs = dom.xpath("/div")
0a53fea83df7 more work renovating casties parents: diff changeset	323 if len(pagedivs) > 0:
0a53fea83df7 more work renovating casties parents: diff changeset	324 pagenode = pagedivs[0]
0a53fea83df7 more work renovating casties parents: diff changeset	325 links =pagenode.xpath("//a")
0a53fea83df7 more work renovating casties parents: diff changeset	326 for l in links:
0a53fea83df7 more work renovating casties parents: diff changeset	327 hrefNode =l.getAttributeNodeNS(None, u"href")
0a53fea83df7 more work renovating casties parents: diff changeset	328 if hrefNode:
0a53fea83df7 more work renovating casties parents: diff changeset	329 href=hrefNode.nodeValue
0a53fea83df7 more work renovating casties parents: diff changeset	330 if href.startswith('http://chinagis.mpiwg-berlin.mpg.de'):
0a53fea83df7 more work renovating casties parents: diff changeset	331 hrefNode.nodeValue =href.replace('chinagis_REST/REST/db/chgis/mpdl','chinagis/REST/db/mpdl/%s'%name)
0a53fea83df7 more work renovating casties parents: diff changeset	332 l.setAttributeNS(None, 'target', '_blank')
0a53fea83df7 more work renovating casties parents: diff changeset	333 return serializeNode(pagenode)
0a53fea83df7 more work renovating casties parents: diff changeset	334
0a53fea83df7 more work renovating casties parents: diff changeset	335 if mode == "pureXml":
0a53fea83df7 more work renovating casties parents: diff changeset	336 # first div contains text
0a53fea83df7 more work renovating casties parents: diff changeset	337 pagedivs = dom.xpath("/div")
0a53fea83df7 more work renovating casties parents: diff changeset	338 if len(pagedivs) > 0:
0a53fea83df7 more work renovating casties parents: diff changeset	339 pagenode = pagedivs[0]
0a53fea83df7 more work renovating casties parents: diff changeset	340 return serializeNode(pagenode)
0a53fea83df7 more work renovating casties parents: diff changeset	341 # text-with-links mode
0a53fea83df7 more work renovating casties parents: diff changeset	342 if mode == "text_dict":
0a53fea83df7 more work renovating casties parents: diff changeset	343 # first div contains text
0a53fea83df7 more work renovating casties parents: diff changeset	344 #mode = pageinfo ['viewMode']
0a53fea83df7 more work renovating casties parents: diff changeset	345 pagedivs = dom.xpath("/div")
0a53fea83df7 more work renovating casties parents: diff changeset	346 if len(pagedivs) > 0:
0a53fea83df7 more work renovating casties parents: diff changeset	347 pagenode = pagedivs[0]
0a53fea83df7 more work renovating casties parents: diff changeset	348 # check all a-tags
0a53fea83df7 more work renovating casties parents: diff changeset	349 links = pagenode.xpath("//a")
0a53fea83df7 more work renovating casties parents: diff changeset	350
0a53fea83df7 more work renovating casties parents: diff changeset	351 for l in links:
0a53fea83df7 more work renovating casties parents: diff changeset	352 hrefNode = l.getAttributeNodeNS(None, u"href")
0a53fea83df7 more work renovating casties parents: diff changeset	353
0a53fea83df7 more work renovating casties parents: diff changeset	354 if hrefNode:
0a53fea83df7 more work renovating casties parents: diff changeset	355 # is link with href
0a53fea83df7 more work renovating casties parents: diff changeset	356 href = hrefNode.nodeValue
0a53fea83df7 more work renovating casties parents: diff changeset	357 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql'):
0a53fea83df7 more work renovating casties parents: diff changeset	358 # is pollux link
0a53fea83df7 more work renovating casties parents: diff changeset	359 selfurl = self.absolute_url()
0a53fea83df7 more work renovating casties parents: diff changeset	360 # change href
0a53fea83df7 more work renovating casties parents: diff changeset	361 hrefNode.nodeValue = href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/wordInfo.xql','%s/head_main_voc'%selfurl)
0a53fea83df7 more work renovating casties parents: diff changeset	362 # add target
0a53fea83df7 more work renovating casties parents: diff changeset	363 l.setAttributeNS(None, 'target', '_blank')
0a53fea83df7 more work renovating casties parents: diff changeset	364 #l.setAttributeNS(None, 'onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
0a53fea83df7 more work renovating casties parents: diff changeset	365 #l.setAttributeNS(None, "ondblclick", "popupWin.focus();")
0a53fea83df7 more work renovating casties parents: diff changeset	366 #window.open("this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=yes, scrollbars=1'"); return false;")
0a53fea83df7 more work renovating casties parents: diff changeset	367
0a53fea83df7 more work renovating casties parents: diff changeset	368 if href.startswith('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql'):
0a53fea83df7 more work renovating casties parents: diff changeset	369 selfurl = self.absolute_url()
0a53fea83df7 more work renovating casties parents: diff changeset	370 hrefNode.nodeValue = href.replace('http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/lt/lemma.xql','%s/head_main_lemma'%selfurl)
0a53fea83df7 more work renovating casties parents: diff changeset	371 l.setAttributeNS(None, 'target', '_blank')
0a53fea83df7 more work renovating casties parents: diff changeset	372 l.setAttributeNS(None, 'onclick',"popupWin = window.open(this.href, 'InfoWindow', 'menubar=no, location,width=500,height=600,top=180, left=700, toolbar=no, scrollbars=1'); return false;")
0a53fea83df7 more work renovating casties parents: diff changeset	373 l.setAttributeNS(None, 'ondblclick', 'popupWin.focus();')
0a53fea83df7 more work renovating casties parents: diff changeset	374
0a53fea83df7 more work renovating casties parents: diff changeset	375 if href.startswith('#note-'):
0a53fea83df7 more work renovating casties parents: diff changeset	376 hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=text_dict&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,tocMode,tocPN,pn))
0a53fea83df7 more work renovating casties parents: diff changeset	377
0a53fea83df7 more work renovating casties parents: diff changeset	378 return serializeNode(pagenode)
0a53fea83df7 more work renovating casties parents: diff changeset	379 return "no text here"
0a53fea83df7 more work renovating casties parents: diff changeset	380
def getOrigPages(self, docinfo=None, pageinfo=None):
    # Fetch the current page fragment from the text server and extract the
    # text of its first <div class="pageNumberOrig"> element.
    #
    # docinfo  -- dict; 'textURLPath' is read, 'pageNumberOrig' is written
    # pageinfo -- dict; 'current' is sent to the server as the "pn" parameter
    #
    # Returns the extracted text, or None (docinfo left untouched) when the
    # fragment contains no such div.
    #
    # NOTE(review): documented with comments instead of a docstring on
    # purpose -- presumably Zope's publisher only exposes methods that carry
    # a docstring, so adding one could make this method callable by URL.
    docpath = docinfo['textURLPath']
    pn = pageinfo['current']
    pagexml = self.getServerData("page-fragment.xql", "document=%s&pn=%s" % (docpath, pn))
    dom = Parse(pagexml)
    # A single XPath evaluation suffices; comparing the result against a
    # second, identical query is always true and only costs time.
    pagedivs = dom.xpath("//div[@class='pageNumberOrig']")
    if len(pagedivs) == 0:
        return None
    docinfo['pageNumberOrig'] = getTextFromNode(pagedivs[0])
    return docinfo['pageNumberOrig']
0a53fea83df7 more work renovating casties parents: diff changeset	392
def getOrigPagesNorm(self, docinfo=None, pageinfo=None):
    # Fetch the current page fragment from the text server and extract the
    # text of its first <div class="pageNumberOrigNorm"> element.
    #
    # docinfo  -- dict; 'textURLPath' is read, 'pageNumberOrigNorm' is written
    # pageinfo -- dict; 'current' is sent to the server as the "pn" parameter
    #
    # Returns the extracted text, or None (docinfo left untouched) when the
    # fragment contains no such div.
    #
    # NOTE(review): documented with comments instead of a docstring on
    # purpose -- presumably Zope's publisher only exposes methods that carry
    # a docstring, so adding one could make this method callable by URL.
    docpath = docinfo['textURLPath']
    pn = pageinfo['current']
    pagexml = self.getServerData("page-fragment.xql", "document=%s&pn=%s" % (docpath, pn))
    dom = Parse(pagexml)
    # A single XPath evaluation suffices; comparing the result against a
    # second, identical query is always true and only costs time.
    pagedivs = dom.xpath("//div[@class='pageNumberOrigNorm']")
    if len(pagedivs) == 0:
        return None
    docinfo['pageNumberOrigNorm'] = getTextFromNode(pagedivs[0])
    return docinfo['pageNumberOrigNorm']
0a53fea83df7 more work renovating casties parents: diff changeset	404
0a53fea83df7 more work renovating casties parents: diff changeset	405
def getTranslate(self, word=None, language=None):
    """Query the server's lt/wordInfo.xql script for *word* and return the reply.

    `language` is passed through str(), `word` is URL-quoted, and HTML
    output is requested. The server response is returned unmodified.
    """
    params = "language=%s&word=%s&output=html" % (str(language), urllib.quote(word))
    return self.getServerData("lt/wordInfo.xql", params)
0a53fea83df7 more work renovating casties parents: diff changeset	411
def getLemma(self, lemma=None, language=None):
    """Query the server's lt/lemma.xql script by lemma and return the reply.

    `language` is passed through str(), `lemma` is URL-quoted, and HTML
    output is requested. The server response is returned unmodified.
    """
    params = "language=%s&lemma=%s&output=html" % (str(language), urllib.quote(lemma))
    return self.getServerData("lt/lemma.xql", params)
0a53fea83df7 more work renovating casties parents: diff changeset	416
def getLemmaQuery(self, query=None, language=None):
    """Query the server's lt/lemma.xql script by query string and return the reply.

    `language` is passed through str(), `query` is URL-quoted, and HTML
    output is requested. The server response is returned unmodified.
    """
    params = "language=%s&query=%s&output=html" % (str(language), urllib.quote(query))
    return self.getServerData("lt/lemma.xql", params)
0a53fea83df7 more work renovating casties parents: diff changeset	421
def getLex(self, query=None, language=None):
    # Query the server's lt/lex.xql script and return its reply unmodified.
    # The "document" parameter is sent empty, `language` goes through str()
    # and `query` is URL-quoted.
    #
    # NOTE(review): this method has no docstring in the original; it is kept
    # that way because Zope presumably only publishes methods that have one.
    params = "document=&language=%s&query=%s" % (str(language), urllib.quote(query))
    return self.getServerData("lt/lex.xql", params)
0a53fea83df7 more work renovating casties parents: diff changeset	426
def getQuery(self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
    # Run a full-text query through the server's doc-query.xql script and
    # return int(hits / 10) + 1, where "hits" is the number found in the
    # reply's <div class="queryResultHits"> element.
    #
    # docinfo  -- dict; 'textURLPath' names the document to search
    # pageinfo -- dict; 'queryPageSize', 'searchPN', 'query' and 'queryType'
    #             are read
    #
    # The query, queryType and pn arguments are accepted for interface
    # compatibility but are overridden by the values stored in pageinfo.
    #
    # A reply without a queryResultHits div is treated as zero hits (result
    # 1), matching how getToc handles the same situation, rather than
    # failing with an IndexError.
    #
    # NOTE(review): the divisor is a fixed 10, not pageinfo['queryPageSize'],
    # and an exact multiple of 10 yields one more than hits/10 -- confirm
    # that this is what callers expect.
    #
    # NOTE(review): documented with comments instead of a docstring on
    # purpose -- presumably Zope's publisher only exposes methods that carry
    # a docstring.
    docpath = docinfo['textURLPath']
    pagesize = pageinfo['queryPageSize']
    pn = pageinfo['searchPN']
    query = pageinfo['query']
    queryType = pageinfo['queryType']

    params = "document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s" % (
        docpath, 'text', queryType, urllib.quote(query), pagesize, pn)
    pagexml = self.getServerData("doc-query.xql", params)
    pagedom = Parse(pagexml)
    numdivs = pagedom.xpath("//div[@class='queryResultHits']")

    hits = 0
    if len(numdivs) > 0:
        hits = int(getTextFromNode(numdivs[0]))
    # explicit floor division keeps the Python 2 integer semantics
    return hits // 10 + 1
0a53fea83df7 more work renovating casties parents: diff changeset	443
def getToc(self, mode="text", docinfo=None):
    """Store the number of table-of-contents entries for *mode* in docinfo.

    The count is written to docinfo['tocSize_<mode>'] and docinfo is
    returned. Nothing is fetched when mode is "none" or when that key is
    already present. Mode "text" is sent to the server as query type "toc";
    any other mode is sent as-is. The count stays 0 when the reply has no
    <div class="queryResultHits"> element.
    """
    cache_key = 'tocSize_%s' % mode
    if mode == "none" or cache_key in docinfo:
        # nothing to load, or the size is already cached
        return docinfo

    query_type = mode
    if mode == "text":
        query_type = "toc"

    # the server requires a result-set size; request page 1 of up to 1000 entries
    request = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s" % (
        docinfo['textURLPath'], query_type, 1000, 1)
    reply_dom = Parse(self.getServerData("doc-query.xql", request))

    entry_count = 0
    hit_divs = reply_dom.xpath("//div[@class='queryResultHits']")
    if len(hit_divs) > 0:
        entry_count = int(getTextFromNode(hit_divs[0]))

    docinfo[cache_key] = entry_count
    return docinfo
0a53fea83df7 more work renovating casties parents: diff changeset	474
def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
    """Return one page of the table of contents with its links rewritten.

    The page is requested from doc-query.xql (query type "toc" for mode
    "text", otherwise the mode itself) using pageinfo['tocPageSize'] and
    pageinfo['tocPN']; the `pn` argument is not used. In the reply, links
    to page-fragment.xql for this document are pointed at this object's
    own URL, and 'mode=image' is replaced by 'mode=texttool'.
    """
    # TODO: this should use the cached TOC
    query_type = mode
    if mode == "text":
        query_type = "toc"

    doc_path = docinfo['textURLPath']
    page_size = pageinfo['tocPageSize']
    toc_page = pageinfo['tocPN']
    doc_url = docinfo['url']
    own_url = self.absolute_url()
    view_mode = pageinfo['viewMode']
    # looked up but not used further; kept so a missing key still raises KeyError
    pageinfo['characterNormalization']
    toc_mode = pageinfo['tocMode']

    request = "document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s&characterNormalization=regPlusNorm" % (
        doc_path, query_type, page_size, toc_page)
    reply = self.getServerData("doc-query.xql", request)

    server_link = 'page-fragment.xql?document=%s' % str(doc_path)
    viewer_link = '%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s' % (
        own_url, doc_url, view_mode, toc_mode, toc_page)
    return reply.replace(server_link, viewer_link).replace('mode=image', 'mode=texttool')
0a53fea83df7 more work renovating casties parents: diff changeset	498
def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Update title, timeout and serverUrl; redirect to 'manage_main' if RESPONSE is given."""
    # (an earlier default serverUrl used port 30030:
    #  http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/)
    settings = (
        ('title', title),
        ('timeout', timeout),
        ('serverUrl', serverUrl),
    )
    for attr_name, value in settings:
        setattr(self, attr_name, value)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
0a53fea83df7 more work renovating casties parents: diff changeset	507
0a53fea83df7 more work renovating casties parents: diff changeset	508 # management methods
def manage_addMpdlXmlTextServerForm(self):
    """Render the zpt/manage_addMpdlXmlTextServer page template in the context of self."""
    template = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals())
    bound_template = template.__of__(self)
    return bound_template()
0a53fea83df7 more work renovating casties parents: diff changeset	513
def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-text.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """Create an MpdlXmlTextServer and store it under *id* in self.Destination().

    Redirects to 'manage_main' afterwards when RESPONSE is given.
    """
    # (an earlier default serverUrl used port 30030:
    #  http://mpdl-text.mpiwg-berlin.mpg.de:30030/mpdl/interface/)
    text_server = MpdlXmlTextServer(id, title, serverUrl, timeout)
    self.Destination()._setObject(id, text_server)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')

Mercurial > hg > documentViewer

annotate MpdlXmlTextServer_old.py @ 467:8b75d55582e8 elementtree