Mercurial > hg > documentViewer
changeset 6:3c70a7d2f35b modularisierung
made extraFunction into separate object MpdlXmlTextServer
author | casties |
---|---|
date | Wed, 16 Jun 2010 20:27:04 +0200 |
parents | 7d10acbad6c0 |
children | 75c5208cdf64 |
files | MpdlXmlTextServer.py __init__.py documentViewer.py extraFunction.py version.txt zpt/manage_addMpdlXmlTextServer.zpt zpt/manage_changeMpdlXmlTextServer.zpt |
diffstat | 7 files changed, 469 insertions(+), 425 deletions(-) [+] |
line wrap: on
line diff
# File: MpdlXmlTextServer.py (new file added by this changeset)
"""Zope product object that talks to an MPDL-XML eXist text server."""

from OFS.SimpleItem import SimpleItem
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
# url_quote is needed by getTranslate/getLemma/getLemmaNew; without this
# import those methods fail with a NameError.
from Products.PythonScripts.standard import url_quote

from Ft.Xml import EMPTY_NAMESPACE, Parse

import sys
import logging
import documentViewer
from documentViewer import getTextFromNode, serializeNode


# JavaScript handler attached to dictionary/lemma links; %s is the horizontal
# position ("left") of the pop-up window.
_POPUP_ONCLICK = "popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=%s, scrollbars=1'); return false;"


def _makePopupLink(link, left):
    """Set the attributes that make the a-element `link` open in a pop-up window."""
    link.setAttributeNS(None, 'target', '_blank')
    link.setAttributeNS(None, 'onClick', _POPUP_ONCLICK % left)
    link.setAttributeNS(None, 'onDblclick', 'popupWin.focus();')


def _iterLinksWithHref(pagenode):
    """Yield (link, hrefNode, href) for every a-element that has an href.

    `href` is the attribute value at the time the link is visited; callers
    rewrite `hrefNode.nodeValue` based on this original value.
    """
    # NOTE: "//a" is an absolute path, so this selects the a-elements of the
    # whole document, not only those below pagenode (same as the inline loops
    # this helper replaces).
    for link in pagenode.xpath("//a"):
        hrefNode = link.getAttributeNodeNS(None, u"href")
        if hrefNode:
            yield link, hrefNode, hrefNode.nodeValue


class MpdlXmlTextServer(SimpleItem):
    """TextServer implementation for MPDL-XML eXist server"""
    meta_type="MPDL-XML TextServer"

    manage_options=(
        {'label':'Config','action':'manage_changeMpdlXmlTextServerForm'},
        )+SimpleItem.manage_options

    manage_changeMpdlXmlTextServerForm = PageTemplateFile("zpt/manage_changeMpdlXmlTextServer", globals())

    def __init__(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/", timeout=40):
        """constructor

        id -- Zope object id
        title -- object title
        serverUrl -- base URL of the text server; method names are appended
                     to it verbatim (see getServerData)
        timeout -- timeout passed on to documentViewer.getHttpData
        """
        self.id=id
        self.title=title
        self.timeout = timeout
        self.serverUrl = serverUrl


    def getHttpData(self, url, data=None):
        """returns result from url+data HTTP request"""
        return documentViewer.getHttpData(url,data,timeout=self.timeout)


    def getServerData(self, method, data=None):
        """returns result from text server for method+data

        method -- script name relative to serverUrl (e.g. "doc-query.xql")
        data -- query string (or other data accepted by
                documentViewer.getHttpData)
        """
        url = self.serverUrl+method
        return documentViewer.getHttpData(url,data,timeout=self.timeout)


    def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None):
        """get search list

        Runs the query described by pageinfo against the document in docinfo
        and returns the serialized result div with its links rewritten to
        point back at this object. Returns "no text here" if the response
        contains no result div for the query type.

        The arguments pn, query and queryType are overridden by the values
        in pageinfo; lemma is not used.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        logging.debug("documentViewer (gettoc) docpath: %s"%(docpath))
        logging.debug("documentViewer (gettoc) url: %s"%(url))
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        sn = pageinfo['sn']
        highlightQuery = pageinfo['highlightQuery']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()

        # NOTE(review): the values are inserted into the query string without
        # URL-quoting -- presumably the caller passes quoted values; confirm.
        data = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery))

        pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url)
        pagedom = Parse(pagexml)

        if queryType in ("fulltext", "xpath", "xquery", "fulltextMorphLemma"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs)>0:
                pagenode = pagedivs[0]
                searchParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)
                for link, hrefNode, href in _iterLinksWithHref(pagenode):
                    if href.startswith('page-fragment.xql'):
                        # point page links at this object in texttool mode
                        newHref = href.replace('mode=text',searchParams)
                        hrefNode.nodeValue = newHref.replace('page-fragment.xql','%s'%selfurl)
                return serializeNode(pagenode)

        if queryType == "fulltextMorph":
            pagedivs = pagedom.xpath("//div[@class='queryResult']")
            if len(pagedivs)>0:
                pagenode = pagedivs[0]
                searchParams = 'mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)
                for link, hrefNode, href in _iterLinksWithHref(pagenode):
                    if href.startswith('page-fragment.xql'):
                        newHref = href.replace('mode=text',searchParams)
                        hrefNode.nodeValue = newHref.replace('page-fragment.xql','%s'%selfurl)
                    if href.startswith('../lt/lemma.xql'):
                        # lemma links open the lemma template in a pop-up
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl))
                        _makePopupLink(link, 400)
                return serializeNode(pagenode)

        if queryType in ("ftIndex", "ftIndexMorph"):
            pagedivs = pagedom.xpath("//div[@class='queryResultPage']")
            if len(pagedivs)>0:
                pagenode = pagedivs[0]
                indexParams = 'mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)
                for link, hrefNode, href in _iterLinksWithHref(pagenode):
                    hrefNode.nodeValue = href.replace('mode=text',indexParams)
                    # the dictionary/lemma rewrites below start again from the
                    # original href and replace the value set above
                    if href.startswith('../lt/lex.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl)
                        _makePopupLink(link, 400)
                    if href.startswith('../lt/lemma.xql'):
                        hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                        _makePopupLink(link, 400)
                return serializeNode(pagenode)

        return "no text here"

    def getNumPages(self,docinfo=None):
        """get list of pages from fulltext and put in docinfo

        Stores the number of "<pb " occurrences in the server response as
        docinfo['numPages'] (unless that key already exists) and returns
        docinfo.
        """
        if 'numPages' in docinfo:
            # already there
            return docinfo

        xquery = '//pb'
        text = self.getServerData("xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery))
        docinfo['numPages'] = text.count("<pb ")
        return docinfo

    def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None):
        """returns single page from fulltext

        mode -- "text", "text_dict" (text with dictionary links), "xml" or
                "pureXml"; any other mode yields "no text here"
        pn -- page number sent to the server

        The arguments viewMode, tocMode and tocPN are overridden by the
        values in pageinfo. Returns the serialized first top-level div of the
        response, or "no text here" if there is none.
        """
        docpath = docinfo['textURLPath']
        url = docinfo['url']
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']
        selfurl = self.absolute_url()
        if mode == "text_dict":
            textmode = "textPollux"
        else:
            textmode = mode

        textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn)
        if highlightQuery is not None:
            textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn)

        pagexml = self.getServerData("page-fragment.xql",textParam)
        pagedom = Parse(pagexml)

        if mode not in ("text", "xml", "pureXml", "text_dict"):
            return "no text here"

        # first div contains text
        pagedivs = pagedom.xpath("/div")
        if len(pagedivs) == 0:
            return "no text here"

        pagenode = pagedivs[0]
        # footnote anchors become full links to the current page
        noteUrl = "?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)

        if mode == "text":
            # plain text mode
            for link, hrefNode, href in _iterLinksWithHref(pagenode):
                if href.startswith('#note-'):
                    hrefNode.nodeValue = href.replace('#note-',noteUrl)

        elif mode == "text_dict":
            # text-with-links mode
            for link, hrefNode, href in _iterLinksWithHref(pagenode):
                if href.startswith('lt/lex.xql'):
                    # is pollux link: change href and open in pop-up
                    hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl)
                    _makePopupLink(link, 700)
                if href.startswith('lt/lemma.xql'):
                    hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl)
                    _makePopupLink(link, 700)
                if href.startswith('#note-'):
                    hrefNode.nodeValue = href.replace('#note-',noteUrl)

        # "xml" and "pureXml" return the div unchanged
        return serializeNode(pagenode)

    def getTranslate(self, query=None, language=None):
        """translate into another languages

        Returns the raw response of lt/lex.xql for query and language.
        """
        data = self.getServerData("lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query)))
        return data

    def getLemma(self, lemma=None, language=None):
        """similar words lemma

        Returns the raw response of lt/lemma.xql for lemma and language.
        """
        data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma)))
        return data

    def getLemmaNew(self, query=None, language=None):
        """similar words lemma

        Same request as getLemma, but the lemma is passed as `query`.
        """
        data = self.getServerData("lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query)))
        return data

    def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1):
        """number of result pages for the query in pageinfo

        Returns int(hits/10)+1 where hits is the content of the
        queryResultHits div of the response (0 if the div is missing).
        The arguments query, queryType and pn are overridden by the values
        in pageinfo.
        """
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['queryPageSize']
        pn = pageinfo['searchPN']
        query = pageinfo['query']
        queryType = pageinfo['queryType']
        tocSearch = 0

        pagexml = self.getServerData("doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn))
        pagedom = Parse(pagexml)
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            # guard like getToc does; a response without hits div used to
            # raise an IndexError here
            tocSearch = int(getTextFromNode(numdivs[0]))
        # NOTE(review): divides by a fixed 10 rather than by pagesize --
        # presumably matches the page size used by the templates; confirm.
        tc=int((tocSearch/10)+1)
        logging.debug("documentViewer (gettoc) tc: %s"%(tc))
        return tc

    def getToc(self, mode="text", docinfo=None):
        """loads table of contents and stores in docinfo

        Stores the number of entries as docinfo['tocSize_<mode>'] (0 if the
        response has no queryResultHits div) and returns docinfo. Nothing is
        fetched for mode "none" or if the size is already in docinfo.
        """
        logging.debug("documentViewer (gettoc) mode: %s"%(mode))
        if mode == "none":
            return docinfo
        if 'tocSize_%s'%mode in docinfo:
            # cached toc
            return docinfo

        docpath = docinfo['textURLPath']
        # we need to set a result set size
        pagesize = 1000
        pn = 1
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        # number of entries in toc
        tocSize = 0

        pagexml = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))
        # post-processing downloaded xml
        pagedom = Parse(pagexml)
        # get number of entries
        numdivs = pagedom.xpath("//div[@class='queryResultHits']")
        if len(numdivs) > 0:
            tocSize = int(getTextFromNode(numdivs[0]))
        docinfo['tocSize_%s'%mode] = tocSize
        return docinfo

    def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None):
        """returns single page from the table of contents

        The argument pn is overridden by pageinfo['tocPN']. Returns the
        server response as text with page-fragment links rewritten to point
        at this object.
        """
        # TODO: this should use the cached TOC
        if mode == "text":
            queryType = "toc"
        else:
            queryType = mode
        docpath = docinfo['textURLPath']
        pagesize = pageinfo['tocPageSize']
        pn = pageinfo['tocPN']
        url = docinfo['url']
        selfurl = self.absolute_url()
        viewMode = pageinfo['viewMode']
        tocMode = pageinfo['tocMode']
        tocPN = pageinfo['tocPN']

        data = self.getServerData("doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn))

        page = data.replace('page-fragment.xql?document=%s'%str(docpath),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN))
        text = page.replace('mode=image','mode=texttool')
        return text

    def manage_changeMpdlXmlTextServer(self,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
        """change settings"""
        self.title=title
        self.timeout = timeout
        self.serverUrl = serverUrl
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

# management methods
def manage_addMpdlXmlTextServerForm(self):
    """Form for adding"""
    pt = PageTemplateFile("zpt/manage_addMpdlXmlTextServer", globals()).__of__(self)
    return pt()

def manage_addMpdlXmlTextServer(self,id,title="",serverUrl="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/",timeout=40,RESPONSE=None):
    """add MpdlXmlTextServer"""
    newObj = MpdlXmlTextServer(id,title,serverUrl,timeout)
    self.Destination()._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
--- a/__init__.py Wed Jun 16 18:39:54 2010 +0200 +++ b/__init__.py Wed Jun 16 20:27:04 2010 +0200 @@ -1,5 +1,5 @@ import documentViewer - +import MpdlXmlTextServer def initialize(context): """initialize ImageCollection""" @@ -10,3 +10,20 @@ documentViewer.manage_AddDocumentViewer ) ) + + context.registerClass( + documentViewer.DocumentViewerTemplate, + constructors = ( + documentViewer.manage_addDocumentViewerTemplateForm, + documentViewer.manage_addDocumentViewerTemplate + ) + ) + + context.registerClass( + MpdlXmlTextServer.MpdlXmlTextServer, + constructors = ( + MpdlXmlTextServer.manage_addMpdlXmlTextServerForm, + MpdlXmlTextServer.manage_addMpdlXmlTextServer + ) + ) + \ No newline at end of file
--- a/documentViewer.py Wed Jun 16 18:39:54 2010 +0200 +++ b/documentViewer.py Wed Jun 16 20:27:04 2010 +0200 @@ -66,23 +66,56 @@ return '/'.join(path.split('/')[0:-1]) -import socket +def getHttpData(url, data=None, num_tries=3, timeout=10): + """returns result from url+data HTTP request""" + # we do GET (by appending data to url) + if isinstance(data, str) or isinstance(data, unicode): + # if data is string then append + url = "%s?%s"%(url,data) + elif isinstance(data, dict) or isinstance(data, list) or isinstance(data, tuple): + # urlencode + url = "%s?%s"%(url,urllib.urlencode(data)) + + response = None + errmsg = None + for cnt in range(num_tries): + try: + logging.debug("getHttpData(#%s %ss) url=%s"%(cnt+1,timeout,url)) + if sys.version_info < (2, 6): + # set timeout on socket -- ugly :-( + import socket + socket.setdefaulttimeout(timeout) + response = urllib2.urlopen(url) + else: + response = urllib2.urlopen(url,timeout=float(timeout)) + # check result? + break + except urllib2.HTTPError, e: + logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) + errmsg = str(e) + # stop trying + break + except urllib2.URLError, e: + logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) + errmsg = str(e) + # stop trying + #break -def urlopen(url,timeout=2): - """urlopen mit timeout""" - socket.setdefaulttimeout(timeout) - ret=urllib.urlopen(url) - socket.setdefaulttimeout(5) - return ret + if response is not None: + data = response.read() + response.close() + return data + + raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) + #return None + ## ## documentViewer class ## -class documentViewer(Folder, extraFunction): +class documentViewer(Folder): """document viewer""" - #textViewerUrl="http://127.0.0.1:8080/HFQP/testXSLT/getPage?" 
- meta_type="Document viewer" security=ClassSecurityInfo() @@ -123,12 +156,12 @@ #self['template'] = templateFolder # Zope-2.12 style self._setObject('template',templateFolder) # old style try: - from Products.XMLRpcTools.XMLRpcTools import XMLRpcServerProxy - xmlRpcClient = XMLRpcServerProxy(id='fulltextclient', serverUrl=textServerName, use_xmlrpc=False) + import MpdlXmlTextServer + textServer = MpdlXmlTextServer(id='fulltextclient') #templateFolder['fulltextclient'] = xmlRpcClient - templateFolder._setObject('fulltextclient',xmlRpcClient) + templateFolder._setObject('fulltextclient',textServer) except Exception, e: - logging.error("Unable to create XMLRpcTools for fulltextclient: "+str(e)) + logging.error("Unable to create MpdlXmlTextServer for fulltextclient: "+str(e)) try: from Products.zogiLib.zogiLib import zogiLib zogilib = zogiLib(id="zogilib", title="zogilib for docuviewer", dlServerURL=imageScalerUrl, layout="book") @@ -137,7 +170,41 @@ except Exception, e: logging.error("Unable to create zogiLib for zogilib: "+str(e)) + + # proxy text server methods to fulltextclient + def getTextPage(self, **args): + """get page""" + return self.template.fulltextclient.getTextPage(**args) + def getQuery(self, **args): + """get query""" + return self.template.fulltextclient.getQuery(**args) + + def getSearch(self, **args): + """get search""" + return self.template.fulltextclient.getSearch(**args) + + def getNumPages(self, **args): + """get numpages""" + return self.template.fulltextclient.getNumPages(**args) + + def getTranslate(self, **args): + """get translate""" + return self.template.fulltextclient.getTranslate(**args) + + def getLemma(self, **args): + """get lemma""" + return self.template.fulltextclient.getLemma(**args) + + def getToc(self, **args): + """get toc""" + return self.template.fulltextclient.getToc(**args) + + def getTocPage(self, **args): + """get tocpage""" + return self.template.fulltextclient.getTocPage(**args) + + 
security.declareProtected('View','thumbs_rss') def thumbs_rss(self,mode,url,viewMode="auto",start=None,pn=1): ''' @@ -304,7 +371,6 @@ def getDirinfoFromDigilib(self,path,docinfo=None,cut=0): """gibt param von dlInfo aus""" - num_retries = 3 if docinfo is None: docinfo = {} @@ -316,17 +382,11 @@ logging.debug("documentViewer (getparamfromdigilib) dirInfo from %s"%(infoUrl)) - for cnt in range(num_retries): - try: - # dom = NonvalidatingReader.parseUri(imageUrl) - txt=urllib.urlopen(infoUrl).read() - dom = Parse(txt) - break - except: - logging.error("documentViewer (getdirinfofromdigilib) error reading %s (try %d)"%(infoUrl,cnt)) - else: + txt = getHttpData(infoUrl) + if txt is None: raise IOError("Unable to get dir-info from %s"%(infoUrl)) - + + dom = Parse(txt) sizes=dom.xpath("//dir/size") logging.debug("documentViewer (getparamfromdigilib) dirInfo:size"%sizes) @@ -342,7 +402,6 @@ def getIndexMeta(self, url): """returns dom of index.meta document at url""" - num_retries = 3 dom = None metaUrl = None if url.startswith("http://"): @@ -354,25 +413,17 @@ metaUrl=server+url.replace("/mpiwg/online","") if not metaUrl.endswith("index.meta"): metaUrl += "/index.meta" - logging.debug("METAURL: %s"%metaUrl) - for cnt in range(num_retries): - try: - # patch dirk encoding fehler treten dann nicht mehr auf - # dom = NonvalidatingReader.parseUri(metaUrl) - txt=urllib.urlopen(metaUrl).read() - dom = Parse(txt) - break - except: - logging.error("ERROR documentViewer (getIndexMeta) %s (%s)"%sys.exc_info()[0:2]) - if dom is None: + logging.debug("(getIndexMeta): METAURL: %s"%metaUrl) + txt=getHttpData(metaUrl) + if txt is None: raise IOError("Unable to read index meta from %s"%(url)) - + + dom = Parse(txt) return dom def getPresentationInfoXML(self, url): """returns dom of info.xml document at url""" - num_retries = 3 dom = None metaUrl = None if url.startswith("http://"): @@ -383,19 +434,11 @@ server=self.digilibBaseUrl+"/servlet/Texter?fn=" 
metaUrl=server+url.replace("/mpiwg/online","") - for cnt in range(num_retries): - try: - # patch dirk encoding fehler treten dann nicht mehr auf - # dom = NonvalidatingReader.parseUri(metaUrl) - txt=urllib.urlopen(metaUrl).read() - dom = Parse(txt) - break - except: - logging.error("ERROR documentViewer (getPresentationInfoXML) %s (%s)"%sys.exc_info()[0:2]) - - if dom is None: + txt=getHttpData(metaUrl) + if txt is None: raise IOError("Unable to read infoXMLfrom %s"%(url)) - + + dom = Parse(txt) return dom
--- a/extraFunction.py Wed Jun 16 18:39:54 2010 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,372 +0,0 @@ - -from OFS.Folder import Folder -from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate -from Products.PageTemplates.PageTemplateFile import PageTemplateFile -from Products.PythonScripts.standard import url_quote - - -from Ft.Xml.Domlette import NonvalidatingReader -from Ft.Xml.Domlette import PrettyPrint, Print -from Ft.Xml import EMPTY_NAMESPACE, Parse - -from xml.dom.minidom import parse, parseString - -import Ft.Xml.XPath -import cStringIO -import xmlrpclib -import os.path -import sys -import cgi -import urllib -import logging -import math -import documentViewer -import urllib2 -import urllib -import urlparse -from types import * - -def getTextFromNode(nodename): - "get the cdata content of a node" - if nodename is None: - return "" - nodelist=nodename.childNodes - rc = "" - for node in nodelist: - if node.nodeType == node.TEXT_NODE: - rc = rc + node.data - return rc - -def serializeNode(node, encoding='utf-8'): - "returns a string containing node as XML" - buf = cStringIO.StringIO() - Print(node, stream=buf, encoding=encoding) - s = buf.getvalue() - buf.close() - return s - - -class extraFunction(Folder): - - - def __init__(self,id, title=""): - - self.id=id - self.title=title - - def getHttpData(self, url, data=None, num_tries=3, timeout=40): - """returns result from url+data HTTP request""" - # we do GET (by appending data to url) - if isinstance(data, str) or isinstance(data, unicode): - # if data is string then append - url = "%s?%s"%(url,data) - else: - # we assume its a dict - url = "%s?%s"%(url,urllib.urlencode(data)) - - response = None - errmsg = None - for cnt in range(num_tries): - try: - logging.debug("getHttpData(%s) url=%s"%(cnt+1,url)) - if sys.version_info < (2, 6): - # set timeout on socket -- ugly :-( - import socket - socket.setdefaulttimeout(timeout) - response = urllib2.urlopen(url) - else: - response = 
urllib2.urlopen(url,timeout=timeout) - # check result? - break - except urllib2.HTTPError, e: - logging.error("getHttpData: HTTP error(%s): %s"%(e.code,e)) - errmsg = str(e) - # stop trying - break - except urllib2.URLError, e: - logging.error("getHttpData: URLLIB error(%s): %s"%(e.reason,e)) - errmsg = str(e) - # stop trying - #break - - if response is not None: - data = response.read() - response.close() - return data - - raise IOError("ERROR fetching HTTP data from %s: %s"%(url,errmsg)) - #return None - - - - def getSearch(self, pn=1, pageinfo=None, docinfo=None, query=None, queryType=None, lemma=None): - """get search list""" - docpath = docinfo['textURLPath'] - url = docinfo['url'] - logging.debug("documentViewer (gettoc) docpath: %s"%(docpath)) - logging.debug("documentViewer (gettoc) url: %s"%(url)) - pagesize = pageinfo['queryPageSize'] - pn = pageinfo['searchPN'] - sn = pageinfo['sn'] - highlightQuery = pageinfo['highlightQuery'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - viewMode= pageinfo['viewMode'] - tocMode = pageinfo['tocMode'] - tocPN = pageinfo['tocPN'] - selfurl = self.absolute_url() - - data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery)) - #page=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&sn=%s&viewMode=%s&highlightQuery=%s"%(docpath, 'text', queryType, query, pagesize, pn, sn, viewMode,highlightQuery) ,outputUnicode=False) - #data = page.read() - #page.close() - - pagexml = data.replace('?document=%s'%str(docpath),'?url=%s'%url) - pagedom = Parse(pagexml) - if (queryType=="fulltext")or(queryType=="xpath")or(queryType=="xquery")or(queryType=="fulltextMorphLemma"): - pagedivs = 
pagedom.xpath("//div[@class='queryResultPage']") - if len(pagedivs)>0: - pagenode=pagedivs[0] - links=pagenode.xpath("//a") - for l in links: - hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - href = hrefNode.nodeValue - if href.startswith('page-fragment.xql'): - selfurl = self.absolute_url() - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) - hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) - return serializeNode(pagenode) - if (queryType=="fulltextMorph"): - pagedivs = pagedom.xpath("//div[@class='queryResult']") - if len(pagedivs)>0: - pagenode=pagedivs[0] - links=pagenode.xpath("//a") - for l in links: - hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - href = hrefNode.nodeValue - if href.startswith('page-fragment.xql'): - selfurl = self.absolute_url() - pagexml=href.replace('mode=text','mode=texttool&viewMode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s&tocMode=%s&searchPN=%s&tocPN=%s'%(viewMode,queryType,query,pagesize,pn,tocMode,pn,tocPN)) - hrefNode.nodeValue = pagexml.replace('page-fragment.xql','%s'%selfurl) - if href.startswith('../lt/lemma.xql'): - hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma_New'%(selfurl)) - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - pagedivs = pagedom.xpath("//div[@class='queryResultMorphExpansion']") - return serializeNode(pagenode) - if (queryType=="ftIndex")or(queryType=="ftIndexMorph"): - pagedivs= pagedom.xpath("//div[@class='queryResultPage']") - if len(pagedivs)>0: - pagenode=pagedivs[0] - links=pagenode.xpath("//a") - for l in links: - 
hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - href = hrefNode.nodeValue - hrefNode.nodeValue=href.replace('mode=text','mode=texttool&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s'%(viewMode,tocMode,tocPN,pn)) - if href.startswith('../lt/lex.xql'): - hrefNode.nodeValue = href.replace('../lt/lex.xql','%s/template/head_main_voc'%selfurl) - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - if href.startswith('../lt/lemma.xql'): - hrefNode.nodeValue = href.replace('../lt/lemma.xql','%s/template/head_main_lemma'%selfurl) - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=400, scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - return serializeNode(pagenode) - return "no text here" - - def getNumPages(self,docinfo=None): - """get list of pages from fulltext and put in docinfo""" - xquery = '//pb' - text = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/xquery.xql","document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) - #text = self.template.fulltextclient.eval("/mpdl/interface/xquery.xql", "document=%s&xquery=%s"%(docinfo['textURLPath'],xquery)) - docinfo['numPages'] = text.count("<pb ") - return docinfo - - def getTextPage(self, mode="text", pn=1, docinfo=None, pageinfo=None, highlightQuery=None,sn=None, viewMode=None, tocMode=None, tocPN=None): - """returns single page from fulltext""" - docpath = docinfo['textURLPath'] - path = docinfo['textURLPath'] - url = docinfo['url'] - viewMode= pageinfo['viewMode'] - tocMode = pageinfo['tocMode'] - tocPN = pageinfo['tocPN'] - selfurl = self.absolute_url() - if mode == "text_dict": - textmode = "textPollux" - else: - 
textmode = mode - - textParam = "document=%s&mode=%s&pn=%s"%(docpath,textmode,pn) - if highlightQuery is not None: - textParam +="&highlightQuery=%s&sn=%s"%(highlightQuery,sn) - - pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/page-fragment.xql",textParam) - """pagexml=self.template.fulltextclient.eval("/mpdl/interface/page-fragment.xql", textParam, outputUnicode=False)""" - - pagedom = Parse(pagexml) - # plain text mode - if mode == "text": - # first div contains text - pagedivs = pagedom.xpath("/div") - if len(pagedivs) > 0: - pagenode = pagedivs[0] - links = pagenode.xpath("//a") - for l in links: - hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - href= hrefNode.nodeValue - if href.startswith('#note-'): - hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) - return serializeNode(pagenode) - if mode == "xml": - # first div contains text - pagedivs = pagedom.xpath("/div") - if len(pagedivs) > 0: - pagenode = pagedivs[0] - return serializeNode(pagenode) - if mode == "pureXml": - # first div contains text - pagedivs = pagedom.xpath("/div") - if len(pagedivs) > 0: - pagenode = pagedivs[0] - return serializeNode(pagenode) - # text-with-links mode - if mode == "text_dict": - # first div contains text - pagedivs = pagedom.xpath("/div") - if len(pagedivs) > 0: - pagenode = pagedivs[0] - # check all a-tags - links = pagenode.xpath("//a") - for l in links: - hrefNode = l.getAttributeNodeNS(None, u"href") - if hrefNode: - # is link with href - href = hrefNode.nodeValue - if href.startswith('lt/lex.xql'): - # is pollux link - selfurl = self.absolute_url() - # change href - hrefNode.nodeValue = href.replace('lt/lex.xql','%s/template/head_main_voc'%selfurl) - # add target - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, 
scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - if href.startswith('lt/lemma.xql'): - selfurl = self.absolute_url() - hrefNode.nodeValue = href.replace('lt/lemma.xql','%s/template/head_main_lemma'%selfurl) - l.setAttributeNS(None, 'target', '_blank') - l.setAttributeNS(None, 'onClick',"popupWin = window.open(this.href, 'contacts', 'location,width=500,height=600,top=180, left=700, scrollbars=1'); return false;") - l.setAttributeNS(None, 'onDblclick', 'popupWin.focus();') - if href.startswith('#note-'): - hrefNode.nodeValue = href.replace('#note-',"?url=%s&viewMode=%s&tocMode=%s&tocPN=%s&pn=%s#note-"%(url,viewMode,tocMode,tocPN,pn)) - return serializeNode(pagenode) - return "no text here" - - def getTranslate(self, query=None, language=None): - """translate into another languages""" - data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) - #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lex.xql","document=&language="+str(language)+"&query="+url_quote(str(query))) - #data = pagexml.read() - #pagexml.close() - return data - - def getLemma(self, lemma=None, language=None): - """simular words lemma """ - data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) - #pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(lemma))) - #data = pagexml.read() - #pagexml.close() - return data - - def getLemmaNew(self, query=None, language=None): - """simular words lemma """ - data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) - 
#pagexml=self.template.fulltextclient.eval("/mpdl/interface/lt/lemma.xql","document=&language="+str(language)+"&lemma="+url_quote(str(query))) - #data = pagexml.read() - #pagexml.close() - return data - - def getQuery (self, docinfo=None, pageinfo=None, query=None, queryType=None, pn=1): - """number of""" - docpath = docinfo['textURLPath'] - pagesize = pageinfo['queryPageSize'] - pn = pageinfo['searchPN'] - query =pageinfo['query'] - queryType =pageinfo['queryType'] - tocSearch = 0 - tocDiv = None - - pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn)) - #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql","document=%s&mode=%s&queryType=%s&query=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath, 'text', queryType, query, pagesize, pn) ,outputUnicode=False) - pagedom = Parse(pagexml) - numdivs = pagedom.xpath("//div[@class='queryResultHits']") - tocSearch = int(getTextFromNode(numdivs[0])) - tc=int((tocSearch/10)+1) - logging.debug("documentViewer (gettoc) tc: %s"%(tc)) - return tc - - def getToc(self, mode="text", docinfo=None): - """loads table of contents and stores in docinfo""" - logging.debug("documentViewer (gettoc) mode: %s"%(mode)) - if mode == "none": - return docinfo - if 'tocSize_%s'%mode in docinfo: - # cached toc - return docinfo - - docpath = docinfo['textURLPath'] - # we need to set a result set size - pagesize = 1000 - pn = 1 - if mode == "text": - queryType = "toc" - else: - queryType = mode - # number of entries in toc - tocSize = 0 - tocDiv = None - - pagexml = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) - #pagexml=self.template.fulltextclient.eval("/mpdl/interface/doc-query.xql", 
"document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType,pagesize,pn), outputUnicode=False) - # post-processing downloaded xml - pagedom = Parse(pagexml) - # get number of entries - numdivs = pagedom.xpath("//div[@class='queryResultHits']") - if len(numdivs) > 0: - tocSize = int(getTextFromNode(numdivs[0])) - docinfo['tocSize_%s'%mode] = tocSize - return docinfo - - def getTocPage(self, mode="text", pn=1, pageinfo=None, docinfo=None): - """returns single page from the table of contents""" - # TODO: this should use the cached TOC - if mode == "text": - queryType = "toc" - else: - queryType = mode - docpath = docinfo['textURLPath'] - path = docinfo['textURLPath'] - pagesize = pageinfo['tocPageSize'] - pn = pageinfo['tocPN'] - url = docinfo['url'] - selfurl = self.absolute_url() - viewMode= pageinfo['viewMode'] - tocMode = pageinfo['tocMode'] - tocPN = pageinfo['tocPN'] - - data = self.getHttpData("http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/doc-query.xql","document=%s&queryType=%s&queryResultPageSize=%s&queryResultPN=%s"%(docpath,queryType, pagesize, pn)) - #data = pagexml.read() - #pagexml.close() - - page = data.replace('page-fragment.xql?document=%s'%str(path),'%s?url=%s&viewMode=%s&tocMode=%s&tocPN=%s'%(selfurl,url, viewMode, tocMode, tocPN)) - text = page.replace('mode=image','mode=texttool') - return text - - \ No newline at end of file
--- a/version.txt Wed Jun 16 18:39:54 2010 +0200 +++ b/version.txt Wed Jun 16 20:27:04 2010 +0200 @@ -1,1 +1,1 @@ -DocumentViewer 0.4.0 \ No newline at end of file +DocumentViewer 0.5.1 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/manage_addMpdlXmlTextServer.zpt Wed Jun 16 20:27:04 2010 +0200 @@ -0,0 +1,11 @@ +<h1 tal:replace="structure here/manage_page_header">Header</h1> + <h2>Add an MPDL-XML TextServer</h2> + <form action="manage_addMpdlXmlTextServer"> + <p class="form-label">Id<input name="id"></p> + <p class="form-optional">Title<input size="80" name="title"></p> + <p class="form-label">Text Server URL<input size="80" name="serverUrl" value="http://mpdl-proto.mpiwg-berlin.mpg.de/mpdl/interface/"></p> + <p class="form-optional">Timeout (s)<input size="10" name="timeout" value="30"></p> + <p><input type="submit" value="add"></p> + </form> + +<h1 tal:replace="structure here/manage_page_footer">Footer</h1>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/zpt/manage_changeMpdlXmlTextServer.zpt Wed Jun 16 20:27:04 2010 +0200 @@ -0,0 +1,17 @@ +<div tal:replace="structure here/manage_page_header">Header</div> +<!-- ZOPE management tabs --> +<h2 tal:define="manage_tabs_message options/manage_tabs_message | nothing" + tal:replace="structure here/manage_tabs">Tabs</h2> +<!-- end of ZOPE management tabs --> + <form action="manage_changeMpdlXmlTextServer"> + + <p class="form-optional">Title</p> + <p class="form-element"><input size="80" tal:attributes="value here/title" name="title"></p> + <p class="form-optional">Text Server URL</p> + <p class="form-element"><input size="80" tal:attributes="value here/serverUrl | default" name="serverUrl"></p> + <p class="form-optional">Timeout (s)</p> + <p class="form-element"><input size="3" tal:attributes="value here/timeout | default" name="timeout"></p> + <p><input type="submit" value="change"></p> + </form> + +<div tal:replace="structure here/manage_page_footer">Footer</div>