--- ECHO_content/ECHO_xslt.py 2005/04/10 11:52:54 1.3 +++ ECHO_content/ECHO_xslt.py 2006/10/18 14:42:33 1.14 @@ -1,85 +1,225 @@ ### XSLT Class ### ### setzt 4 suite vorraus ### - +from Acquisition import Implicit from Products.PageTemplates.PageTemplateFile import PageTemplateFile from Globals import DTMLFile from ECHO_Nav import ECHO_pageTemplate from threading import Thread,Timer - +import threading +from ECHO_helpers import * +import ECHO_language +import sys +import urllib +import urlparse +from Ft.Xml.Domlette import Print, PrettyPrint +from StringIO import StringIO +from types import * from Globals import package_home +import transaction import os.path -import urllib +import urllib,cgi try: from Ft.Xml.Xslt.Processor import Processor - from Ft.Xml import InputSource - + from Ft.Xml import InputSource, EMPTY_NAMESPACE,Parse + from Ft.Xml.Domlette import NonvalidatingReader except: print "4suite has to be installed" -class getXML: +class getXML(Implicit): """get XML thread""" - def __init__(self,qs,xsl): - - self.qs=qs + def set(self,qs,xsl,result): + """set""" + + self._v_qs=qs self.xsl=xsl - self.result=None + self.result=None +# def acquireLock(self): +# +# lock=getattr(self, "_v_lock", None) +# if not lock: +# self._v_lock=threading.Lock() +# lock=self._v_lock +# lock.acquire() +# +# def releaseLock(self): +# # acquire() should have been called +# # about one second before. This means the volatile lock +# # should still be there +# +# self._v_lock.release() +# + def __call__(self): + """wait""" + return True + + def run(self): + """call it""" + xml="" + try: - urlH=urllib.urlopen(self.qs) - xml=urlH.read() - urlH.close() + + urlH=urllib.urlopen(self._v_qs) + xml=urlH.read() + urlH.close() + xsltproc=Processor() + document = InputSource.DefaultFactory.fromString(xml) + + stylesheet = InputSource.DefaultFactory.fromUri(self.xsl) + + xsltproc.appendStylesheet(stylesheet) + - - xsltproc=Processor() - document = InputSource.DefaultFactory.fromString(xml) - stylesheet = InputSource.DefaultFactory.fromUri(self.xsl) - xsltproc.appendStylesheet(stylesheet) + #print self.xsl + #< xsltproc.run(document) + tmp=xsltproc.run(document) + + self.result=tmp[0:] + - self.result=xsltproc.run(document) except: - self.result="error" + + self.result="error: %s %s
"%sys.exc_info()[0:2] + self.result+=xml + self.result+="" + + def getResult(self): + return self.result +from ZODB import DB +from ZODB.FileStorage import FileStorage +class ECHO_cache: + def __init__(self): + """init the storage""" + self.storage=FileStorage("/var/tmp/echo_cache.fs") + self.db=DB(self.storage) + self.connection=self.db.open() + self.root=self.connection.root() + + def deleteObject(self,name,pn=None): + """delete an object from cache""" + fileStore=self.root.get(name,None) + if fileStore: + if not pn: + del(self.root[name]) + else: + if self.root[name].get(pn,None): + del(self.root[name][pn]) + + + def storeObject(self,name,pn,object): + """store an object""" + + if not self.root.get(name,None): + self.root[name]={} + + #following is necessary to make clear that object has really changed for ZODB + tmp=self.root[name] + tmp[pn]=object + self.root[name]=tmp + transaction.get().commit() + return True + + def retrieveObject(self,name,pn): + """retrieve it""" + + fileStore=self.root.get(name,None) + if not fileStore: + return None + else: + + return self.root[name].get(pn,None) + - -class ECHO_xslt(ECHO_pageTemplate): +class ECHO_xslt(ECHO_pageTemplate,ECHO_language.ECHO_language): """ECHO_xslt classe""" meta_type="ECHO_xslt" + + cache=ECHO_cache() # cache for analysed pages + caching="yes" + + appendQueryString=True # add query string to the cgiUrl can be changed with addChanges + + passURL=False #use url from querystring parameter fn to retrieve the text and not the url in cgi-url can be changed with addChanges + + + results={} + manage_options=ECHO_pageTemplate.manage_options+( + {'label':'Change xml-ressource','action':'change_ECHO_xsltForm'},) + def refreshTxt(self): """txt fuer refresh""" return """ 2;url=%s?repeat=%s """%(self.absolute_url(),self.threadName) def xslt(self): """xslt""" - return self.pt_render() + return self.document_src() + def change_ECHO_xsltForm(self): + """change form""" + pt=zptFile(self, 'zpt/ChangeECHO_xsltForm.zpt') + return pt() + def addChanges(self,cgiUrl,appendQueryString=False,passURL=False,caching=False,RESPONSE=None): + """change the xslt, ueberschriebt addChanges in ECHO_PageTemplate""" + + if urlparse.urlparse(cgiUrl)[0]=="":#relative url in absolute + self.cgiUrl=urlparse.urljoin(self.absolute_url(), cgiUrl) + else: + self.cgiUrl=cgiUrl + + if appendQueryString: + self.appendQueryString=True + else: + self.appendQueryString=False + + if passURL: + self.passURL=True + else: + self.passURL=False + + if caching: + self.caching="yes" + else: + self.caching="No" + + + if RESPONSE: + RESPONSE.redirect("manage_main") def index_html(self,repeat=None): """standard ausgabe""" + threadName=repeat - if not threadName or threadName=="": - self.cgiUrl="http://medea.mpiwg-berlin.mpg.de/cgi-bin/search/q1" - - qs="%s?%s"%(self.cgiUrl,self.REQUEST['QUERY_STRING']) - xsl=self.absolute_url()+"/xslt" - self.xmltrans=getXML(qs,xsl) - thread=Thread(target=self.xmltrans) + #abwaertskompatibilitŠt mit altem nivht konfigurierbaren prototypen + + if getattr(self,'cgiUrl','')=='': + self.cgiUrl="http://medea.mpiwg-berlin.mpg.de/cgi-bin/search/q1" + + qs="%s%s"%(self.cgiUrl,self.REQUEST['QUERY_STRING']) + xsl=self.absolute_url()+"/xslt" + self._v_xmltrans=getXML().__of__(self) + #self._xmltrans.start() + thread=Thread(target=self._v_xmltrans) thread.start() + self._v_xmltrans.set(qs,xsl,None) + self._v_xmltrans.run() + self.threadName=thread.getName()[0:] wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) @@ -87,23 +227,356 @@ class ECHO_xslt(ECHO_pageTemplate): return wait_template[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','xsltWait.zpt')).__of__(self) return pt() - #xmltrans.run() + #_v_xmltrans.run() else: - if (self.xmltrans.getResult()==None): + if (self._v_xmltrans.getResult()==None): - wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) - if wait_template: - return wait_template[0][1]() + wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) + if wait_template: + return wait_template[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','xsltWait.zpt')).__of__(self) return pt() else: - return self.xmltrans.getResult() - + return self._v_xmltrans.getResult() + def getText(self): + """print nur den text""" + qs,baseUri=self.getTextInput() + self.REQUEST.RESPONSE.redirect(qs) + + def deleteCache(self): + """deletefrom cache""" + fn=self.REQUEST['fn'] + self.cache.deleteObject(fn) + + + def createLinkNode(self,url,dom): + """createa a link node""" + txt=dom.createTextNode("") + node=dom.createElementNS("http://test.de","a") + node.setAttributeNS("http://test.de","href",url) + node.appendChild(txt) + return node + + def forwardLink(self,linkid,url,type="target",RESPONSE=None): + """forward to link""" + if RESPONSE: + RESPONSE.redirect(self.getLink(linkid,url,type=type)) + + else: + return self.getLink(linkid,url,type=type) + def getLink(self,linkid,url,type="target"): + """get target for linkid""" + dom=NonvalidatingReader.parseUri(url) + + masterurl=dom.xpath("//mpiwg:masterurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + slaveurl=dom.xpath("//mpiwg:slaveurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + #check now if there are in the link file + + xp="//mpiwg:link[@id='%s']"%linkid + + if type=="target": + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:target/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + return urllib.unquote(fn) # dann gibt diese zurueck + + ref=link.xpath("mpiwg:target/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:target/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:target/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + + + lstr=slaveurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'target') + lstr+="&_links="+urllib.quote(url) + + else: + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:source/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + return urllib.unquote(fn) # dann gibt diese zurueck + + ref=link.xpath("mpiwg:source/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:source/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:source/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + lstr=masterurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'source') + lstr+="&_links="+urllib.quote(url) + return lstr + + def addLinksUrl(self,txt,url): + """add reference to links to url""" + ret=[] + dom=NonvalidatingReader.parseUri(url) + textDom=NonvalidatingReader.parseString(txt) + + #find ids in txt + ids=textDom.xpath("//*[@id]") + + for textid in ids: + xp="//mpiwg:link[mpiwg:source/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + ref2=link.xpath("mpiwg:source/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:source/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + ret.append('%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'source')) + + xp="//mpiwg:link[mpiwg:target/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + ref2=link.xpath("mpiwg:target/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:target/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + ret.append('%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'target')) + + + return ret + + def addLinks(self,txt,url="http://127.0.0.1:8080/HFQP/linkCreator/getCollectionXML?collection=commentary2"): + """add links to a page from xml linkfile""" + + dom=NonvalidatingReader.parseUri(url) + textDom=NonvalidatingReader.parseString(txt) + + #find ids in txt + ids=textDom.xpath("//*[@id]") + masterurl=dom.xpath("//mpiwg:masterurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + slaveurl=dom.xpath("//mpiwg:slaveurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + #check now if there are in the link file + for textid in ids: + xp="//mpiwg:link[mpiwg:source/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:target/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + print fn + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + lstr=urllib.unquote(fn) # dann gibt diese zurueck + else: + try: + ref=link.xpath("mpiwg:target/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:target/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:target/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + lstr=slaveurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'target') + lstr+="&_links="+urllib.quote(url) + except: + lstr="" + node=self.createLinkNode(lstr,textDom) + textid.parentNode.insertBefore(node,textid) + + + xp="//mpiwg:link[mpiwg:target/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:source/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + lstr=urllib.unquote(fn) # dann gibt diese zurueck + else: + + ref=link.xpath("mpiwg:source/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:source/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:source/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + lstr=masterurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,"source") + lstr+="&_links="+urllib.quote(url) + + node=self.createLinkNode(lstr,textDom) + textid.parentNode.insertBefore(node,textid) + + + + strio = StringIO() + PrettyPrint(textDom,strio) + xmlstr = strio.getvalue() + + return xmlstr + + + + def getPageLex(self,_pn="1",_id=None,_caching=None,_links=None,_showall="no"): + """getpage mit lexikalischer analyse und xslt transform + if _caching=yes dann wird die lwxikalisch analysierte seite in einem cache abgespeichert + """ + def encode(hash): + ret=[] + for x in hash.keys(): + value=hash[x] + + if type(value) is ListType: + for z in value: + ret.append("%s=%s"%(x,z)) + else: + ret.append("%s=%s"%(x,value)) + return "&".join(ret) + + + + if not _caching: + _caching=self.caching + + fn=self.REQUEST['fn'] + + if not _id: + + fromCache=self.cache.retrieveObject(fn,_pn) + + if fromCache and _caching=="yes": + + txt = fromCache + else: + txt=self.tagLex(nr=_pn) + + self.cache.storeObject(fn,_pn,txt[0:]) + + else: + txt=self.tagLex(id=_id) + + if _showall=="yes": + params=cgi.parse_qs(self.REQUEST['QUERY_STRING']) + + params['_pagelink']=self.addLinksUrl(txt,url=_links) + params['_showall']='no' + + print self.absolute_url()+"?"+encode(params) + self.REQUEST.RESPONSE.redirect(self.absolute_url()+"/getPageLex?"+encode(params)) + + + xsl=self.xslt() + + xsltproc=Processor() + if type(txt)==UnicodeType: + document = InputSource.DefaultFactory.fromString(txt.encode('utf-8')) + else: + document = InputSource.DefaultFactory.fromString(txt) + stylesheet = InputSource.DefaultFactory.fromString(xsl) + xsltproc.appendStylesheet(stylesheet) + tmp=xsltproc.run(document) + + if _links: + _links=urllib.unquote(_links) + tmp=self.addLinks(tmp,url=_links) + + #bugfix for digilib images which doesn't accept & + tmp=tmp.replace("&","&") + + + return tmp[0:] + + def getTextInput(self): + """get the text + wie der text geholt wird liegt an der konfiguration, + is appendQueryString gesetzt, dann wir jeweils der Querystring an vorgebenen url gesetzt, erwartet wird fn= + fŸr den Pfad, is passURL gesetzt, dann wird falls fn= eine vollstŠndige url enthŠlt, diese anstelle der in cgiurl definierten genommen. + """ + + if getattr(self,'passURL',False) and self.REQUEST.has_key('fn') and (urlparse.urlparse(self.REQUEST['fn'])[0]=='http'): + qs=self.REQUEST['fn'] + baseUri=qs + elif getattr(self,'pappendQueryString',True): + qs="%s%s"%(self.cgiUrl,self.REQUEST['QUERY_STRING']) + baseUri=self.cgiUrl + else: + qs="%s"%(self.cgiUrl) + baseUri=self.cgiUrl + + #fact= InputSource.DefaultFactory.fromUri(qs) + return qs,baseUri + #return InputSource.InputSource(fact) + #xmlt=urllib.urlopen(qs).read() + + def getPage(self,_pn="-1",_id=None,REQUEST=None,_caching=None): + """get a page from an xml""" + + if not _caching: + _caching=self.caching + + pn=int(_pn)-1 + if pn<0 and (not _id): + if REQUEST: + return "Sorry, pagenumbers have to be greater than 0" + else: + return None + + xmlt,self.baseUri=self.getTextInput() + + #get the text from cache, if existing + fromCache=self.cache.retrieveObject(self.baseUri,"-1") + if fromCache and _caching=="yes": + + txt = fromCache + else: + + txt=urllib.urlopen(xmlt).read() + + self.cache.storeObject(self.baseUri,"-1",txt) + + + dom=NonvalidatingReader.parseString(txt,self.baseUri) + + #pb should have a namespache + + pbs=dom.xpath("//mpiwg:pb",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}) + + if len(pbs)==0: # versuche nochmal ohne + pbs=dom.xpath("//pb") + + if _id: + #suche wieviele pb for der id + + + idpb=dom.xpath("//*[@id='%s']/preceding::node()/mpiwg:pb"%_id,explicitNss={'html':'http://test.de','mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}) + if len(idpb)==0: + idpb=dom.xpath("//*[@id='%s']/preceding::node()/pb"%_id) + + if len(idpb)==0: + k=0 + for node in dom.xpath("//*[@id='%s']//preceding::node()"%_id,explicitNss={'html':'http://test.de','mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + if getattr(node,'tagName',"")=="mpiwg:pb": + k+=1 + else: + k=len(idpb) + pn=k-1 #-1 wegen Seitenzahlzaehlung startet mit 0 + + if pn > len(pbs): + if REQUEST: + return "Sorry, pagenumber %s does not exit"%(pn+1) + else: + return None + + beginNode=pbs[pn] #take the n'th pb + + if not (pn==len(pbs)-1): # nicht die letzte Seite + endNode=pbs[pn+1] + else: + endNode=None + + deleteNodes=beginNode.xpath('preceding::node()') + if endNode: + deleteNodes+=endNode.xpath('following::node()') + for node in deleteNodes: + try: + parent=node.xpath("..") + + if parent: + parent[0].removeChild(node) + except: + zLOG.LOG("ECHO_Resource (getAccessRightMD)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) + strio = StringIO() + PrettyPrint(dom,strio) + xmlstr = strio.getvalue() + + return xmlstr + + + def manage_addECHO_xsltForm(self): """Form for adding""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','AddECHO_xslt.zpt')).__of__(self) @@ -112,7 +585,7 @@ def manage_addECHO_xsltForm(self): from urllib import quote -def manage_addECHO_xslt(self, id, label, weight= 0,contentType=0,title=None, text=None, +def manage_addECHO_xslt(self, id, label, weight= 0,contentType=0,title=None, text=None, cgiUrl=None, REQUEST=None, submit=None): "Add a Page Template with optional file content." @@ -127,6 +600,7 @@ def manage_addECHO_xslt(self, id, label, if title: ob.pt_setTitle(title) return ob + setattr(ob,'cgiUrl',cgiUrl) else: file = REQUEST.form.get('file') headers = getattr(file, 'headers', None) @@ -139,6 +613,7 @@ def manage_addECHO_xslt(self, id, label, ob = getattr(self, id) setattr(ob,'weight',weight) setattr(ob,'label',label) + setattr(ob,'cgiUrl',cgiUrl) if title: ob.pt_setTitle(title)