--- ECHO_content/ECHO_xslt.py 2005/10/23 09:46:26 1.4 +++ ECHO_content/ECHO_xslt.py 2007/01/09 17:01:01 1.18 @@ -7,23 +7,30 @@ from ECHO_Nav import ECHO_pageTemplate from threading import Thread,Timer import threading from ECHO_helpers import * +try: + from ECHO_language import * +except: + print "no echo language" + class ECHO_language: + """leere Klasse""" + pass import sys import urllib import urlparse - - - +from Ft.Xml.Domlette import Print, PrettyPrint +from StringIO import StringIO +from types import * from Globals import package_home - +import transaction import os.path -import urllib +import urllib,cgi try: from Ft.Xml.Xslt.Processor import Processor - from Ft.Xml import InputSource - + from Ft.Xml import InputSource, EMPTY_NAMESPACE,Parse + from Ft.Xml.Domlette import NonvalidatingReader except: print "4suite has to be installed" @@ -61,9 +68,9 @@ class getXML(Implicit): def run(self): """call it""" xml="" - + try: - + urlH=urllib.urlopen(self._v_qs) xml=urlH.read() urlH.close() @@ -76,7 +83,7 @@ class getXML(Implicit): #print self.xsl - #print xsltproc.run(document) + #< xsltproc.run(document) tmp=xsltproc.run(document) self.result=tmp[0:] @@ -94,13 +101,65 @@ class getXML(Implicit): return self.result - +from ZODB import DB +from ZODB.FileStorage import FileStorage +class ECHO_cache: + def __init__(self): + """init the storage""" + self.storage=FileStorage(os.path.join(INSTANCE_HOME,"var/echo_cache.fs")) + self.db=DB(self.storage) + self.connection=self.db.open() + self.root=self.connection.root() -class ECHO_xslt(ECHO_pageTemplate): + def deleteObject(self,name,pn=None): + """delete an object from cache""" + fileStore=self.root.get(name,None) + if fileStore: + if not pn: + del(self.root[name]) + else: + if self.root[name].get(pn,None): + del(self.root[name][pn]) + + + def storeObject(self,name,pn,object): + """store an object""" + + if not self.root.get(name,None): + self.root[name]={} + + + #following is necessary to make clear that object has really 
changed for ZODB + tmp=self.root[name] + tmp[pn]=object + self.root[name]=tmp + transaction.get().commit() + return True + + def retrieveObject(self,name,pn): + """retrieve it""" + + fileStore=self.root.get(name,None) + if not fileStore: + return None + else: + + return self.root[name].get(pn,None) + + +class ECHO_xslt(ECHO_pageTemplate,ECHO_language): """ECHO_xslt classe""" meta_type="ECHO_xslt" + cache=ECHO_cache() # cache for analysed pages + caching="yes" + + appendQueryString=True # add query string to the cgiUrl can be changed with addChanges + + passURL=False #use url from querystring parameter fn to retrieve the text and not the url in cgi-url can be changed with addChanges + + results={} manage_options=ECHO_pageTemplate.manage_options+( {'label':'Change xml-ressource','action':'change_ECHO_xsltForm'},) @@ -119,13 +178,30 @@ class ECHO_xslt(ECHO_pageTemplate): pt=zptFile(self, 'zpt/ChangeECHO_xsltForm.zpt') return pt() - def addChanges(self,cgiUrl,RESPONSE=None): + def addChanges(self,cgiUrl,appendQueryString=False,passURL=False,caching=False,RESPONSE=None): """change the xslt, ueberschriebt addChanges in ECHO_PageTemplate""" + if urlparse.urlparse(cgiUrl)[0]=="":#relative url in absolute self.cgiUrl=urlparse.urljoin(self.absolute_url(), cgiUrl) else: self.cgiUrl=cgiUrl - + + if appendQueryString: + self.appendQueryString=True + else: + self.appendQueryString=False + + if passURL: + self.passURL=True + else: + self.passURL=False + + if caching: + self.caching="yes" + else: + self.caching="No" + + if RESPONSE: RESPONSE.redirect("manage_main") @@ -141,7 +217,7 @@ class ECHO_xslt(ECHO_pageTemplate): if getattr(self,'cgiUrl','')=='': self.cgiUrl="http://medea.mpiwg-berlin.mpg.de/cgi-bin/search/q1" - qs="%s?%s"%(self.cgiUrl,self.REQUEST['QUERY_STRING']) + qs="%s%s"%(self.cgiUrl,self.REQUEST['QUERY_STRING']) xsl=self.absolute_url()+"/xslt" self._v_xmltrans=getXML().__of__(self) #self._xmltrans.start() @@ -163,9 +239,9 @@ class ECHO_xslt(ECHO_pageTemplate): if 
(self._v_xmltrans.getResult()==None): - wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) - if wait_template: - return wait_template[0][1]() + wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) + if wait_template: + return wait_template[0][1]() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','xsltWait.zpt')).__of__(self) return pt() @@ -173,6 +249,343 @@ class ECHO_xslt(ECHO_pageTemplate): return self._v_xmltrans.getResult() + def getText(self): + """print nur den text""" + qs,baseUri=self.getTextInput() + self.REQUEST.RESPONSE.redirect(qs) + + def deleteCache(self): + """deletefrom cache""" + fn=self.REQUEST['fn'] + self.cache.deleteObject(fn) + + + def createLinkNode(self,url,dom): + """createa a link node""" + txt=dom.createTextNode("") + node=dom.createElementNS("http://test.de","a") + node.setAttributeNS("http://test.de","href",url) + node.appendChild(txt) + return node + + def forwardLink(self,linkid,url,type="target",RESPONSE=None): + """forward to link""" + if RESPONSE: + RESPONSE.redirect(self.getLink(linkid,url,type=type)) + + else: + return self.getLink(linkid,url,type=type) + def getLink(self,linkid,url,type="target"): + """get target for linkid""" + dom=NonvalidatingReader.parseUri(url) + + masterurl=dom.xpath("//mpiwg:masterurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + slaveurl=dom.xpath("//mpiwg:slaveurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + #check now if there are in the link file + + xp="//mpiwg:link[@id='%s']"%linkid + + if type=="target": + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:target/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + return urllib.unquote(fn) # dann gibt diese zurueck + + 
ref=link.xpath("mpiwg:target/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:target/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:target/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + + + lstr=slaveurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'target') + lstr+="&_links="+urllib.quote(url) + + else: + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:source/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + return urllib.unquote(fn) # dann gibt diese zurueck + + ref=link.xpath("mpiwg:source/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:source/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:source/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + lstr=masterurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'source') + lstr+="&_links="+urllib.quote(url) + return lstr + + def addLinksUrl(self,txt,url): + """add reference to links to url""" + ret=[] + dom=NonvalidatingReader.parseUri(url) + textDom=NonvalidatingReader.parseString(txt) + + #find ids in txt + ids=textDom.xpath("//*[@id]") + + for textid in ids: + xp="//mpiwg:link[mpiwg:source/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + 
ref2=link.xpath("mpiwg:source/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:source/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + ret.append('%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'source')) + + xp="//mpiwg:link[mpiwg:target/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + ref2=link.xpath("mpiwg:target/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:target/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + ret.append('%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'target')) + + + return ret + + def addLinks(self,txt,url="http://127.0.0.1:8080/HFQP/linkCreator/getCollectionXML?collection=commentary2"): + """add links to a page from xml linkfile""" + + dom=NonvalidatingReader.parseUri(url) + textDom=NonvalidatingReader.parseString(txt) + + #find ids in txt + ids=textDom.xpath("//*[@id]") + masterurl=dom.xpath("//mpiwg:masterurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + slaveurl=dom.xpath("//mpiwg:slaveurl/@ref",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + #check now if there are in the link file + for textid in ids: + xp="//mpiwg:link[mpiwg:source/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:target/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + print fn + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + lstr=urllib.unquote(fn) # dann gibt diese 
zurueck + else: + try: + ref=link.xpath("mpiwg:target/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:target/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:target/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + lstr=slaveurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,'target') + lstr+="&_links="+urllib.quote(url) + except: + lstr="" + node=self.createLinkNode(lstr,textDom) + textid.parentNode.insertBefore(node,textid) + + + xp="//mpiwg:link[mpiwg:target/@refid='%s']"%textid.xpath("@id")[0].value + for link in dom.xpath(xp,explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + fn=link.xpath("mpiwg:source/@filename",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + if urlparse.urlparse(urllib.unquote(fn))[0]=="http": # fn ist eine url + lstr=urllib.unquote(fn) # dann gibt diese zurueck + else: + + ref=link.xpath("mpiwg:source/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + + ref2=link.xpath("mpiwg:source/mpiwg:pagelink/@refid",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + selectionNodeIndex=link.xpath("mpiwg:source/mpiwg:pagelink/@selectionNodeIndex",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'})[0].value + linkid=link.xpath("@id")[0].value + lstr=masterurl+'fn='+fn+'&_id='+ref+'&_pagelink=%s///%s/%s/%s'%(ref2,selectionNodeIndex,linkid,"source") + lstr+="&_links="+urllib.quote(url) + + node=self.createLinkNode(lstr,textDom) + textid.parentNode.insertBefore(node,textid) + + + + strio = StringIO() + PrettyPrint(textDom,strio) + xmlstr = strio.getvalue() + + return xmlstr + + + + def 
getPageLex(self,_pn="1",_id=None,_caching=None,_links=None,_showall="no",_displaylinks="yes"):
+        """getpage mit lexikalischer analyse und xslt transform
+        if _caching=yes dann wird die lexikalisch analysierte seite in einem cache abgespeichert
+        """
+        def encode(hash):
+            ret=[]
+            for x in hash.keys():
+                value=hash[x]
+                
+                if type(value) is ListType:
+                    for z in value:
+                        ret.append("%s=%s"%(x,z))
+                else:
+                    ret.append("%s=%s"%(x,value))
+            return "&".join(ret)
+        
+        
+        
+        if not _caching:
+            _caching=self.caching
+        
+        fn=self.REQUEST['fn']
+        
+        if not _id:
+            
+            fromCache=self.cache.retrieveObject(fn,_pn)
+        
+            if fromCache and _caching=="yes":
+          
+                txt = fromCache
+            else:
+                txt=self.tagLex(nr=_pn)
+      
+                self.cache.storeObject(fn,_pn,txt[0:])
+        
+        else:
+            txt=self.tagLex(id=_id)
+       
+        if _showall=="yes":
+            params=cgi.parse_qs(self.REQUEST['QUERY_STRING'])
+           
+            params['_pagelink']=self.addLinksUrl(txt,url=_links)
+            params['_showall']='no'
+           
+            print self.absolute_url()+"?"+encode(params)
+            self.REQUEST.RESPONSE.redirect(self.absolute_url()+"/getPageLex?"+encode(params))
+            
+        
+        xsl=self.xslt()
+        
+        xsltproc=Processor()
+        if type(txt)==UnicodeType:
+            document = InputSource.DefaultFactory.fromString(txt.encode('utf-8'))
+        else:
+            document = InputSource.DefaultFactory.fromString(txt)
+        stylesheet = InputSource.DefaultFactory.fromString(xsl)
+        xsltproc.appendStylesheet(stylesheet)
+        tmp=xsltproc.run(document)
+        
+        if _links and (_displaylinks=='yes'):
+            _links=urllib.unquote(_links)
+            tmp=self.addLinks(tmp,url=_links)
+        
+        #bugfix for digilib images which doesn't accept &amp;
+        tmp=tmp.replace("&amp;","&")
+        
+        
+        return tmp[0:]
+    
+    def getTextInput(self):
+        """get the text
+        wie der text geholt wird liegt an der konfiguration,
+        ist appendQueryString gesetzt, dann wird jeweils der Querystring an die vorgegebene url gesetzt, erwartet wird fn=
+        für den Pfad, ist passURL gesetzt, dann wird falls fn= eine vollständige url enthält, diese anstelle der in cgiurl definierten genommen. 
+ """ + + if getattr(self,'passURL',False) and self.REQUEST.has_key('fn') and (urlparse.urlparse(self.REQUEST['fn'])[0]=='http'): + qs=self.REQUEST['fn'] + baseUri=qs + elif getattr(self,'pappendQueryString',True): + qs="%s%s"%(self.cgiUrl,self.REQUEST['QUERY_STRING']) + baseUri=self.cgiUrl + else: + qs="%s"%(self.cgiUrl) + baseUri=self.cgiUrl + + #fact= InputSource.DefaultFactory.fromUri(qs) + return qs,baseUri + #return InputSource.InputSource(fact) + #xmlt=urllib.urlopen(qs).read() + + def getPage(self,_pn="-1",_id=None,REQUEST=None,_caching=None): + """get a page from an xml""" + + if not _caching: + _caching=self.caching + + pn=int(_pn)-1 + if pn<0 and (not _id): + if REQUEST: + return "Sorry, pagenumbers have to be greater than 0" + else: + return None + + xmlt,self.baseUri=self.getTextInput() + + #get the text from cache, if existing + try: + fromCache=self.cache.retrieveObject(self.baseUri,"-1") + except: + fromCache=None + if fromCache and _caching=="yes": + + txt = fromCache + else: + + txt=urllib.urlopen(xmlt).read() + + self.cache.storeObject(self.baseUri,"-1",txt) + + + dom=NonvalidatingReader.parseString(txt,self.baseUri) + + #pb should have a namespache + + pbs=dom.xpath("//mpiwg:pb",explicitNss={'mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}) + + if len(pbs)==0: # versuche nochmal ohne + pbs=dom.xpath("//pb") + + if _id: + #suche wieviele pb for der id + + + idpb=dom.xpath("//*[@id='%s']/preceding::node()/mpiwg:pb"%_id,explicitNss={'html':'http://www.w3.org/1999/xhtml','mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}) + if len(idpb)==0: + idpb=dom.xpath("//*[@id='%s']/preceding::node()/pb"%_id) + + if len(idpb)==0: + k=0 + for node in dom.xpath("//*[@id='%s']//preceding::node()"%_id,explicitNss={'html':'http://www.w3.org/1999/xhtml','mpiwg':'http://www.mpiwg-berlin.mpg.de/namespace'}): + if getattr(node,'tagName',"")=="mpiwg:pb": + k+=1 + else: + k=len(idpb) + #pn=k-1 #-1 wegen Seitenzahlzaehlung startet mit 0 + pn=k-1 #-1 wegen 
Seitenzahlzaehlung startet mit 0 + if pn > len(pbs): + if REQUEST: + return "Sorry, pagenumber %s does not exit"%(pn+1) + else: + return None + + beginNode=pbs[pn] #take the n'th pb + + if not (pn==len(pbs)-1): # nicht die letzte Seite + endNode=pbs[pn+1] + else: + endNode=None + + deleteNodes=beginNode.xpath('preceding::node()') + if endNode: + deleteNodes+=endNode.xpath('following::node()') + for node in deleteNodes: + try: + parent=node.xpath("..") + + if parent: + parent[0].removeChild(node) + except: + zLOG.LOG("ECHO_Resource (getAccessRightMD)", zLOG.INFO,"%s (%s)"%sys.exc_info()[0:2]) + strio = StringIO() + PrettyPrint(dom,strio) + xmlstr = strio.getvalue() + + return xmlstr + + + def manage_addECHO_xsltForm(self): """Form for adding""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','AddECHO_xslt.zpt')).__of__(self) @@ -222,4 +635,3 @@ def manage_addECHO_xslt(self, id, label, u = "%s/%s" % (u, quote(id)) REQUEST.RESPONSE.redirect(u+'/manage_main') return '' -