--- ECHO_content/vlp_xmlhelpers.py	2004/10/05 14:58:56	1.2
+++ ECHO_content/vlp_xmlhelpers.py	2009/06/09 14:05:20	1.16
@@ -1,6 +1,7 @@
 from sys import argv
 
 import string
+import logging
 import xml.dom.minidom
 import Ft.Xml.XLink.Processor
 import Ft.Xml.XLink.XLinkElements
@@ -9,211 +10,359 @@ from Ft.Xml import XPath
 from Ft.Xml.XPath import Evaluate
 from Ft.Xml.XLink import XLINK_NAMESPACE
 from Ft.Xml.XLink import XLinkElements
-
-#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
-#from Ft.Xml import EMPTY_NAMESPACE
+import cStringIO
+from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
+from Ft.Xml import EMPTY_NAMESPACE
 from Ft.Lib import Uri
 import urllib
 import re
+from ECHO_collection import unicodify,utf8ify
 
+patternTXT=r"<\s*txt.*?>(.*?)</txt>"
+regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
 patternPage=r"<\s*page.*?>(.*?)</page>"
 regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
 
-xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
-
-def addToDict(dict,name,value):
-    if name=="":
-        return 0
-    else:
-        
-        if not dict.has_key(name):
-            dict[name]=[] # als array anlegen
+#xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
+#
+#def addToDict(dict,name,value):
+#    if name=="":
+#        return 0
+#    else:
+#        
+#        if not dict.has_key(name):
+#            dict[name]=[] # als array anlegen
+#
+#        dict[name].append(value)
+#        return 1    
+#
+#def proj2hash(self,xmlstring):
+#    """wandelt xml-files fuer die projekte in ein hash"""
+#    
+#    dom=xml.dom.minidom.parseString(xmlstring)
+#    
+#        
+#    list={}
+#
+#    #gettitle
+#    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
+#    for par in pars:
+#        className=par.getAttribute('class')
+#        content=getText(self,par.childNodes)
+#        addToDict(list,className,content)
+#             
+#
+#    sectionXPath="section"
+#
+#    
+#    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+#    
+#    while sections:
+#        
+#        for section in sections:
+#            
+#            sec=parseSection(self,section)
+#            
+#            if sec[0]=="WEB_project_header": # Sonderfall project
+#                addToDict(list,'WEB_project_header',sec[1]) # store title
+#                addToDict(list,'WEB_project_description',sec[2]) #store description
+#            else: # no information in heading
+#                level=int(sec[3])+2
+#                aTag="<h%i>"%level
+#                eTag="</h%i>"%level
+#                addToDict(list,"text",aTag+sec[1]+eTag)
+#                addToDict(list,"text",sec[2])
+#        sectionXPath+="/section"
+#        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+#    return list
+#
+#
+#def parseSection(self,section):
+#    type=""
+#    header=""
+#    level=section.getAttribute('level')
+#    for heading in section.childNodes:
+#        if getattr(heading,'tagName','')=="heading":
+#            
+#            type=heading.getAttribute('class')
+#            header=getText(self,heading.childNodes)
+#
+#    if type=="": # falls heading fehlt, pruefe ob erster par richtig
+#        par=section.getElementsByTagName('par')[0]
+#        type=par.getAttribute('class')
+#        header=getText(par.childNodes)
+#
+#    #print section.childNodes
+#    #pars=Evaluate('par',section)
+#    pars=section.childNodes
+#    content=par2html(self,pars)
+#    #print "CONTENT",repr(content)
+#    return (type,header,content,level)
+#
+#def parseTable(table):
+#    fields={}
+#    rows=table.getElementsByTagName('html:tr')
+#    for row in rows:
+#        #print "ROW"
+#        cols=row.getElementsByTagName('html:td')
+#        
+#        #Name des Datenfeldes einlesen
+#        try:
+#            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
+#            #print "field",field
+#        except:
+#            print "error"
+#            field=""
+#
+#        #Wandeln der Eintraege in HTML
+#
+#        #pars=cols[1].getElementsByTagName('par')
+#        pars=cols[1].childNodes
+#        
+#        html=par2html(self,pars,tags=("",";"))
+#        
+#        addToDict(fields,field,html)
+#        #print fields
+#    return fields
+#
+#def par2html(self,pars,tags=None):
+#    html=""
+#
+#    for par in pars:
+#        tagName=getattr(par,'tagName','')
+#        if tagName in ["par","inline"]:
+#            #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
+#            #print "par",par
+#            if not tags:
+#                try:
+#                    tag=xml2htmlArray[par.getAttribute('class')]
+#                except:
+#                    tag=('<p>','</p>')
+#            else:
+#                tag=tags
+#            #print "TAG",tag
+#            content=getText(self,par.childNodes,par.getAttribute('class'))
+#            
+#            
+#
+#            #print par.getAttribute('class'),node
+#            try:
+#                html+=tag[0]+content+tag[1]
+#            except:
+#                html=+tag[0]+content+tag[1]
+#            
+#        elif tagName=="pb":
+#            html+="<pb/>"
+#        
+#    
+#    try:
+#
+#        return html
+#    except:
+#        return ""
 
-        dict[name].append(value)
-        return 1    
+def getXlink(nodes):
+    """searches xlinks and gives them back as html"""
+    ret=""
+    for node in nodes:
+        if node.attributes:
+            if 'xlink:type' in node.attributes.keys(): #is a xlink?
+                ret +=xlink2html(node)
+    return ret
 
-def proj2hash(self,xmlstring):
-    """wandelt xml-files fuer die projekte in ein hash"""
-    
-    dom=xml.dom.minidom.parseString(xmlstring)
+def checkRef(self,ref):
+        """teste ob reference angezeigt werden sollen"""
+        dbs={'vl_literature':'AND online = \'1\'',
+             'vl_technology':'AND complete =\'yes\'',
+             'vl_people':'AND complete =\'yes\'',
+             'vl_sites':'AND complete =\'yes\'',
+             'vl_transcript':'AND complete =\'yes\'',
+             'vl_essays':'AND online =\'yes\'',
+	     'vl_categories':''
+             }
+        res=None
+        for db in dbs.keys():
+            searchStr=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db]))
+            res=res or self.search(var=searchStr)
+        return res
     
-        
-    list={}
+def link2html(self,str):
+        """link2html links in html wandeln"""
+        if str:
 
-    #gettitle
-    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
-    for par in pars:
-        className=par.getAttribute('class')
-        content=getText(self,par.childNodes)
-        addToDict(list,className,content)
-             
+            str=re.sub("\&","&amp;",str)
+            dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
+           
+            
+            links=dom.getElementsByTagName("link")
+            
 
-    sectionXPath="section"
+            for link in links:
+                link.tagName="a"
+                ref=link.getAttribute("ref")
+                pn=link.getAttribute("page")
+                mk=link.getAttribute("mk")
+                href= link.getAttribute("href")
+                if href:
+                    link.setAttribute("class","external")
+                                    
+                if self.checkRef(ref):
+                    more = ""
+                    if pn:
+                        more += "&page=%s"%pn
+                        
+                    if mk:
+                        more += "&mk=%s"%mk
+                        
+                    link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more)
 
-    
-    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-    
-    while sections:
-        
-        for section in sections:
+
+            newxml=dom.toxml('utf-8')
+          
             
-            sec=parseSection(self,section)
             
-            if sec[0]=="WEB_project_header": # Sonderfall project
-                addToDict(list,'WEB_project_header',sec[1]) # store title
-                addToDict(list,'WEB_project_description',sec[2]) #store description
-            else: # no information in heading
-                level=sec[3]
-                aTag="<h%s>"%level
-                eTag="</h%s>"%level
-                addToDict(list,"text",aTag+sec[1]+eTag)
-                addToDict(list,"text",sec[2])
-        sectionXPath+="/section"
-        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-    return list
-
-
-def parseSection(self,section):
-    type=""
-    header=""
-    level=section.getAttribute('level')
-    for heading in section.childNodes:
-        if getattr(heading,'tagName','')=="heading":
-            
-            type=heading.getAttribute('class')
-            header=getText(self,heading.childNodes)
-
-    if type=="": # falls heading fehlt, pruefe ob erster par richtig
-        par=section.getElementsByTagName('par')[0]
-        type=par.getAttribute('class')
-        header=getText(par.childNodes)
-
-    #print section.childNodes
-    #pars=Evaluate('par',section)
-    pars=section.childNodes
-    content=par2html(self,pars)
-    #print "CONTENT",repr(content)
-    return (type,header,content,level)
-
-def parseTable(table):
-    fields={}
-    rows=table.getElementsByTagName('html:tr')
-    for row in rows:
-        #print "ROW"
-        cols=row.getElementsByTagName('html:td')
-        
-        #Name des Datenfeldes einlesen
-        try:
-            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
-            #print "field",field
-        except:
-            print "error"
-            field=""
+            retStr=regexpTXT.search(newxml)
+            retStr = retStr.group(1)
 
-        #Wandeln der Eintrge in HTML
+            return retStr.decode('utf-8') # we return unicode
 
-        #pars=cols[1].getElementsByTagName('par')
-        pars=cols[1].childNodes
-        
-        html=par2html(self,pars,tags=("",";"))
-        
-        addToDict(fields,field,html)
-        #print fields
-    return fields
-
-def par2html(self,pars,tags=None):
-    html=""
-
-    for par in pars:
-        tagName=getattr(par,'tagName','')
-        if tagName in ["par","inline"]:
-            #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
-            #print "par",par
-            if not tags:
-                try:
-                    tag=xml2htmlArray[par.getAttribute('class')]
-                except:
-                    tag=('<p>','</p>')
-            else:
-                tag=tags
-            #print "TAG",tag
-            content=getText(self,par.childNodes,par.getAttribute('class'))
+        return u""
+
+def related2html(self,str):
+    """related library items: xlinks in html wandeln / mb 22.11.2006"""
+    if str:
+                
+        str=re.sub("\&","&amp;",str)
+        dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
+        links=dom.getElementsByTagName("link")
+                
+        for link in links:
+            link.tagName = "a"
+            ref = link.getAttribute("ref")
+            pn = link.getAttribute("page")
+            obj = ref[0:3]
             
+            """erweiterung der related items von literatur auf weitere datenbankobjekte, mb 05.06.2009"""
+            if obj == 'lit':            
+                searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
+            elif obj == 'sit':
+                searchStr="select reference from vl_sites where reference =\'%s\' and complete = 'yes'"%(ref)
+            elif obj == 'per':
+                searchStr="select reference from vl_people where reference =\'%s\' and complete = 'yes'"%(ref)
+            elif obj == 'tec':
+                searchStr="select reference from vl_technology where reference =\'%s\' and complete = 'yes'"%(ref)
+            elif obj == 'exp':
+                searchStr="select reference from vl_experiments where reference =\'%s\' and complete = 'yes'"%(ref)
+                
+            res = self.search(var=searchStr)
+                                        
+            if res:
+                if obj == 'lit':
+                    if res[0]['online'] == 1: 
+                        # literatur item online verfuegbar
+                        if pn:
+                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
+                        else:
+                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+                            
+                        link.setAttribute("title", "click to view!")
+                        link.removeAttribute("ref")
+                        
+                        # prefix preceding the link
+                        prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+                        dom.documentElement.insertBefore(prefix, link)
+  
+                    else:
+                        # literatur item nur als bibliographische angabe vorhanden
+                        link.setAttribute("alt", unicodify(res[0]['fullreference']))
+                        link.setAttribute("title", "click to expand")
+                        link.setAttribute("onclick", "return toggle(this);")
+                        link.setAttribute("class", "x_offline")
+                        
+                        # prefix inside link text
+                        link.firstChild.data = '+ ' + link.firstChild.data
+                else:
+                    # links zu den anderen datenbankobjekten
+                    link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+                    link.setAttribute("title", "click to view")
+                    link.removeAttribute("ref")
             
-
-            #print par.getAttribute('class'),node
-            try:
-                html+=tag[0]+content+tag[1]
-            except:
-                html=+tag[0]+content+tag[1]
+                    # prefix preceding the link
+                    prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+                    dom.documentElement.insertBefore(prefix, link)
             
-        elif tagName=="pb":
-            html+="<pb/>"
-        elif tagName=="img":
-            html+="XXX"
-    
-    try:
-
-        return html
-    except:
-        return ""
+            else:
+                # objekt nicht verfügbar/freigegeben oder (web)link mit href statt ref
+                
+                #if ref != '':
+                #    link.removeAttribute("ref")
+                #    link.setAttribute("title", ref)
+                
+                
+                # prefix preceding the link
+                prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+                dom.documentElement.insertBefore(prefix, link)
+
+                
+        newxml=dom.toxml('utf-8')
+                
+        retStr=regexpTXT.search(newxml)
+        retStr = retStr.group(1)
+        #logging.debug("related2html out=%s"%repr(retStr))
+        return retStr.decode('utf-8') # we return unicode
 
-def getXlink(nodes):
-    """searches xlinks and gives them back as html"""
-    ret=""
-    for node in nodes:
-        if node.attributes:
-            if 'xlink:type' in node.attributes.keys(): #is a xlink?
-                ret +=xlink2html(node)
-    return ret
+    return u""
 
-def checkRef(self,ref):
-        dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
-        res=None
-        for db in dbs.keys():
+    
 
-            res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
-        return res
 
 def xml2html(self,str,quote="yes"):
         """link2html fuer VLP muss hier noch raus"""
-	
-        
         if str:
             if quote=="yes2":
                 str=re.sub("\&","&amp;",str)
-            
-            str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
-            #str=re.sub("ref\=([.[*^[>]]])",'XX',str)
-            #print "STR::",str
-            dom=xml.dom.minidom.parseString(str)
-            links=dom.getElementsByTagName("link")
-            
+            #dom=xml.dom.minidom.parseString(str)
+            dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
+            #links=dom.getElementsByTagName("link")
+            links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
             for link in links:
-                link.tagName="a"
-                ref=link.getAttribute("ref")
-		pn=link.getAttribute("page")
-
-                if checkRef(self,ref):
-			if pn:
-				link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
-			else:
-				link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)
-
-            str= dom.toxml()
-	    
-	    #print link.toxml('utf-8')
-	    retStr=regexpPage.search(str)
+                #link.tagName="a"
+        
+                ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref")
+                pn=link.getAttributeNS(EMPTY_NAMESPACE,"page")
 
-            try:
-                return retStr.group(1)
+                cns=link.childNodes[0:]
+                
+                newLink=dom.createElementNS(EMPTY_NAMESPACE,"a")
+                for x in cns:
+                        newLink.appendChild(x)
+                
+                        
+                
+                link.parentNode.replaceChild(newLink,link)
+
+                if self.checkRef(ref):
+                        if pn:
+                                newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
+                        else:
+                                newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+
+            #str= dom.toxml('utf-8')
+            buf = cStringIO.StringIO()
+            PrettyPrint(dom, stream=buf)
+            str = buf.getvalue()
+            buf.close()
+            #str=PrettyPrint(dom.documentElement,encoding='UTF-8')
+            #print link.toxml('utf-8')
+            #print type(str)
+            retStr=regexpPage.search(str)
+            
+            try: # hack warum fehlt manchmal page??
+                    return retStr.group(1).decode('utf-8')
             except:
-                exStr="""<?xml version="1.0" ?>"""
-                str=re.sub("\n","",str)
-                #str=
-                #print repr(str)
-                return str.replace(exStr,'')
+                    return str
         return ""
+
     
 def xlink2html(self,xlink,parClass=None):
     ret=""