--- ECHO_content/vlp_xmlhelpers.py	2005/10/26 11:18:19	1.5
+++ ECHO_content/vlp_xmlhelpers.py	2012/08/29 07:53:31	1.18.2.2
@@ -1,6 +1,7 @@
 from sys import argv
 
 import string
+import logging
 import xml.dom.minidom
 import Ft.Xml.XLink.Processor
 import Ft.Xml.XLink.XLinkElements
@@ -15,149 +16,150 @@ from Ft.Xml import EMPTY_NAMESPACE
 from Ft.Lib import Uri
 import urllib
 import re
+from ECHO_collection import unicodify,utf8ify
 
 patternTXT=r"<\s*txt.*?>(.*?)</txt>"
 regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
 patternPage=r"<\s*page.*?>(.*?)</page>"
 regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
 
-xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
-
-def addToDict(dict,name,value):
-    if name=="":
-        return 0
-    else:
-        
-        if not dict.has_key(name):
-            dict[name]=[] # als array anlegen
-
-        dict[name].append(value)
-        return 1    
-
-def proj2hash(self,xmlstring):
-    """wandelt xml-files fuer die projekte in ein hash"""
-    
-    dom=xml.dom.minidom.parseString(xmlstring)
-    
-        
-    list={}
-
-    #gettitle
-    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
-    for par in pars:
-        className=par.getAttribute('class')
-        content=getText(self,par.childNodes)
-        addToDict(list,className,content)
-             
-
-    sectionXPath="section"
-
-    
-    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-    
-    while sections:
-        
-        for section in sections:
-            
-            sec=parseSection(self,section)
-            
-            if sec[0]=="WEB_project_header": # Sonderfall project
-                addToDict(list,'WEB_project_header',sec[1]) # store title
-                addToDict(list,'WEB_project_description',sec[2]) #store description
-            else: # no information in heading
-                level=int(sec[3])+2
-                aTag="<h%i>"%level
-                eTag="</h%i>"%level
-                addToDict(list,"text",aTag+sec[1]+eTag)
-                addToDict(list,"text",sec[2])
-        sectionXPath+="/section"
-        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-    return list
-
-
-def parseSection(self,section):
-    type=""
-    header=""
-    level=section.getAttribute('level')
-    for heading in section.childNodes:
-        if getattr(heading,'tagName','')=="heading":
-            
-            type=heading.getAttribute('class')
-            header=getText(self,heading.childNodes)
-
-    if type=="": # falls heading fehlt, pruefe ob erster par richtig
-        par=section.getElementsByTagName('par')[0]
-        type=par.getAttribute('class')
-        header=getText(par.childNodes)
-
-    #print section.childNodes
-    #pars=Evaluate('par',section)
-    pars=section.childNodes
-    content=par2html(self,pars)
-    #print "CONTENT",repr(content)
-    return (type,header,content,level)
-
-def parseTable(table):
-    fields={}
-    rows=table.getElementsByTagName('html:tr')
-    for row in rows:
-        #print "ROW"
-        cols=row.getElementsByTagName('html:td')
-        
-        #Name des Datenfeldes einlesen
-        try:
-            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
-            #print "field",field
-        except:
-            print "error"
-            field=""
-
-        #Wandeln der Eintrge in HTML
-
-        #pars=cols[1].getElementsByTagName('par')
-        pars=cols[1].childNodes
-        
-        html=par2html(self,pars,tags=("",";"))
-        
-        addToDict(fields,field,html)
-        #print fields
-    return fields
-
-def par2html(self,pars,tags=None):
-    html=""
-
-    for par in pars:
-        tagName=getattr(par,'tagName','')
-        if tagName in ["par","inline"]:
-            #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
-            #print "par",par
-            if not tags:
-                try:
-                    tag=xml2htmlArray[par.getAttribute('class')]
-                except:
-                    tag=('<p>','</p>')
-            else:
-                tag=tags
-            #print "TAG",tag
-            content=getText(self,par.childNodes,par.getAttribute('class'))
-            
-            
-
-            #print par.getAttribute('class'),node
-            try:
-                html+=tag[0]+content+tag[1]
-            except:
-                html=+tag[0]+content+tag[1]
-            
-        elif tagName=="pb":
-            html+="<pb/>"
-        
-    
-    try:
-
-        return html
-    except:
-        return ""
+#xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
+#
+#def addToDict(dict,name,value):
+#    if name=="":
+#        return 0
+#    else:
+#        
+#        if not dict.has_key(name):
+#            dict[name]=[] # als array anlegen
+#
+#        dict[name].append(value)
+#        return 1    
+#
+#def proj2hash(self,xmlstring):
+#    """wandelt xml-files fuer die projekte in ein hash"""
+#    
+#    dom=xml.dom.minidom.parseString(xmlstring)
+#    
+#        
+#    list={}
+#
+#    #gettitle
+#    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
+#    for par in pars:
+#        className=par.getAttribute('class')
+#        content=getText(self,par.childNodes)
+#        addToDict(list,className,content)
+#             
+#
+#    sectionXPath="section"
+#
+#    
+#    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+#    
+#    while sections:
+#        
+#        for section in sections:
+#            
+#            sec=parseSection(self,section)
+#            
+#            if sec[0]=="WEB_project_header": # Sonderfall project
+#                addToDict(list,'WEB_project_header',sec[1]) # store title
+#                addToDict(list,'WEB_project_description',sec[2]) #store description
+#            else: # no information in heading
+#                level=int(sec[3])+2
+#                aTag="<h%i>"%level
+#                eTag="</h%i>"%level
+#                addToDict(list,"text",aTag+sec[1]+eTag)
+#                addToDict(list,"text",sec[2])
+#        sectionXPath+="/section"
+#        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+#    return list
+#
+#
+#def parseSection(self,section):
+#    type=""
+#    header=""
+#    level=section.getAttribute('level')
+#    for heading in section.childNodes:
+#        if getattr(heading,'tagName','')=="heading":
+#            
+#            type=heading.getAttribute('class')
+#            header=getText(self,heading.childNodes)
+#
+#    if type=="": # falls heading fehlt, pruefe ob erster par richtig
+#        par=section.getElementsByTagName('par')[0]
+#        type=par.getAttribute('class')
+#        header=getText(par.childNodes)
+#
+#    #print section.childNodes
+#    #pars=Evaluate('par',section)
+#    pars=section.childNodes
+#    content=par2html(self,pars)
+#    #print "CONTENT",repr(content)
+#    return (type,header,content,level)
+#
+#def parseTable(table):
+#    fields={}
+#    rows=table.getElementsByTagName('html:tr')
+#    for row in rows:
+#        #print "ROW"
+#        cols=row.getElementsByTagName('html:td')
+#        
+#        #Name des Datenfeldes einlesen
+#        try:
+#            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
+#            #print "field",field
+#        except:
+#            print "error"
+#            field=""
+#
+#        #Wandeln der Eintrge in HTML
+#
+#        #pars=cols[1].getElementsByTagName('par')
+#        pars=cols[1].childNodes
+#        
+#        html=par2html(self,pars,tags=("",";"))
+#        
+#        addToDict(fields,field,html)
+#        #print fields
+#    return fields
+#
+#def par2html(self,pars,tags=None):
+#    html=""
+#
+#    for par in pars:
+#        tagName=getattr(par,'tagName','')
+#        if tagName in ["par","inline"]:
+#            #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
+#            #print "par",par
+#            if not tags:
+#                try:
+#                    tag=xml2htmlArray[par.getAttribute('class')]
+#                except:
+#                    tag=('<p>','</p>')
+#            else:
+#                tag=tags
+#            #print "TAG",tag
+#            content=getText(self,par.childNodes,par.getAttribute('class'))
+#            
+#            
+#
+#            #print par.getAttribute('class'),node
+#            try:
+#                html+=tag[0]+content+tag[1]
+#            except:
+#                html=+tag[0]+content+tag[1]
+#            
+#        elif tagName=="pb":
+#            html+="<pb/>"
+#        
+#    
+#    try:
+#
+#        return html
+#    except:
+#        return ""
 
 def getXlink(nodes):
     """searches xlinks and gives them back as html"""
@@ -175,7 +177,8 @@ def checkRef(self,ref):
              'vl_people':'AND complete =\'yes\'',
              'vl_sites':'AND complete =\'yes\'',
              'vl_transcript':'AND complete =\'yes\'',
-             'vl_essays':'AND online =\'yes\''
+             'vl_essays':'AND online =\'yes\'',
+	     'vl_categories':''
              }
         res=None
         for db in dbs.keys():
@@ -184,11 +187,13 @@ def checkRef(self,ref):
         return res
     
 def link2html(self,str):
-        """link2html liks in html wandeln"""
+        """link2html links in html wandeln"""
         if str:
 
             str=re.sub("\&","&amp;",str)
-            dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+str+"</txt>")
+            dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
+           
+            
             links=dom.getElementsByTagName("link")
             
 
@@ -196,23 +201,121 @@ def link2html(self,str):
                 link.tagName="a"
                 ref=link.getAttribute("ref")
                 pn=link.getAttribute("page")
-                        
+                mk=link.getAttribute("mk")
+                href= link.getAttribute("href")
+                if href:
+                    link.setAttribute("class","external")
+                                    
                 if self.checkRef(ref):
-                        if pn:
-                                link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
-                        else:
-                                link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+                    more = ""
+                    if pn:
+                        more += "&page=%s"%pn
+                        
+                    if mk:
+                        more += "&mk=%s"%mk
+                        
+                    link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more)
 
 
             newxml=dom.toxml('utf-8')
           
+            
+            
             retStr=regexpTXT.search(newxml)
+            retStr = retStr.group(1)
 
-            return retStr.group(1)
+            return retStr.decode('utf-8') # we return unicode
+
+        return u""
+
+def related2html(self,str):
+    """Convert <link> elements of related library items into HTML <a> links; returns the markup as unicode, u"" for empty input (mb 22.11.2006)."""
+    if str:
+                
+        str=re.sub("\&","&amp;",str)  # escape every '&' before the fragment is parsed as XML
+        dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
+        links=dom.getElementsByTagName("link")
+                
+        for link in links:
+            link.tagName = "a"
+            ref = link.getAttribute("ref")
+            pn = link.getAttribute("page")
+            obj = ref[0:3]  # three-character prefix of ref selects the query below
+            
+            """erweiterung der related items von literatur auf weitere datenbankobjekte, mb 09.06.2009"""
+            searchStr = ''
+            if obj == 'lit':            
+                searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
+            elif obj == 'sit':
+                searchStr="select reference from vl_sites where reference =\'%s\' and complete = 'yes'"%(ref)
+            elif obj == 'per':
+                searchStr="select reference from vl_people where reference =\'%s\' and complete = 'yes'"%(ref)
+            elif obj == 'tec':
+                searchStr="select reference from vl_technology where reference =\'%s\' and complete = 'yes'"%(ref)
+            elif obj == 'exp':
+                searchStr="select reference from vl_experiments where reference =\'%s\' and complete = 'yes'"%(ref)
+                
+            res = self.search(var=searchStr)  # NOTE(review): searchStr stays '' for unknown prefixes and ref is interpolated unescaped - confirm self.search copes with both
+                                        
+            if res:
+                if obj == 'lit':
+                    if res[0]['online'] == 1: 
+                        # literature item is available online
+                        if pn:
+                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
+                        else:
+                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+                            
+                        link.setAttribute("title", "click to view!")
+                        link.removeAttribute("ref")
+                        
+                        # prefix preceding the link
+                        prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+                        dom.documentElement.insertBefore(prefix, link)  # NOTE(review): presumably requires link to be a direct child of <txt> - confirm
+  
+                    else:
+                        # literature item exists only as a bibliographic reference
+                        link.setAttribute("alt", unicodify(res[0]['fullreference']))
+                        link.setAttribute("title", "click to expand")
+                        link.setAttribute("onclick", "return toggle(this);")
+                        link.setAttribute("class", "x_offline")
+                        
+                        # prefix inside link text
+                        link.firstChild.data = '+ ' + link.firstChild.data  # NOTE(review): assumes the link's first child is a text node - confirm
+                else:
+                    # links to the other database objects
+                    link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+                    link.setAttribute("title", "click to view")
+                    link.removeAttribute("ref")
+            
+                    # prefix preceding the link
+                    prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+                    dom.documentElement.insertBefore(prefix, link)
+            
+            else:
+                # object not available/released, or a (web) link that uses href instead of ref
+                
+                try:
+                    link.removeAttribute("ref")
+                    link.setAttribute("title", ref)
+                except:
+                    pass
+                
+                
+                # prefix preceding the link
+                prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+                dom.documentElement.insertBefore(prefix, link)
+
+                
+        newxml=dom.toxml('utf-8')
+                
+        retStr=regexpTXT.search(newxml)
+        retStr = retStr.group(1)
+        #logging.debug("related2html out=%s"%repr(retStr))
+        return retStr.decode('utf-8') # we return unicode
+
+    return u""
 
-                           
-        return ""
-    
 
 
 def xml2html(self,str,quote="yes"):
@@ -221,6 +324,7 @@ def xml2html(self,str,quote="yes"):
             if quote=="yes2":
                 str=re.sub("\&","&amp;",str)
             #dom=xml.dom.minidom.parseString(str)
+            logging.debug(str)
             dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
             #links=dom.getElementsByTagName("link")
             links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
@@ -242,7 +346,7 @@ def xml2html(self,str,quote="yes"):
 
                 if self.checkRef(ref):
                         if pn:
-                                newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&p="+pn)
+                                newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
                         else:
                                 newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
 
@@ -257,7 +361,7 @@ def xml2html(self,str,quote="yes"):
             retStr=regexpPage.search(str)
             
             try: # hack warum fehtl manchmal page??
-                    return retStr.group(1)
+                    return retStr.group(1).decode('utf-8')
             except:
                     return str
         return ""