ECHO_content/vlp_xmlhelpers.py - diff

Return to vlp_xmlhelpers.py CVS log

Up to [Repository] / ECHO_content

Diff for /ECHO_content/vlp_xmlhelpers.py between versions 1.4 and 1.15

-version 1.4, 2004/10/06 13:02:56
+version 1.15, 2008/09/08 11:12:41
  Line 1
  from sys import argv
  import string
+ import logging
  import xml.dom.minidom
  import Ft.Xml.XLink.Processor
  import Ft.Xml.XLink.XLinkElements
- Line 9  from Ft.Xml import XPath
+ Line 10  from Ft.Xml import XPath
  from Ft.Xml.XPath import Evaluate
  from Ft.Xml.XLink import XLINK_NAMESPACE
  from Ft.Xml.XLink import XLinkElements
+ import cStringIO
- #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
+ from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
- #from Ft.Xml import EMPTY_NAMESPACE
+ from Ft.Xml import EMPTY_NAMESPACE
  from Ft.Lib import Uri
  import urllib
  import re
+ from ECHO_collection import unicodify,utf8ify
+ patternTXT=r"<\s*txt.*?>(.*?)</txt>"
+ regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
  patternPage=r"<\s*page.*?>(.*?)</page>"
  regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
- xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
+ #xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
+ #
+ #def addToDict(dict,name,value):
+ #    if name=="":
+ #        return 0
+ #    else:
+ #
+ #        if not dict.has_key(name):
+ #            dict[name]=[] # als array anlegen
+ #
+ #        dict[name].append(value)
+ #        return 1
+ #
+ #def proj2hash(self,xmlstring):
+ #    """wandelt xml-files fuer die projekte in ein hash"""
+ #
+ #    dom=xml.dom.minidom.parseString(xmlstring)
+ #
+ #
+ #    list={}
+ #
+ #    #gettitle
+ #    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
+ #    for par in pars:
+ #        className=par.getAttribute('class')
+ #        content=getText(self,par.childNodes)
+ #        addToDict(list,className,content)
+ #
+ #
+ #    sectionXPath="section"
+ #
+ #
+ #    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+ #
+ #    while sections:
+ #
+ #        for section in sections:
+ #
+ #            sec=parseSection(self,section)
+ #
+ #            if sec[0]=="WEB_project_header": # Sonderfall project
+ #                addToDict(list,'WEB_project_header',sec[1]) # store title
+ #                addToDict(list,'WEB_project_description',sec[2]) #store description
+ #            else: # no information in heading
+ #                level=int(sec[3])+2
+ #                aTag="<h%i>"%level
+ #                eTag="</h%i>"%level
+ #                addToDict(list,"text",aTag+sec[1]+eTag)
+ #                addToDict(list,"text",sec[2])
+ #        sectionXPath+="/section"
+ #        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+ #    return list
+ #
+ #
+ #def parseSection(self,section):
+ #    type=""
+ #    header=""
+ #    level=section.getAttribute('level')
+ #    for heading in section.childNodes:
+ #        if getattr(heading,'tagName','')=="heading":
+ #
+ #            type=heading.getAttribute('class')
+ #            header=getText(self,heading.childNodes)
+ #
+ #    if type=="": # falls heading fehlt, pruefe ob erster par richtig
+ #        par=section.getElementsByTagName('par')[0]
+ #        type=par.getAttribute('class')
+ #        header=getText(par.childNodes)
+ #
+ #    #print section.childNodes
+ #    #pars=Evaluate('par',section)
+ #    pars=section.childNodes
+ #    content=par2html(self,pars)
+ #    #print "CONTENT",repr(content)
+ #    return (type,header,content,level)
+ #
+ #def parseTable(table):
+ #    fields={}
+ #    rows=table.getElementsByTagName('html:tr')
+ #    for row in rows:
+ #        #print "ROW"
+ #        cols=row.getElementsByTagName('html:td')
+ #
+ #        #Name des Datenfeldes einlesen
+ #        try:
+ #            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
+ #            #print "field",field
+ #        except:
+ #            print "error"
+ #            field=""
+ #
+ #        #Wandeln der Eintrge in HTML
+ #
+ #        #pars=cols[1].getElementsByTagName('par')
+ #        pars=cols[1].childNodes
+ #
+ #        html=par2html(self,pars,tags=("",";"))
+ #
+ #        addToDict(fields,field,html)
+ #        #print fields
+ #    return fields
+ #
+ #def par2html(self,pars,tags=None):
+ #    html=""
+ #
+ #    for par in pars:
+ #        tagName=getattr(par,'tagName','')
+ #        if tagName in ["par","inline"]:
+ #            #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
+ #            #print "par",par
+ #            if not tags:
+ #                try:
+ #                    tag=xml2htmlArray[par.getAttribute('class')]
+ #                except:
+ #                    tag=('<p>','</p>')
+ #            else:
+ #                tag=tags
+ #            #print "TAG",tag
+ #            content=getText(self,par.childNodes,par.getAttribute('class'))
+ #
+ #
+ #
+ #            #print par.getAttribute('class'),node
+ #            try:
+ #                html+=tag[0]+content+tag[1]
+ #            except:
+ #                html=+tag[0]+content+tag[1]
+ #
+ #        elif tagName=="pb":
+ #            html+="<pb/>"
+ #
+ #
+ #    try:
+ #
+ #        return html
+ #    except:
+ #        return ""
- def addToDict(dict,name,value):
+ def getXlink(nodes):
-     if name=="":
+     """searches xlinks and gives them back as html"""
-         return 0
+     ret=""
-     else:
+     for node in nodes:
+         if node.attributes:
+             if 'xlink:type' in node.attributes.keys(): #is a xlink?
+                 ret +=xlink2html(node)
+     return ret
-         if not dict.has_key(name):
+ def checkRef(self,ref):
-             dict[name]=[] # als array anlegen
+         """teste ob reference angezeigt werden sollen"""
+         dbs={'vl_literature':'AND online = \'1\'',
+              'vl_technology':'AND complete =\'yes\'',
+              'vl_people':'AND complete =\'yes\'',
+              'vl_sites':'AND complete =\'yes\'',
+              'vl_transcript':'AND complete =\'yes\'',
+              'vl_essays':'AND online =\'yes\'',
+          'vl_categories':''
+              }
+         res=None
+         for db in dbs.keys():
+             searchStr=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db]))
+             res=res or self.search(var=searchStr)
+         return res
-         dict[name].append(value)
+ def link2html(self,str):
-         return 1
+         """link2html links in html wandeln"""
+         if str:
- def proj2hash(self,xmlstring):
+             str=re.sub("\&","&amp;",str)
-     """wandelt xml-files fuer die projekte in ein hash"""
+             dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
-     dom=xml.dom.minidom.parseString(xmlstring)
+             links=dom.getElementsByTagName("link")
-     list={}
-     #gettitle
+             for link in links:
-     pars=Evaluate('par',dom.getElementsByTagName('part')[0])
+                 link.tagName="a"
-     for par in pars:
+                 ref=link.getAttribute("ref")
-         className=par.getAttribute('class')
+                 pn=link.getAttribute("page")
-         content=getText(self,par.childNodes)
+                 mk=link.getAttribute("mk")
-         addToDict(list,className,content)
+                 href= link.getAttribute("href")
+                 if href:
+                     link.setAttribute("class","external")
+                 if self.checkRef(ref):
+                     more = ""
+                     if pn:
+                         more += "&page=%s"%pn
-     sectionXPath="section"
+                     if mk:
+                         more += "&mk=%s"%mk
+                     link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more)
-     sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-     while sections:
+             newxml=dom.toxml('utf-8')
-         for section in sections:
-             sec=parseSection(self,section)
-             if sec[0]=="WEB_project_header": # Sonderfall project
+             retStr=regexpTXT.search(newxml)
-                 addToDict(list,'WEB_project_header',sec[1]) # store title
+             retStr = retStr.group(1)
-                 addToDict(list,'WEB_project_description',sec[2]) #store description
-             else: # no information in heading
-                 level=int(sec[3])+2
-                 aTag="<h%i>"%level
-                 eTag="</h%i>"%level
-                 addToDict(list,"text",aTag+sec[1]+eTag)
-                 addToDict(list,"text",sec[2])
-         sectionXPath+="/section"
-         sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-     return list
+             return retStr.decode('utf-8') # we return unicode
- def parseSection(self,section):
+         return u""
-     type=""
-     header=""
-     level=section.getAttribute('level')
-     for heading in section.childNodes:
-         if getattr(heading,'tagName','')=="heading":
-             type=heading.getAttribute('class')
+ def related2html(self,str):
-             header=getText(self,heading.childNodes)
+     """related library items: xlinks in html wandeln / mb 22.11.2006"""
+     if str:
-     if type=="": # falls heading fehlt, pruefe ob erster par richtig
+         str=re.sub("\&","&amp;",str)
-         par=section.getElementsByTagName('par')[0]
+         dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
-         type=par.getAttribute('class')
+         links=dom.getElementsByTagName("link")
-         header=getText(par.childNodes)
-     #print section.childNodes
+         for link in links:
-     #pars=Evaluate('par',section)
+             link.tagName = "a"
-     pars=section.childNodes
+             ref = link.getAttribute("ref")
-     content=par2html(self,pars)
+             pn = link.getAttribute("page")
-     #print "CONTENT",repr(content)
-     return (type,header,content,level)
- def parseTable(table):
+             searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
-     fields={}
+             res = self.search(var=searchStr)
-     rows=table.getElementsByTagName('html:tr')
-     for row in rows:
-         #print "ROW"
-         cols=row.getElementsByTagName('html:td')
-         #Name des Datenfeldes einlesen
+             if res:
-         try:
+                 if res[0]['online'] == 1:
-             field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
+                     # item online verfuegbar
-             #print "field",field
+                     if pn:
-         except:
+                         link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
-             print "error"
+                     else:
-             field=""
+                         link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
-         #Wandeln der Eintrge in HTML
+                     link.setAttribute("title", "click to view")
+                     link.removeAttribute("ref")
-         #pars=cols[1].getElementsByTagName('par')
+                     # prefix preceding the link
-         pars=cols[1].childNodes
+                     prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+                     dom.documentElement.insertBefore(prefix, link)
-         html=par2html(self,pars,tags=("",";"))
-         addToDict(fields,field,html)
-         #print fields
-     return fields
- def par2html(self,pars,tags=None):
-     html=""
-     for par in pars:
-         tagName=getattr(par,'tagName','')
-         if tagName in ["par","inline"]:
-             #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
-             #print "par",par
-             if not tags:
-                 try:
-                     tag=xml2htmlArray[par.getAttribute('class')]
-                 except:
-                     tag=('<p>','</p>')
              else:
-                 tag=tags
+                     # item nur als bibliographische angabe vorhanden
-             #print "TAG",tag
+                     link.setAttribute("alt", unicodify(res[0]['fullreference']))
-             content=getText(self,par.childNodes,par.getAttribute('class'))
+                     link.setAttribute("title", "click to expand")
+                     link.setAttribute("onclick", "return toggle(this);")
+                     link.setAttribute("class", "x_offline")
+                     # prefix inside link text
+                     link.firstChild.data = '+ ' + link.firstChild.data
-             #print par.getAttribute('class'),node
-             try:
-                 html+=tag[0]+content+tag[1]
-             except:
-                 html=+tag[0]+content+tag[1]
-         elif tagName=="pb":
-             html+="<pb/>"
+         newxml=dom.toxml('utf-8')
-     try:
+         retStr=regexpTXT.search(newxml)
+         retStr = retStr.group(1)
+         #logging.debug("related2html out=%s"%repr(retStr))
+         return retStr.decode('utf-8') # we return unicode
-         return html
+     return u""
-     except:
-         return ""
- def getXlink(nodes):
-     """searches xlinks and gives them back as html"""
-     ret=""
-     for node in nodes:
-         if node.attributes:
-             if 'xlink:type' in node.attributes.keys(): #is a xlink?
-                 ret +=xlink2html(node)
-     return ret
- def checkRef(self,ref):
-         dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
-         res=None
-         for db in dbs.keys():
-             res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
-         return res
  def xml2html(self,str,quote="yes"):
          """link2html fuer VLP muss hier noch raus"""
          if str:
              if quote=="yes2":
                  str=re.sub("\&","&amp;",str)
+             #dom=xml.dom.minidom.parseString(str)
+             dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
+             #links=dom.getElementsByTagName("link")
+             links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
+             for link in links:
+                 #link.tagName="a"
-             str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
+                 ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref")
-             #str=re.sub("ref\=([.[*^[>]]])",'XX',str)
+                 pn=link.getAttributeNS(EMPTY_NAMESPACE,"page")
-             #print "STR::",str
-             dom=xml.dom.minidom.parseString(str)
+                 cns=link.childNodes[0:]
-             links=dom.getElementsByTagName("link")
+                 newLink=dom.createElementNS(EMPTY_NAMESPACE,"a")
+                 for x in cns:
+                         newLink.appendChild(x)
-             for link in links:
-                 link.tagName="a"
-                 ref=link.getAttribute("ref")
-         pn=link.getAttribute("page")
-                 if checkRef(self,ref):
+                 link.parentNode.replaceChild(newLink,link)
+                 if self.checkRef(ref):
              if pn:
-                 link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
+                                 newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
              else:
-                 link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)
+                                 newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
-             str= dom.toxml()
+             #str= dom.toxml('utf-8')
+             buf = cStringIO.StringIO()
+             PrettyPrint(dom, stream=buf)
+             str = buf.getvalue()
+             buf.close()
+             #str=PrettyPrint(dom.documentElement,encoding='UTF-8')
          #print link.toxml('utf-8')
+             #print type(str)
          retStr=regexpPage.search(str)
-             try:
+             try: # hack warum fehtl manchmal page??
-                 return retStr.group(1)
+                     return retStr.group(1).decode('utf-8')
              except:
-                 exStr="""<?xml version="1.0" ?>"""
+                     return str
-                 str=re.sub("\n","",str)
-                 #str=
-                 #print repr(str)
-                 return str.replace(exStr,'')
          return ""
  def xlink2html(self,xlink,parClass=None):
      ret=""
      attributes=xlink.attributes

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.4
changed lines
	Added in v.1.15