File:  [Repository] / MPIWGWeb / xmlhelper.py
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Wed Feb 4 10:03:33 2004 UTC (20 years, 3 months ago) by dwinter
Branches: dwinter
CVS tags: first
first import product fuer www des institutes


from sys import argv

import string
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements

from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements

#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
#from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri

# Lookup table used when rendering paragraphs as HTML: the key is the value of
# an element's "class" attribute, the value is the pair
# (opening markup, closing markup) that is wrapped around the element's text.
xml2html = {
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p>', '</p>'),
    'WEB_figuretitle': ('<i>', '</i>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    # Both spellings of the italic class are listed.
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    # Hyperlinks get no wrapper markup.
    'WEB_hyperlink': ('', ''),
}

def addToDict(dict,name,value):
    """Append ``value`` to the list stored under ``name`` in ``dict``.

    The list is created on first use, so every key of ``dict`` maps to a
    list of all values added for it, in insertion order.

    Parameters:
        dict  -- mapping that is modified in place
        name  -- key to store the value under; the empty string is rejected
        value -- object appended to the list for ``name``

    Returns 1 if the value was stored, 0 if ``name`` was the empty string
    (in which case ``dict`` is left untouched).
    """
    if name == "":
        return 0

    # setdefault replaces the former has_key() test; has_key() no longer
    # exists on Python 3 dictionaries.
    dict.setdefault(name, []).append(value)
    return 1

def proj2hash(xmlstring):
    """Convert the XML description of a project into a dictionary.

    The string is parsed with xml.dom.minidom and the first 'part' element
    is inspected in three steps:

    1. every 'par' child is stored under its 'class' attribute,
    2. the first 'html:table' of the document is merged in via parseTable,
    3. the sections matched by 'section' and then by 'section/section' are
       stored via parseSection.

    Values are collected with addToDict; the table result is merged with
    dict.update and therefore replaces an existing entry of the same name.
    An IndexError is raised if the document has no 'part' or no
    'html:table' element.
    """
    document = xml.dom.minidom.parseString(xmlstring)
    result = {}
    part = document.getElementsByTagName('part')[0]

    # Paragraphs placed directly inside the part (e.g. the title).
    for paragraph in Evaluate('par', part):
        addToDict(result,
                  paragraph.getAttribute('class'),
                  getText(paragraph.childNodes))

    # The data table of the project.
    table = document.getElementsByTagName('html:table')[0]
    result.update(parseTable(table))

    # First-level sections are handled first, nested sections afterwards.
    for path in ('section', 'section/section'):
        for section in Evaluate(path, part):
            kind, title, body = parseSection(section)
            if kind == "WEB_project_header":
                # Special case "project": the heading text carries the title.
                addToDict(result, 'WEB_project_header', title)
                addToDict(result, 'WEB_project_description', body)
            else:
                # The heading itself carries no information.
                addToDict(result, kind, body)

    return result


def parseSection(section):
    """Split a section element into (class, heading text, HTML body).

    The class and the heading text are taken from the first 'heading'
    element found below ``section``; the body is the HTML produced by
    par2html for the nodes that the XPath expression 'par' selects
    relative to ``section``.

    Raises IndexError if the section contains no 'heading' element.
    """
    first_heading = section.getElementsByTagName('heading')[0]
    # Tuple items are evaluated left to right: class, heading text, body.
    return (
        first_heading.getAttribute('class'),
        getText(first_heading.childNodes),
        par2html(Evaluate('par', section)),
    )

def parseTable(table):
    """Read a two-column table into a dictionary of HTML fragments.

    For every 'html:tr' below ``table`` the 'class' attribute of the first
    'par' in the first 'html:td' cell is used as the field name.  The 'par'
    elements of the second cell are converted with par2html using the tag
    pair ("", ";"), i.e. every entry is followed by a semicolon, and the
    result is stored with addToDict.

    Rows with fewer than two 'html:td' cells (for example header rows) have
    no value cell and are skipped.  If the first cell contains no 'par'
    element, "error" is printed and the row is not stored, because
    addToDict rejects the empty field name.

    Returns the dictionary mapping each field name to a list of HTML
    strings.
    """
    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')
        if len(cols) < 2:
            # Without a second cell there is nothing to convert; indexing
            # cols[1] here would abort the whole table with an IndexError.
            continue

        # Read the name of the data field.
        try:
            field = cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except IndexError:
            # The first cell holds no 'par' element.
            print("error")
            field = ""

        # Convert the entries of the value cell to HTML.
        html = par2html(cols[1].getElementsByTagName('par'), tags=("", ";"))

        addToDict(fields, field, html)
    return fields

def par2html(pars,tags=None):
    """Render a sequence of elements as one HTML string.

    Parameters:
        pars -- iterable of DOM elements
        tags -- optional pair (opening markup, closing markup) applied to
                every element.  When it is omitted or empty, the pair is
                looked up in xml2html by the element's 'class' attribute,
                falling back to ('<p>', '</p>') for unknown classes.

    The text of each element is obtained with getText and wrapped in the
    chosen pair; the wrapped pieces are concatenated in order.

    Returns the concatenated HTML, or "" when ``pars`` is empty.
    """
    default_tag = ('<p>', '</p>')
    pieces = []

    for par in pars:
        if tags:
            tag = tags
        else:
            try:
                tag = xml2html[par.getAttribute('class')]
            except (KeyError, AttributeError):
                # Unknown class, or a node without attributes.
                tag = default_tag

        pieces.append(tag[0] + getText(par.childNodes) + tag[1])

    # Collecting the pieces avoids the former "unbound local + bare except"
    # accumulator, which threw away all earlier output whenever a
    # concatenation failed for any reason.
    return "".join(pieces)

def getXlink(nodes):
    """Return the HTML for all xlink nodes found in ``nodes``.

    A node counts as an xlink when it has an 'xlink:type' attribute; each
    such node is converted with xlink2html and the results are concatenated
    in order.  Nodes without attributes are ignored.  Only the given nodes
    are inspected, not their descendants.
    """
    fragments = []
    for candidate in nodes:
        if not candidate.attributes:
            continue
        if 'xlink:type' in candidate.attributes.keys():  # is it an xlink?
            fragments.append(xlink2html(candidate))
    return "".join(fragments)
    
def xlink2html(xlink):
    """Convert a single xlink element to HTML.

    The element's tag name is compared case-insensitively:

    * 'image' becomes an <img> tag whose src is the 'xlink:href' attribute,
    * 'link' becomes an <a> tag whose href is the 'xlink:href' attribute
      and whose content is the element's text as returned by getText.

    Any other element yields the empty string.  The attribute value is
    inserted as-is (it is not HTML-escaped).
    """
    kind = xlink.tagName.lower()

    if kind == "image":
        # The value is quoted, like the href of the link branch; unquoted
        # it broke the tag for URLs containing spaces or '>'.
        return "<img src='%s' />" % xlink.getAttribute('xlink:href')

    if kind == "link":
        return "<a href='%s' >%s</a>" % (xlink.getAttribute('xlink:href'),
                                         getText(xlink.childNodes))

    return ""

def getText(nodelist):
    """Return the content of ``nodelist`` as one unicode HTML string.

    The nodes are processed in order:

    * text and CDATA nodes contribute their character data as-is (no HTML
      escaping is applied here),
    * 'inline' elements are rendered with par2html,
    * other elements that carry an 'xlink:type' attribute are rendered
      with xlink2html,
    * everything else (comments, processing instructions, elements without
      an xlink) contributes nothing.

    Returns u'' for an empty node list.
    """
    pieces = []
    for node in nodelist:
        node_type = node.nodeType
        if node_type in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
            pieces.append(node.data)
        elif node_type != node.ELEMENT_NODE:
            # Comments and processing instructions have no tagName;
            # reading it used to raise AttributeError for such nodes.
            continue
        elif node.tagName == "inline":
            pieces.append(par2html([node]))
        elif node.attributes and 'xlink:type' in node.attributes.keys():
            # The element is an xlink.
            pieces.append(xlink2html(node))
    return u''.join(pieces)


#filename=argv[1]
#fileString=file(filename).read()
#print proj2hash(fileString)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>