File:  [Repository] / MPIWGWeb / xmlhelper.py
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Wed Sep 1 09:35:12 2004 UTC (19 years, 9 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD
bug fixed in parseSection

    1: 
    2: from sys import argv
    3: 
    4: import string
    5: import xml.dom.minidom
    6: import Ft.Xml.XLink.Processor
    7: import Ft.Xml.XLink.XLinkElements
    8: 
    9: from Ft.Xml import XPath
   10: from Ft.Xml.XPath import Evaluate
   11: from Ft.Xml.XLink import XLINK_NAMESPACE
   12: from Ft.Xml.XLink import XLinkElements
   13: 
   14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
   15: #from Ft.Xml import EMPTY_NAMESPACE
   16: from Ft.Lib import Uri
   17: 
   18: xml2html={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p>','</p>'),'WEB_figuretitle':('<i>','</i>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('','')}
   19: 
   20: def addToDict(dict,name,value):
   21:     if name=="":
   22:         return 0
   23:     else:
   24:         
   25:         if not dict.has_key(name):
   26:             dict[name]=[] # als array anlegen
   27: 
   28:         dict[name].append(value)
   29:         return 1    
   30: 
   31: def proj2hash(xmlstring):
   32:     """wandelt xml-files fuer die projekte in ein hash"""
   33:     
   34:     dom=xml.dom.minidom.parseString(xmlstring)
   35:     
   36:         
   37:     list={}
   38: 
   39:     #gettitle
   40:     pars=Evaluate('par',dom.getElementsByTagName('part')[0])
   41:     for par in pars:
   42:         className=par.getAttribute('class')
   43:         content=getText(par.childNodes)
   44:         addToDict(list,className,content)
   45:              
   46:     list.update(parseTable(dom.getElementsByTagName('html:table')[0])) # Parse the Table
   47: 
   48:     #evaluate level 1
   49: 
   50:     sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections
   51:     #print sections,dom.getElementsByTagName('part')[0]
   52:     for section in sections:
   53: 
   54:         sec=parseSection(section)
   55:         if sec[0]=="WEB_project_header": # Sonderfall project
   56: 
   57:             addToDict(list,'WEB_project_header',sec[1]) # store title
   58:             addToDict(list,'WEB_project_description',sec[2]) #store description
   59:         else: # no information in heading
   60: 
   61:             addToDict(list,sec[0],sec[2])
   62: 
   63:     #evaluate higher level sections
   64: 
   65:     sections=Evaluate('section/section',dom.getElementsByTagName('part')[0])
   66: 
   67:     for section in sections:
   68:         sec=parseSection(section)
   69:         
   70:         if sec[0]=="WEB_project_header": # Sonderfall project
   71:             addToDict(list,'WEB_project_header',sec[1]) # store title
   72:             addToDict(list,'WEB_project_description',sec[2]) #store description
   73:         else: # no information in heading
   74:             addToDict(list,sec[0],sec[2])
   75: 
   76:     
   77:     return list
   78: 
   79: 
   80: def parseSection(section):
   81:     type=""
   82:     header=""
   83:     for heading in section.childNodes:
   84:         if getattr(heading,'tagName','')=="heading":
   85:             
   86:             type=heading.getAttribute('class')
   87:             header=getText(heading.childNodes)
   88: 
   89:     if type=="": # falls heading fehlt, pruefe ob erster par richtig
   90:         par=section.getElementsByTagName('par')[0]
   91:         type=par.getAttribute('class')
   92:         header=getText(par.childNodes)
   93:         
   94:     #print section.childNodes
   95:     pars=Evaluate('par',section)
   96:     content=par2html(pars)
   97:     
   98:     return (type,header,content)
   99: 
  100: def parseTable(table):
  101:     fields={}
  102:     rows=table.getElementsByTagName('html:tr')
  103:     for row in rows:
  104:         #print "ROW"
  105:         cols=row.getElementsByTagName('html:td')
  106:         
  107:         #Name des Datenfeldes einlesen
  108:         try:
  109:             field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
  110:             #print "field",field
  111:         except:
  112:             print "error"
  113:             field=""
  114: 
  115:         #Wandeln der Eintrge in HTML
  116: 
  117:         pars=cols[1].getElementsByTagName('par')
  118: 
  119:         
  120:         html=par2html(pars,tags=("",";"))
  121:         
  122:         addToDict(fields,field,html)
  123:         #print fields
  124:     return fields
  125: 
  126: def par2html(pars,tags=None):
  127:     #html=""
  128: 
  129:     for par in pars:
  130:         #print "par",par
  131:         if not tags:
  132:             try:
  133:                 tag=xml2html[par.getAttribute('class')]
  134:             except:
  135:                 tag=('<p>','</p>')
  136:         else:
  137:             tag=tags
  138:         
  139:         content=getText(par.childNodes)
  140:         #print "CONTETN",content
  141:         
  142:         #print par.getAttribute('class'),node
  143:         try:
  144:             html=html+tag[0]+content+tag[1]
  145:         except:
  146:             html=tag[0]+content+tag[1]
  147: 
  148:     try:    
  149:         return html
  150:     except:
  151:         return ""
  152: 
  153: def getXlink(nodes):
  154:     """searches xlinks and gives them back as html"""
  155:     ret=""
  156:     for node in nodes:
  157:         if node.attributes:
  158:             if 'xlink:type' in node.attributes.keys(): #is a xlink?
  159:                 ret +=xlink2html(node)
  160:     return ret
  161:     
  162: def xlink2html(xlink):
  163:     ret=""
  164:     attributes=xlink.attributes
  165:     
  166:     if xlink.tagName.lower()=="image":
  167:         ret +="<img src=%s />"%xlink.getAttribute('xlink:href')
  168:     elif xlink.tagName.lower()=="link":
  169:         ret +="<a href='%s' >%s</a>"%(xlink.getAttribute('xlink:href'),getText(xlink.childNodes))
  170:     
  171:         
  172:         
  173:     
  174:     return ret
  175: 
  176: def getText(nodelist):
  177:     
  178:     rc = u''
  179:     for node in nodelist:
  180:     	if node.nodeType == node.TEXT_NODE:
  181:             #print "node",node
  182:             #print "NODE",node.data.encode('utf-8','ignore'),"V"
  183:             #print "HALII"
  184:             try:
  185:                 try:
  186:                     #rc += node.data.encode('utf-8','ignore')
  187:                     rc += node.data
  188:                                         
  189:                 except:
  190:                     #rc= node.data.encode('utf-8','ignore')
  191:                     rc=node.data
  192:             except:
  193:                 rc="ERROR"
  194:                 #node.data.decode('utf-8','ignore')
  195:                 print "ERROR"
  196:             node.data.encode('utf-8','ignore')
  197:             #print "RC",rc
  198:         elif node.tagName =="inline":
  199:             rc+=par2html([node])
  200:         elif node.attributes:
  201: 
  202:             if 'xlink:type' in node.attributes.keys(): #is a xlink?
  203:                 rc +=xlink2html(node)
  204:     #print "RWT",rc        
  205:     return rc
  206: 
  207: 
  208: #filename=argv[1]
  209: #fileString=file(filename).read()
  210: #print proj2hash(fileString)
  211: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>