File:  [Repository] / MPIWGWeb / xmlhelper.py
Revision 1.6.2.2: download - view: text, annotated - select for diffs - revision graph
Mon Jan 9 07:33:31 2012 UTC (12 years, 5 months ago) by dwinter
Branches: r2
Errors in template and helper fixed

    1: 
    2: from sys import argv
    3: 
    4: import string
    5: import xml.dom.minidom
    6: #import Ft.Xml.XLink.Processor
    7: #import Ft.Xml.XLink.XLinkElements
    8: #
    9: #from Ft.Xml import XPath
   10: #from Ft.Xml.XPath import Evaluate
   11: #from Ft.Xml.XLink import XLINK_NAMESPACE
   12: #from Ft.Xml.XLink import XLinkElements
   13: 
   14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
   15: #from Ft.Xml import EMPTY_NAMESPACE
   16: 
   17: #from Ft.Lib import Uri
   18: 
   19: xml2html={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('','')}
   20: 
   21: def addToDict(dict,name,value):
   22:     if name=="":
   23:         return 0
   24:     else:
   25:         
   26:         if not dict.has_key(name):
   27:             dict[name]=[] # als array anlegen
   28: 
   29:         dict[name].append(value)
   30:         return 1    
   31: 
   32: def proj2hash(xmlstring):
   33:     """wandelt xml-files fuer die projekte in ein hash"""
   34:     
   35:     dom=xml.dom.minidom.parseString(xmlstring)
   36:     
   37:         
   38:     list={}
   39: 
   40:     #gettitle
   41:     pars=Evaluate('par',dom.getElementsByTagName('part')[0])
   42:     for par in pars:
   43:         className=par.getAttribute('class')
   44:         content=getText(par.childNodes)
   45:         addToDict(list,className,content)
   46:              
   47:     list.update(parseTable(dom.getElementsByTagName('html:table')[0])) # Parse the Table
   48: 
   49:     #evaluate level 1
   50: 
   51:     sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections
   52:     #print sections,dom.getElementsByTagName('part')[0]
   53:     for section in sections:
   54: 
   55:         sec=parseSection(section)
   56:         if sec[0]=="WEB_project_header": # Sonderfall project
   57: 
   58:             addToDict(list,'WEB_project_header',sec[1]) # store title
   59:             addToDict(list,'WEB_project_description',sec[2]) #store description
   60:         else: # no information in heading
   61: 
   62:             addToDict(list,sec[0],sec[2])
   63: 
   64:     #evaluate higher level sections
   65: 
   66:     sections=Evaluate('section/section',dom.getElementsByTagName('part')[0])
   67: 
   68:     for section in sections:
   69:         sec=parseSection(section)
   70:         
   71:         if sec[0]=="WEB_project_header": # Sonderfall project
   72:             addToDict(list,'WEB_project_header',sec[1]) # store title
   73:             addToDict(list,'WEB_project_description',sec[2]) #store description
   74:         else: # no information in heading
   75:             addToDict(list,sec[0],sec[2])
   76: 
   77:     
   78:     return list
   79: 
   80: 
   81: def parseSection(section):
   82:     type=""
   83:     header=""
   84:     for heading in section.childNodes:
   85:         if getattr(heading,'tagName','')=="heading":
   86:             
   87:             type=heading.getAttribute('class')
   88:             header=getText(heading.childNodes)
   89: 
   90:     if type=="": # falls heading fehlt, pruefe ob erster par richtig
   91:         par=section.getElementsByTagName('par')[0]
   92:         type=par.getAttribute('class')
   93:         header=getText(par.childNodes)
   94:         
   95:     #print section.childNodes
   96:     pars=Evaluate('par',section)
   97:     content=par2html(pars)
   98:     
   99:     return (type,header,content)
  100: 
  101: def parseTable(table):
  102:     fields={}
  103:     rows=table.getElementsByTagName('html:tr')
  104:     for row in rows:
  105:         #print "ROW"
  106:         cols=row.getElementsByTagName('html:td')
  107:         
  108:         #Name des Datenfeldes einlesen
  109:         try:
  110:             field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
  111:             #print "field",field
  112:         except:
  113:             print "error"
  114:             field=""
  115: 
  116:         #Wandeln der Eintrge in HTML
  117: 
  118:         pars=cols[1].getElementsByTagName('par')
  119: 
  120:         
  121:         html=par2html(pars,tags=("",";"))
  122:         
  123:         addToDict(fields,field,html)
  124:         #print fields
  125:     return fields
  126: 
  127: def par2html(pars,tags=None):
  128:     #html=""
  129: 
  130:     for par in pars:
  131:         #print "par",par
  132:         if not tags:
  133:             try:
  134:                 tag=xml2html[par.getAttribute('class')]
  135:             except:
  136:                 tag=('<p>','</p>')
  137:         else:
  138:             tag=tags
  139:         
  140:         content=getText(par.childNodes)
  141:         #print "CONTETN",content
  142:         
  143:         #print par.getAttribute('class'),node
  144:         try:
  145:             html=html+tag[0]+content+tag[1]
  146:         except:
  147:             html=tag[0]+content+tag[1]
  148: 
  149:     try:    
  150:         return html
  151:     except:
  152:         return ""
  153: 
  154: def getXlink(nodes):
  155:     """searches xlinks and gives them back as html"""
  156:     ret=""
  157:     for node in nodes:
  158:         if node.attributes:
  159:             if 'xlink:type' in node.attributes.keys(): #is a xlink?
  160:                 ret +=xlink2html(node)
  161:     return ret
  162:     
  163: def xlink2html(xlink):
  164:     ret=""
  165:     attributes=xlink.attributes
  166:     
  167:     if xlink.tagName.lower()=="image":
  168:         ret +="<img src=%s />"%xlink.getAttribute('xlink:href')
  169:     elif xlink.tagName.lower()=="link":
  170:         ret +="<a href='%s' >%s</a>"%(xlink.getAttribute('xlink:href'),getText(xlink.childNodes))
  171:     
  172:         
  173:         
  174:     
  175:     return ret
  176: 
  177: def getText(nodelist):
  178:     
  179:     rc = u''
  180:     for node in nodelist:
  181:         if node.nodeType == node.TEXT_NODE:
  182:             #print "node",node
  183:             #print "NODE",node.data.encode('utf-8','ignore'),"V"
  184:             #print "HALII"
  185:             try:
  186:                 try:
  187:                     #rc += node.data.encode('utf-8','ignore')
  188:                     rc += node.data
  189:                                         
  190:                 except:
  191:                     #rc= node.data.encode('utf-8','ignore')
  192:                     rc=node.data
  193:             except:
  194:                 rc="ERROR"
  195:                 #node.data.decode('utf-8','ignore')
  196:                 print "ERROR"
  197:             node.data.encode('utf-8','ignore')
  198:             #print "RC",rc
  199:         elif node.tagName =="inline":
  200:             rc+=par2html([node])
  201:         elif node.attributes:
  202: 
  203:             if 'xlink:type' in node.attributes.keys(): #is a xlink?
  204:                 rc +=xlink2html(node)
  205:     #print "RWT",rc        
  206:     return rc
  207: 
  208: 
  209: #filename=argv[1]
  210: #fileString=file(filename).read()
  211: #print proj2hash(fileString)
  212: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>