--- MPIWGWeb/xmlhelper.py 2012/02/15 11:51:47 1.6.2.3 +++ MPIWGWeb/xmlhelper.py 2009/02/18 13:01:17 1.7 @@ -3,21 +3,17 @@ from sys import argv import string import xml.dom.minidom -#import Ft.Xml.XLink.Processor -#import Ft.Xml.XLink.XLinkElements -# -#from Ft.Xml import XPath -#from Ft.Xml.XPath import Evaluate -#from Ft.Xml.XLink import XLINK_NAMESPACE -#from Ft.Xml.XLink import XLinkElements +import Ft.Xml.XLink.Processor +import Ft.Xml.XLink.XLinkElements + +from Ft.Xml import XPath +from Ft.Xml.XPath import Evaluate +from Ft.Xml.XLink import XLINK_NAMESPACE +from Ft.Xml.XLink import XLinkElements #from Ft.Xml.Domlette import NonvalidatingReader,InputSource #from Ft.Xml import EMPTY_NAMESPACE - -#from Ft.Lib import Uri - -from xml.etree import ElementTree -import logging +from Ft.Lib import Uri xml2html={'WEB_normal':('

','

'),'Normal':('

','

'),'WEB_picture':('

','

'),'WEB_figuretitle':('

','

'),'WEB_bibliography':('

','

'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('','')} @@ -35,33 +31,23 @@ def addToDict(dict,name,value): def proj2hash(xmlstring): """wandelt xml-files fuer die projekte in ein hash""" - #dom=xml.dom.minidom.parseString(xmlstring) + dom=xml.dom.minidom.parseString(xmlstring) - tree = ElementTree.fromstring(xmlstring) - - - pars = tree.findall(".//part[0]/par") - + list={} #gettitle - #part= dom.getElementsByTagName('part')[0] - #pars=part.getElementsByTagName('par') - #pars=Evaluate('par',dom.getElementsByTagName('part')[0]) - logging.debug(pars) + pars=Evaluate('par',dom.getElementsByTagName('part')[0]) for par in pars: - logging.debug(par) - className=par.attrib['class'] - #.getAttribute('class') - content=par.text + className=par.getAttribute('class') + content=getText(par.childNodes) addToDict(list,className,content) - list.update(parseTable(tree.find('.//{http://www.w3.org/HTML/1998/html4}table'))) # Parse the Table + list.update(parseTable(dom.getElementsByTagName('html:table')[0])) # Parse the Table #evaluate level 1 - sections = tree.findall(".//part[0]/section") - #sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections - #sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections + + sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections #print sections,dom.getElementsByTagName('part')[0] for section in sections: @@ -75,11 +61,10 @@ def proj2hash(xmlstring): addToDict(list,sec[0],sec[2]) #evaluate higher level sections - sections = tree.findall(".//part[0]/section/section") - #sections=Evaluate('section/section',dom.getElementsByTagName('part')[0]) + + sections=Evaluate('section/section',dom.getElementsByTagName('part')[0]) for section in sections: - logging.debug("sections2:"+repr(section)) sec=parseSection(section) if sec[0]=="WEB_project_header": # Sonderfall project @@ -95,80 +80,63 @@ def proj2hash(xmlstring): def parseSection(section): type="" header="" - #for heading in section.childNodes: - - heading=section.find(".//heading") - # if getattr(heading,'tagName','')=="heading": - - - type=heading.attrib['class'] - logging.debug("parseSection (class):"+type) - header=heading.text - logging.debug("parseSection (header):"+header) - + for heading in section.childNodes: + if getattr(heading,'tagName','')=="heading": + + type=heading.getAttribute('class') + header=getText(heading.childNodes) + if type=="": # falls heading fehlt, pruefe ob erster par richtig - par=section.find(".//par") - #par=section.getElementsByTagName('par')[0] - type=par.attrib['class'] - header=par.text + par=section.getElementsByTagName('par')[0] + type=par.getAttribute('class') + header=getText(par.childNodes) #print section.childNodes - pars=section.findall(".//par") - #pars=Evaluate('par',section) + pars=Evaluate('par',section) content=par2html(pars) return (type,header,content) def parseTable(table): fields={} - rows=table.findall('.//{http://www.w3.org/HTML/1998/html4}tr') - #rows=table.getElementsByTagName('html:tr') + rows=table.getElementsByTagName('html:tr') for row in rows: - logging.debug("ROW") - cols=row.findall('.//{http://www.w3.org/HTML/1998/html4}td') - #cols=row.getElementsByTagName('html:td') + #print "ROW" + cols=row.getElementsByTagName('html:td') #Name des Datenfeldes einlesen try: - field=cols[0].find('.//par').attrib['class'] - #field=cols[0].getElementsByTagName('par')[0].getAttribute('class') + field=cols[0].getElementsByTagName('par')[0].getAttribute('class') #print "field",field except: - logging.debug("error") + print "error" field="" #Wandeln der Eintrge in HTML - - pars=cols[1].findall('.//par') - #pars=cols[1].getElementsByTagName('par') + + pars=cols[1].getElementsByTagName('par') html=par2html(pars,tags=("",";")) - logging.debug("field:"+field) - logging.debug("html:"+html) + addToDict(fields,field,html) #print fields return fields def par2html(pars,tags=None): #html="" - logging.debug("part2html:"+repr(pars)) - if pars is None: - return "" + for par in pars: - logging.debug("part2html:"+repr(par)) + #print "par",par if not tags: try: - tag=xml2html[par.attrib['class']] + tag=xml2html[par.getAttribute('class')] except: tag=('

','

') else: tag=tags - content=par.text - if content is None: - content="" - logging.debug("part2html:"+content) + content=getText(par.childNodes) #print "CONTETN",content #print par.getAttribute('class'),node