--- ECHO_content/vlp_xmlhelpers.py 2007/12/11 17:00:01 1.13 +++ ECHO_content/vlp_xmlhelpers.py 2008/08/05 16:17:46 1.14 @@ -23,143 +23,143 @@ regexpTXT = re.compile(patternTXT, re.IG patternPage=r"<\s*page.*?>(.*?)" regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL) -xml2htmlArray={'WEB_normal':('

','

'),'Normal':('

','

'),'WEB_picture':('

','

'),'WEB_figuretitle':('

','

'),'WEB_bibliography':('

','

'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('

','

'),'FigureTitle':('

','

')} - -def addToDict(dict,name,value): - if name=="": - return 0 - else: - - if not dict.has_key(name): - dict[name]=[] # als array anlegen - - dict[name].append(value) - return 1 - -def proj2hash(self,xmlstring): - """wandelt xml-files fuer die projekte in ein hash""" - - dom=xml.dom.minidom.parseString(xmlstring) - - - list={} - - #gettitle - pars=Evaluate('par',dom.getElementsByTagName('part')[0]) - for par in pars: - className=par.getAttribute('class') - content=getText(self,par.childNodes) - addToDict(list,className,content) - - - sectionXPath="section" - - - sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) - - while sections: - - for section in sections: - - sec=parseSection(self,section) - - if sec[0]=="WEB_project_header": # Sonderfall project - addToDict(list,'WEB_project_header',sec[1]) # store title - addToDict(list,'WEB_project_description',sec[2]) #store description - else: # no information in heading - level=int(sec[3])+2 - aTag=""%level - eTag=""%level - addToDict(list,"text",aTag+sec[1]+eTag) - addToDict(list,"text",sec[2]) - sectionXPath+="/section" - sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) - return list - - -def parseSection(self,section): - type="" - header="" - level=section.getAttribute('level') - for heading in section.childNodes: - if getattr(heading,'tagName','')=="heading": - - type=heading.getAttribute('class') - header=getText(self,heading.childNodes) - - if type=="": # falls heading fehlt, pruefe ob erster par richtig - par=section.getElementsByTagName('par')[0] - type=par.getAttribute('class') - header=getText(par.childNodes) - - #print section.childNodes - #pars=Evaluate('par',section) - pars=section.childNodes - content=par2html(self,pars) - #print "CONTENT",repr(content) - return (type,header,content,level) - -def parseTable(table): - fields={} - rows=table.getElementsByTagName('html:tr') - for row in rows: - #print "ROW" - cols=row.getElementsByTagName('html:td') - - #Name des Datenfeldes einlesen - try: - field=cols[0].getElementsByTagName('par')[0].getAttribute('class') - #print "field",field - except: - print "error" - field="" - - #Wandeln der Eintrge in HTML - - #pars=cols[1].getElementsByTagName('par') - pars=cols[1].childNodes - - html=par2html(self,pars,tags=("",";")) - - addToDict(fields,field,html) - #print fields - return fields - -def par2html(self,pars,tags=None): - html="" - - for par in pars: - tagName=getattr(par,'tagName','') - if tagName in ["par","inline"]: - #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND') - #print "par",par - if not tags: - try: - tag=xml2htmlArray[par.getAttribute('class')] - except: - tag=('

','

') - else: - tag=tags - #print "TAG",tag - content=getText(self,par.childNodes,par.getAttribute('class')) - - - - #print par.getAttribute('class'),node - try: - html+=tag[0]+content+tag[1] - except: - html=+tag[0]+content+tag[1] - - elif tagName=="pb": - html+="" - - - try: - - return html - except: - return "" +#xml2htmlArray={'WEB_normal':('

','

'),'Normal':('

','

'),'WEB_picture':('

','

'),'WEB_figuretitle':('

','

'),'WEB_bibliography':('

','

'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('

','

'),'FigureTitle':('

','

')} +# +#def addToDict(dict,name,value): +# if name=="": +# return 0 +# else: +# +# if not dict.has_key(name): +# dict[name]=[] # als array anlegen +# +# dict[name].append(value) +# return 1 +# +#def proj2hash(self,xmlstring): +# """wandelt xml-files fuer die projekte in ein hash""" +# +# dom=xml.dom.minidom.parseString(xmlstring) +# +# +# list={} +# +# #gettitle +# pars=Evaluate('par',dom.getElementsByTagName('part')[0]) +# for par in pars: +# className=par.getAttribute('class') +# content=getText(self,par.childNodes) +# addToDict(list,className,content) +# +# +# sectionXPath="section" +# +# +# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) +# +# while sections: +# +# for section in sections: +# +# sec=parseSection(self,section) +# +# if sec[0]=="WEB_project_header": # Sonderfall project +# addToDict(list,'WEB_project_header',sec[1]) # store title +# addToDict(list,'WEB_project_description',sec[2]) #store description +# else: # no information in heading +# level=int(sec[3])+2 +# aTag=""%level +# eTag=""%level +# addToDict(list,"text",aTag+sec[1]+eTag) +# addToDict(list,"text",sec[2]) +# sectionXPath+="/section" +# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) +# return list +# +# +#def parseSection(self,section): +# type="" +# header="" +# level=section.getAttribute('level') +# for heading in section.childNodes: +# if getattr(heading,'tagName','')=="heading": +# +# type=heading.getAttribute('class') +# header=getText(self,heading.childNodes) +# +# if type=="": # falls heading fehlt, pruefe ob erster par richtig +# par=section.getElementsByTagName('par')[0] +# type=par.getAttribute('class') +# header=getText(par.childNodes) +# +# #print section.childNodes +# #pars=Evaluate('par',section) +# pars=section.childNodes +# content=par2html(self,pars) +# #print "CONTENT",repr(content) +# return (type,header,content,level) +# +#def parseTable(table): +# fields={} +# rows=table.getElementsByTagName('html:tr') +# for row in rows: +# #print "ROW" +# cols=row.getElementsByTagName('html:td') +# +# #Name des Datenfeldes einlesen +# try: +# field=cols[0].getElementsByTagName('par')[0].getAttribute('class') +# #print "field",field +# except: +# print "error" +# field="" +# +# #Wandeln der Eintrge in HTML +# +# #pars=cols[1].getElementsByTagName('par') +# pars=cols[1].childNodes +# +# html=par2html(self,pars,tags=("",";")) +# +# addToDict(fields,field,html) +# #print fields +# return fields +# +#def par2html(self,pars,tags=None): +# html="" +# +# for par in pars: +# tagName=getattr(par,'tagName','') +# if tagName in ["par","inline"]: +# #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND') +# #print "par",par +# if not tags: +# try: +# tag=xml2htmlArray[par.getAttribute('class')] +# except: +# tag=('

','

') +# else: +# tag=tags +# #print "TAG",tag +# content=getText(self,par.childNodes,par.getAttribute('class')) +# +# +# +# #print par.getAttribute('class'),node +# try: +# html+=tag[0]+content+tag[1] +# except: +# html=+tag[0]+content+tag[1] +# +# elif tagName=="pb": +# html+="" +# +# +# try: +# +# return html +# except: +# return "" def getXlink(nodes): """searches xlinks and gives them back as html"""