version 1.6, 2004/09/03 08:04:22
|
version 1.6.2.3, 2012/02/15 11:51:47
|
Line 3 from sys import argv
|
Line 3 from sys import argv
|
|
|
import string |
import string |
import xml.dom.minidom |
import xml.dom.minidom |
import Ft.Xml.XLink.Processor |
#import Ft.Xml.XLink.Processor |
import Ft.Xml.XLink.XLinkElements |
#import Ft.Xml.XLink.XLinkElements |
|
# |
from Ft.Xml import XPath |
#from Ft.Xml import XPath |
from Ft.Xml.XPath import Evaluate |
#from Ft.Xml.XPath import Evaluate |
from Ft.Xml.XLink import XLINK_NAMESPACE |
#from Ft.Xml.XLink import XLINK_NAMESPACE |
from Ft.Xml.XLink import XLinkElements |
#from Ft.Xml.XLink import XLinkElements |
|
|
#from Ft.Xml.Domlette import NonvalidatingReader,InputSource |
#from Ft.Xml.Domlette import NonvalidatingReader,InputSource |
#from Ft.Xml import EMPTY_NAMESPACE |
#from Ft.Xml import EMPTY_NAMESPACE |
from Ft.Lib import Uri |
|
|
#from Ft.Lib import Uri |
|
|
|
from xml.etree import ElementTree |
|
import logging |
|
|
# Lookup table keyed by the ``class`` attribute of a ``par`` element.
# Each value is the (opening, closing) HTML markup that par2html looks up
# for a paragraph of that class.
xml2html = {
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p class="picture">', '</p>'),
    'WEB_figuretitle': ('<p class="picturetitle">', '</p>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    # Both spellings are listed on purpose; keep the two keys.
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    # Hyperlink classes are looked up but contribute no wrapping markup.
    'WEB_hyperlink': ('', ''),
    'Hyperlink': ('', ''),
}
|
|
Line 31 def addToDict(dict,name,value):
|
Line 35 def addToDict(dict,name,value):
|
def proj2hash(xmlstring): |
def proj2hash(xmlstring): |
"""wandelt xml-files fuer die projekte in ein hash""" |
"""wandelt xml-files fuer die projekte in ein hash""" |
|
|
dom=xml.dom.minidom.parseString(xmlstring) |
#dom=xml.dom.minidom.parseString(xmlstring) |
|
|
|
tree = ElementTree.fromstring(xmlstring) |
|
|
|
|
|
pars = tree.findall(".//part[0]/par") |
|
|
list={} |
list={} |
|
|
#gettitle |
#gettitle |
pars=Evaluate('par',dom.getElementsByTagName('part')[0]) |
#part= dom.getElementsByTagName('part')[0] |
|
#pars=part.getElementsByTagName('par') |
|
#pars=Evaluate('par',dom.getElementsByTagName('part')[0]) |
|
logging.debug(pars) |
for par in pars: |
for par in pars: |
className=par.getAttribute('class') |
logging.debug(par) |
content=getText(par.childNodes) |
className=par.attrib['class'] |
|
#.getAttribute('class') |
|
content=par.text |
addToDict(list,className,content) |
addToDict(list,className,content) |
|
|
list.update(parseTable(dom.getElementsByTagName('html:table')[0])) # Parse the Table |
list.update(parseTable(tree.find('.//{http://www.w3.org/HTML/1998/html4}table'))) # Parse the Table |
|
|
#evaluate level 1 |
#evaluate level 1 |
|
sections = tree.findall(".//part[0]/section") |
sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections |
#sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections |
|
#sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections |
#print sections,dom.getElementsByTagName('part')[0] |
#print sections,dom.getElementsByTagName('part')[0] |
for section in sections: |
for section in sections: |
|
|
Line 61 def proj2hash(xmlstring):
|
Line 75 def proj2hash(xmlstring):
|
addToDict(list,sec[0],sec[2]) |
addToDict(list,sec[0],sec[2]) |
|
|
#evaluate higher level sections |
#evaluate higher level sections |
|
sections = tree.findall(".//part[0]/section/section") |
sections=Evaluate('section/section',dom.getElementsByTagName('part')[0]) |
#sections=Evaluate('section/section',dom.getElementsByTagName('part')[0]) |
|
|
for section in sections: |
for section in sections: |
|
logging.debug("sections2:"+repr(section)) |
sec=parseSection(section) |
sec=parseSection(section) |
|
|
if sec[0]=="WEB_project_header": # Sonderfall project |
if sec[0]=="WEB_project_header": # Sonderfall project |
Line 80 def proj2hash(xmlstring):
|
Line 95 def proj2hash(xmlstring):
|
def parseSection(section):
    """Extract class, title and HTML body from a ``section`` element.

    The class and title are read from the first ``heading`` descendant of
    *section*.  If there is no heading, or its ``class`` attribute is
    missing or empty, the first ``par`` descendant is used instead.

    section -- an ElementTree element (anything offering ``find`` and
               ``findall``).

    Returns a tuple ``(type, header, content)``: the class name, the title
    text (``""`` when the element has no text) and the result of
    ``par2html`` applied to every ``par`` descendant of *section*.
    """
    section_class = ""
    header = ""

    # find() returns None when there is no heading; guard against that so
    # the fallback below is actually reachable.
    heading = section.find(".//heading")
    if heading is not None:
        section_class = heading.get('class', "")
        header = heading.text or ""

    # Lazy %-formatting: never raises on unexpected values.
    logging.debug("parseSection (class):%s", section_class)
    logging.debug("parseSection (header):%s", header)

    if section_class == "":
        # Heading missing (or without class): check whether the first par
        # carries the class instead.
        first_par = section.find(".//par")
        if first_par is not None:
            section_class = first_par.get('class', "")
            header = first_par.text or ""

    # NOTE(review): './/par' also matches pars of nested sections; the
    # earlier XPath-based revision selected direct children only -- confirm
    # which is intended.
    pars = section.findall(".//par")
    content = par2html(pars)

    return (section_class, header, content)
|
|
def parseTable(table):
    """Turn a two-column HTML table into a field dictionary.

    For every ``tr`` row, the ``class`` attribute of the first ``par`` in
    the first ``td`` cell names the field; the ``par`` elements of the
    second cell are rendered with ``par2html`` (each followed by ``";"``)
    and stored under that name via ``addToDict``.

    table -- ElementTree element of the table, or None when the document
             has no table.

    Returns the dictionary of collected fields; empty when *table* is None
    or has no usable rows.
    """
    fields = {}
    if table is None:
        # The caller passes the result of find(), which is None when the
        # document contains no table.
        return fields

    html_ns = '{http://www.w3.org/HTML/1998/html4}'
    row_path = './/' + html_ns + 'tr'
    cell_path = './/' + html_ns + 'td'

    for row in table.findall(row_path):
        logging.debug("ROW")
        cols = row.findall(cell_path)
        if len(cols) < 2:
            # No name/value pair in this row (e.g. a header row); skip it
            # instead of failing on the missing value cell.
            logging.debug("error")
            continue

        # Read the name of the data field.
        field = None
        name_par = cols[0].find('.//par')
        if name_par is not None:
            field = name_par.get('class')
        if field is None:
            logging.debug("error")
            field = ""

        # Convert the entries to HTML.
        pars = cols[1].findall('.//par')
        html = par2html(pars, tags=("", ";"))

        logging.debug("field:%s", field)
        logging.debug("html:%s", html)
        addToDict(fields, field, html)

    return fields
|
|
def par2html(pars,tags=None): |
def par2html(pars,tags=None): |
#html="" |
#html="" |
|
logging.debug("part2html:"+repr(pars)) |
|
if pars is None: |
|
return "" |
for par in pars: |
for par in pars: |
#print "par",par |
logging.debug("part2html:"+repr(par)) |
if not tags: |
if not tags: |
try: |
try: |
tag=xml2html[par.getAttribute('class')] |
tag=xml2html[par.attrib['class']] |
except: |
except: |
tag=('<p>','</p>') |
tag=('<p>','</p>') |
else: |
else: |
tag=tags |
tag=tags |
|
|
content=getText(par.childNodes) |
content=par.text |
|
if content is None: |
|
content="" |
|
logging.debug("part2html:"+content) |
#print "CONTETN",content |
#print "CONTETN",content |
|
|
#print par.getAttribute('class'),node |
#print par.getAttribute('class'),node |