MPIWGWeb/xmlhelper.py - diff

Return to xmlhelper.py CVS log

Up to [Repository] / MPIWGWeb

Diff for /MPIWGWeb/xmlhelper.py between versions 1.1.1.1 and 1.6.2.3

version 1.1.1.1, 2004/02/04 10:03:33	version 1.6.2.3, 2012/02/15 11:51:47
Line 3 from sys import argv	Line 3 from sys import argv

import string	import string
import xml.dom.minidom	import xml.dom.minidom
import Ft.Xml.XLink.Processor	#import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements	#import Ft.Xml.XLink.XLinkElements
	#
from Ft.Xml import XPath	#from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate	#from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE	#from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements	#from Ft.Xml.XLink import XLinkElements

#from Ft.Xml.Domlette import NonvalidatingReader,InputSource	#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
#from Ft.Xml import EMPTY_NAMESPACE	#from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri

xml2html={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p>','</p>'),'WEB_figuretitle':('<i>','</i>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('','')}	#from Ft.Lib import Uri

	from xml.etree import ElementTree
	import logging

	xml2html={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('','')}

def addToDict(dict,name,value):	def addToDict(dict,name,value):
if name=="":	if name=="":
Line 31 def addToDict(dict,name,value):	Line 35 def addToDict(dict,name,value):
def proj2hash(xmlstring):	def proj2hash(xmlstring):
"""wandelt xml-files fuer die projekte in ein hash"""	"""wandelt xml-files fuer die projekte in ein hash"""

dom=xml.dom.minidom.parseString(xmlstring)	#dom=xml.dom.minidom.parseString(xmlstring)

	tree = ElementTree.fromstring(xmlstring)


	pars = tree.findall(".//part[0]/par")

list={}	list={}

#gettitle	#gettitle
pars=Evaluate('par',dom.getElementsByTagName('part')[0])	#part= dom.getElementsByTagName('part')[0]
	#pars=part.getElementsByTagName('par')
	#pars=Evaluate('par',dom.getElementsByTagName('part')[0])
	logging.debug(pars)
for par in pars:	for par in pars:
className=par.getAttribute('class')	logging.debug(par)
content=getText(par.childNodes)	className=par.attrib['class']
	#.getAttribute('class')
	content=par.text
addToDict(list,className,content)	addToDict(list,className,content)

list.update(parseTable(dom.getElementsByTagName('html:table')[0])) # Parse the Table	list.update(parseTable(tree.find('.//{http://www.w3.org/HTML/1998/html4}table'))) # Parse the Table

#evaluate level 1	#evaluate level 1
	sections = tree.findall(".//part[0]/section")
sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections	#sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections
	#sections=Evaluate('section',dom.getElementsByTagName('part')[0])# Parse all Sections
#print sections,dom.getElementsByTagName('part')[0]	#print sections,dom.getElementsByTagName('part')[0]
for section in sections:	for section in sections:

sec=parseSection(section)	sec=parseSection(section)
if sec[0]=="WEB_project_header": # Sonderfall project	if sec[0]=="WEB_project_header": # Sonderfall project

addToDict(list,'WEB_project_header',sec[1]) # store title	addToDict(list,'WEB_project_header',sec[1]) # store title
addToDict(list,'WEB_project_description',sec[2]) #store description	addToDict(list,'WEB_project_description',sec[2]) #store description
else: # no information in heading	else: # no information in heading

addToDict(list,sec[0],sec[2])	addToDict(list,sec[0],sec[2])

#evaluate higher level sections	#evaluate higher level sections
	sections = tree.findall(".//part[0]/section/section")
sections=Evaluate('section/section',dom.getElementsByTagName('part')[0])	#sections=Evaluate('section/section',dom.getElementsByTagName('part')[0])

for section in sections:	for section in sections:
	logging.debug("sections2:"+repr(section))
sec=parseSection(section)	sec=parseSection(section)

if sec[0]=="WEB_project_header": # Sonderfall project	if sec[0]=="WEB_project_header": # Sonderfall project
Line 76 def proj2hash(xmlstring):	Line 93 def proj2hash(xmlstring):


def parseSection(section):	def parseSection(section):
heading=section.getElementsByTagName('heading')[0]	type=""
type=heading.getAttribute('class')	header=""
header=getText(heading.childNodes)	#for heading in section.childNodes:

	heading=section.find(".//heading")
	# if getattr(heading,'tagName','')=="heading":


	type=heading.attrib['class']
	logging.debug("parseSection (class):"+type)
	header=heading.text
	logging.debug("parseSection (header):"+header)

	if type=="": # falls heading fehlt, pruefe ob erster par richtig
	par=section.find(".//par")
	#par=section.getElementsByTagName('par')[0]
	type=par.attrib['class']
	header=par.text

#print section.childNodes	#print section.childNodes
pars=Evaluate('par',section)	pars=section.findall(".//par")
	#pars=Evaluate('par',section)
content=par2html(pars)	content=par2html(pars)

return (type,header,content)	return (type,header,content)

def parseTable(table):	def parseTable(table):
fields={}	fields={}
rows=table.getElementsByTagName('html:tr')	rows=table.findall('.//{http://www.w3.org/HTML/1998/html4}tr')
	#rows=table.getElementsByTagName('html:tr')
for row in rows:	for row in rows:
#print "ROW"	logging.debug("ROW")
cols=row.getElementsByTagName('html:td')	cols=row.findall('.//{http://www.w3.org/HTML/1998/html4}td')
	#cols=row.getElementsByTagName('html:td')

#Name des Datenfeldes einlesen	#Name des Datenfeldes einlesen
try:	try:
field=cols[0].getElementsByTagName('par')[0].getAttribute('class')	field=cols[0].find('.//par').attrib['class']
	#field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
#print "field",field	#print "field",field
except:	except:
print "error"	logging.debug("error")
field=""	field=""

#Wandeln der Einträge in HTML	#Wandeln der Einträge in HTML

pars=cols[1].getElementsByTagName('par')	pars=cols[1].findall('.//par')
	#pars=cols[1].getElementsByTagName('par')


html=par2html(pars,tags=("",";"))	html=par2html(pars,tags=("",";"))
	logging.debug("field:"+field)
	logging.debug("html:"+html)
addToDict(fields,field,html)	addToDict(fields,field,html)
#print fields	#print fields
return fields	return fields

def par2html(pars,tags=None):	def par2html(pars,tags=None):
#html=""	#html=""
	logging.debug("part2html:"+repr(pars))
	if pars is None:
	return ""
for par in pars:	for par in pars:
#print "par",par	logging.debug("part2html:"+repr(par))
if not tags:	if not tags:
try:	try:
tag=xml2html[par.getAttribute('class')]	tag=xml2html[par.attrib['class']]
except:	except:
tag=('<p>','</p>')	tag=('<p>','</p>')
else:	else:
tag=tags	tag=tags

content=getText(par.childNodes)	content=par.text
	if content is None:
	content=""
	logging.debug("part2html:"+content)
#print "CONTETN",content	#print "CONTETN",content

#print par.getAttribute('class'),node	#print par.getAttribute('class'),node
Line 165 def getText(nodelist):	Line 209 def getText(nodelist):

rc = u''	rc = u''
for node in nodelist:	for node in nodelist:
print "HHHH"
if node.nodeType == node.TEXT_NODE:	if node.nodeType == node.TEXT_NODE:
#print "node",node	#print "node",node
#print "NODE",node.data.encode('utf-8','ignore'),"V"	#print "NODE",node.data.encode('utf-8','ignore'),"V"
#print "HALII"	#print "HALII"
try:	try:
try:	try:
print "try1"
#rc += node.data.encode('utf-8','ignore')	#rc += node.data.encode('utf-8','ignore')
rc += node.data	rc += node.data

except:	except:
print "try2"
#rc= node.data.encode('utf-8','ignore')	#rc= node.data.encode('utf-8','ignore')
rc=node.data	rc=node.data
except:	except:
Line 187 def getText(nodelist):	Line 228 def getText(nodelist):
node.data.encode('utf-8','ignore')	node.data.encode('utf-8','ignore')
#print "RC",rc	#print "RC",rc
elif node.tagName =="inline":	elif node.tagName =="inline":
print "HI", node.getAttribute('class')
rc+=par2html([node])	rc+=par2html([node])
elif node.attributes:	elif node.attributes:
print "xlink?"
if 'xlink:type' in node.attributes.keys(): #is a xlink?	if 'xlink:type' in node.attributes.keys(): #is a xlink?
rc +=xlink2html(node)	rc +=xlink2html(node)
#print "RWT",rc	#print "RWT",rc

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1.1.1
changed lines
	Added in v.1.6.2.3