# Annotation of MPIWGWeb/xmlhelper.py, revision 1.6.2.3
1.1 dwinter 1:
2: from sys import argv
3:
4: import string
5: import xml.dom.minidom
1.6.2.2 dwinter 6: #import Ft.Xml.XLink.Processor
7: #import Ft.Xml.XLink.XLinkElements
8: #
9: #from Ft.Xml import XPath
10: #from Ft.Xml.XPath import Evaluate
11: #from Ft.Xml.XLink import XLINK_NAMESPACE
12: #from Ft.Xml.XLink import XLinkElements
1.1 dwinter 13:
14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
15: #from Ft.Xml import EMPTY_NAMESPACE
1.6.2.2 dwinter 16:
17: #from Ft.Lib import Uri
1.1 dwinter 18:
1.6.2.3 ! dwinter 19: from xml.etree import ElementTree
! 20: import logging
! 21:
# Mapping from document paragraph style classes (the 'class' attribute of
# <par> elements) to the (opening, closing) HTML tag pair used when
# rendering that paragraph; classes not listed here fall back to a plain
# <p>...</p> pair in par2html().
xml2html={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('','')}
1.1 dwinter 23:
def addToDict(dict, name, value):
    """Append value to the list stored under name in dict.

    Keys map to lists, so repeated calls with the same name accumulate
    values.  Returns 1 on success, 0 when name is empty (value dropped).
    """
    if name == "":
        return 0
    # setdefault replaces the removed-in-Python-3 dict.has_key() check
    # and creates the list ("als array anlegen") on first use.
    dict.setdefault(name, []).append(value)
    return 1
34:
def proj2hash(xmlstring):
    """Parse a project XML string into a dict.

    Maps paragraph/section style-class names to lists of content strings
    (see addToDict); also merges in the fields of the embedded HTML
    metadata table (see parseTable).
    """
    tree = ElementTree.fromstring(xmlstring)

    # ElementTree XPath predicates are 1-based: the previous expression
    # ".//part[0]/par" raises SyntaxError at runtime.  Take the first
    # <part> explicitly instead (matches the old minidom
    # getElementsByTagName('part')[0] behaviour).
    part = tree.find(".//part")
    result = {}
    if part is None:
        return result

    # plain paragraphs directly below the first <part>
    pars = part.findall("par")
    logging.debug(pars)
    for par in pars:
        logging.debug(par)
        className = par.attrib['class']
        content = par.text
        addToDict(result, className, content)

    # parse the metadata table (HTML namespace); it may be absent
    table = tree.find('.//{http://www.w3.org/HTML/1998/html4}table')
    if table is not None:
        result.update(parseTable(table))

    # evaluate level-1 sections
    for section in part.findall("section"):
        sec = parseSection(section)
        if sec[0] == "WEB_project_header":  # Sonderfall project
            addToDict(result, 'WEB_project_header', sec[1])  # store title
            addToDict(result, 'WEB_project_description', sec[2])  # store description
        else:  # no information in heading
            addToDict(result, sec[0], sec[2])

    # evaluate higher-level (nested) sections
    for section in part.findall("section/section"):
        logging.debug("sections2:" + repr(section))
        sec = parseSection(section)
        if sec[0] == "WEB_project_header":  # Sonderfall project
            addToDict(result, 'WEB_project_header', sec[1])  # store title
            addToDict(result, 'WEB_project_description', sec[2])  # store description
        else:  # no information in heading
            addToDict(result, sec[0], sec[2])

    return result
93:
94:
def parseSection(section):
    """Return (type, header, content) for a <section> element.

    type/header come from the section's <heading> class attribute and
    text; when the heading is missing or has no class, the first <par>
    supplies them instead.  content is all <par> children rendered as
    HTML via par2html.
    """
    type = ""
    header = ""

    heading = section.find(".//heading")
    # guard: the original crashed with AttributeError when <heading> was
    # missing, so the <par> fallback below was never reached
    if heading is not None:
        type = heading.attrib.get('class', '')
        logging.debug("parseSection (class):" + type)
        header = heading.text
        # heading.text can be None; avoid TypeError in the log line
        logging.debug("parseSection (header):" + (header or ""))

    if type == "":  # falls heading fehlt, pruefe ob erster par richtig
        par = section.find(".//par")
        if par is not None:
            type = par.attrib.get('class', '')
            header = par.text

    pars = section.findall(".//par")
    content = par2html(pars)

    return (type, header, content)
121:
def parseTable(table):
    """Parse the HTML metadata table into a dict.

    Each <html:tr> row maps the class of the first column's <par> (the
    field name) to the second column's paragraphs rendered as HTML,
    entries separated by ';'.  Returns {} for a None table.
    """
    fields = {}
    if table is None:  # robustness: callers pass tree.find() results
        return fields
    HTML_NS = '{http://www.w3.org/HTML/1998/html4}'
    rows = table.findall('.//' + HTML_NS + 'tr')
    for row in rows:
        logging.debug("ROW")
        cols = row.findall('.//' + HTML_NS + 'td')

        # Name des Datenfeldes einlesen (class of first column's par)
        try:
            field = cols[0].find('.//par').attrib['class']
        except (IndexError, AttributeError, KeyError):
            # narrow the old bare except: missing column, missing par,
            # or missing class attribute all mean "no field name"
            logging.debug("error")
            field = ""

        # Wandeln der Eintraege in HTML
        pars = cols[1].findall('.//par')
        html = par2html(pars, tags=("", ";"))
        logging.debug("field:" + field)
        logging.debug("html:" + html)
        addToDict(fields, field, html)
    return fields
152:
def par2html(pars, tags=None):
    """Render a list of <par> elements as one HTML string.

    Each paragraph's text is wrapped in the (open, close) tag pair given
    by *tags*; when tags is None the pair is looked up in xml2html by
    the par's 'class' attribute, defaulting to <p>...</p>.  Returns ""
    for a None or empty list.
    """
    logging.debug("part2html:" + repr(pars))
    if pars is None:
        return ""
    pieces = []
    for par in pars:
        logging.debug("part2html:" + repr(par))
        if not tags:
            # dict.get replaces the old bare try/except: an unknown (or
            # missing) class falls back to a plain paragraph
            tag = xml2html.get(par.attrib.get('class'), ('<p>', '</p>'))
        else:
            tag = tags
        content = par.text
        if content is None:
            content = ""
        logging.debug("part2html:" + content)
        pieces.append(tag[0] + content + tag[1])
    # join replaces the NameError-driven "html" accumulation of the
    # original; an empty list still yields "" as before
    return "".join(pieces)
184:
def getXlink(nodes):
    """Search the given minidom nodes for xlinks and render them as HTML.

    A node counts as an xlink when it has attributes including
    'xlink:type'; each hit is rendered through xlink2html and the
    fragments are concatenated.
    """
    fragments = []
    for node in nodes:
        attrs = node.attributes
        if attrs and 'xlink:type' in attrs.keys():  # is a xlink?
            fragments.append(xlink2html(node))
    return "".join(fragments)
193:
194: def xlink2html(xlink):
195: ret=""
196: attributes=xlink.attributes
197:
198: if xlink.tagName.lower()=="image":
199: ret +="<img src=%s />"%xlink.getAttribute('xlink:href')
200: elif xlink.tagName.lower()=="link":
201: ret +="<a href='%s' >%s</a>"%(xlink.getAttribute('xlink:href'),getText(xlink.childNodes))
202:
203:
204:
205:
206: return ret
207:
def getText(nodelist):
    """Collect the textual/HTML content of a minidom node list.

    Text nodes contribute their character data, <inline> elements are
    rendered through par2html, and nodes carrying an 'xlink:type'
    attribute through xlink2html.
    """
    rc = u''
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            try:
                rc += node.data
            except Exception:
                # keep the original best-effort behaviour (rc="ERROR"),
                # but log via logging instead of the Python-2-only
                # `print` statement, and drop the unprotected
                # node.data.encode() call that could itself raise
                logging.error("getText: could not append node data")
                rc = "ERROR"
        elif node.tagName == "inline":
            rc += par2html([node])
        elif node.attributes:
            if 'xlink:type' in node.attributes.keys():  # is a xlink?
                rc += xlink2html(node)
    return rc
238:
239:
240: #filename=argv[1]
241: #fileString=file(filename).read()
242: #print proj2hash(fileString)
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>