Annotation of MPIWGWeb/xmlhelper.py, revision 1.3
1.1 dwinter 1:
2: from sys import argv
3:
4: import string
5: import xml.dom.minidom
6: import Ft.Xml.XLink.Processor
7: import Ft.Xml.XLink.XLinkElements
8:
9: from Ft.Xml import XPath
10: from Ft.Xml.XPath import Evaluate
11: from Ft.Xml.XLink import XLINK_NAMESPACE
12: from Ft.Xml.XLink import XLinkElements
13:
14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
15: #from Ft.Xml import EMPTY_NAMESPACE
16: from Ft.Lib import Uri
17:
# Mapping of document paragraph classes to the (opening, closing) HTML
# tag pair that par2html wraps around the paragraph's text content.
xml2html = {
    'WEB_normal':       ('<p>', '</p>'),
    'Normal':           ('<p>', '</p>'),
    'WEB_picture':      ('<p>', '</p>'),
    'WEB_figuretitle':  ('<i>', '</i>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    'Web_kursiv':       ('<i>', '</i>'),
    'WEB_kursiv':       ('<i>', '</i>'),
    'WEB_hyperlink':    ('', ''),
    'Hyperlink':        ('', ''),
}
1.1 dwinter 19:
def addToDict(dict,name,value):
    """Append value to the list stored under name in dict.

    Each entry is kept as a list ("als array anlegen") so that repeated
    names accumulate all of their values.  Returns 1 when the value was
    stored, 0 when name is empty and the entry was skipped.
    """
    if name=="":
        return 0
    # setdefault replaces the has_key() check (removed in Python 3) and
    # creates the list on first use in a single step.
    dict.setdefault(name,[]).append(value)
    return 1
30:
def proj2hash(xmlstring):
    """wandelt xml-files fuer die projekte in ein hash

    Parses the project XML string and returns a dict mapping paragraph/
    section class names to lists of extracted HTML fragments (see
    addToDict for the list-per-key convention).
    """
    dom=xml.dom.minidom.parseString(xmlstring)

    # renamed from 'list' to avoid shadowing the builtin
    fields={}

    part=dom.getElementsByTagName('part')[0]

    # get title: paragraphs directly below <part>
    for par in Evaluate('par',part):
        addToDict(fields,par.getAttribute('class'),getText(par.childNodes))

    fields.update(parseTable(dom.getElementsByTagName('html:table')[0])) # Parse the Table

    # level-1 sections and the nested higher-level sections get the
    # identical treatment, so both passes share one helper
    _addSections(fields,Evaluate('section',part)) # evaluate level 1
    _addSections(fields,Evaluate('section/section',part)) # evaluate higher level sections

    return fields


def _addSections(fields,sections):
    """Store parsed sections in fields.

    WEB_project_header is special-cased ("Sonderfall project"): its
    heading carries the project title and its body the description.
    """
    for section in sections:
        sec=parseSection(section)
        if sec[0]=="WEB_project_header": # Sonderfall project
            addToDict(fields,'WEB_project_header',sec[1]) # store title
            addToDict(fields,'WEB_project_description',sec[2]) # store description
        else: # no information in heading
            addToDict(fields,sec[0],sec[2])
76:
77:
def parseSection(section):
    """Return (class, header, content_html) for a <section> element.

    The class name and header text normally come from the last <heading>
    child; if no heading supplies a class, the first <par> is used as a
    fallback ("falls heading fehlt, pruefe ob erster par richtig").
    """
    type=""   # keeps the original local name; intentionally shadows the builtin
    header=""
    for heading in section.getElementsByTagName('heading'):
        type=heading.getAttribute('class')
        header=getText(heading.childNodes)

    if type=="": # falls heading fehlt, pruefe ob erster par richtig
        pars=section.getElementsByTagName('par')
        # guard: the original indexed [0] unconditionally and raised
        # IndexError on sections without any <par>
        if pars:
            type=pars[0].getAttribute('class')
            header=getText(pars[0].childNodes)

    content=par2html(Evaluate('par',section))

    return (type,header,content)
95:
def parseTable(table):
    """Parse an html:table of (field name, content) rows into a dict.

    The class attribute of the first <par> in a row's first cell names
    the data field; the second cell's paragraphs become one semicolon-
    separated HTML string.  Rows whose field name cannot be read are
    stored under "" and therefore dropped by addToDict.
    """
    fields={}
    for row in table.getElementsByTagName('html:tr'):
        cols=row.getElementsByTagName('html:td')

        # Name des Datenfeldes einlesen
        try:
            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except IndexError: # row without cells, or first cell without <par>
            # narrowed from a bare except:, which also hid real errors
            print("error")
            field=""

        # Wandeln der Eintraege in HTML
        html=par2html(cols[1].getElementsByTagName('par'),tags=("",";"))

        addToDict(fields,field,html)
    return fields
121:
def par2html(pars,tags=None):
    """Convert a list of <par> elements into one HTML string.

    When tags is None each paragraph is wrapped in the (open, close) tag
    pair looked up via its class attribute in xml2html, falling back to
    <p>...</p> for unknown classes; otherwise the supplied pair is used
    for every paragraph.  Returns "" for an empty list.
    """
    pieces=[]
    for par in pars:
        if not tags:
            # dict.get replaces the bare try/except around the lookup
            tag=xml2html.get(par.getAttribute('class'),('<p>','</p>'))
        else:
            tag=tags
        pieces.append(tag[0]+getText(par.childNodes)+tag[1])

    # the original accumulated via a NameError try/except hack and
    # returned "" from a final except; an explicit join is equivalent
    return "".join(pieces)
148:
def getXlink(nodes):
    """searches xlinks and gives them back as html"""
    fragments=[]
    for node in nodes:
        attrs=node.attributes
        # only element nodes with an xlink:type attribute are rendered
        if attrs and 'xlink:type' in attrs.keys(): # is a xlink?
            fragments.append(xlink2html(node))
    return "".join(fragments)
157:
def xlink2html(xlink):
    """Convert an xlink element (<image> or <link>) to an HTML fragment.

    Tag names are matched case-insensitively; anything else yields "".
    """
    ret=""
    if xlink.tagName.lower()=="image":
        # quote the URL like the href branch below does; the original
        # emitted an unquoted src attribute, which is broken HTML for
        # URLs containing spaces or other special characters
        ret +="<img src='%s' />"%xlink.getAttribute('xlink:href')
    elif xlink.tagName.lower()=="link":
        ret +="<a href='%s' >%s</a>"%(xlink.getAttribute('xlink:href'),getText(xlink.childNodes))
    return ret
171:
def getText(nodelist):
    """Concatenate the text content of a node list as a unicode string.

    Text nodes contribute their character data, <inline> elements are
    rendered through par2html, and xlink elements (nodes whose
    attributes contain xlink:type) through xlink2html.  Everything else
    is ignored.  Returns u'' for an empty list.
    """
    rc = u''
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            # the original wrapped this in nested bare try/excepts whose
            # fallbacks (rc=node.data, rc="ERROR") silently clobbered
            # everything accumulated so far; plain concatenation of the
            # DOM's unicode data needs no guard
            rc += node.data
        elif node.tagName =="inline":
            rc += par2html([node])
        elif node.attributes:
            if 'xlink:type' in node.attributes.keys(): # is a xlink?
                rc += xlink2html(node)
    return rc
202:
203:
204: #filename=argv[1]
205: #fileString=file(filename).read()
206: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>