Annotation of MPIWGWeb/xmlhelper.py, revision 1.5
1.1 dwinter 1:
2: from sys import argv
3:
4: import string
5: import xml.dom.minidom
6: import Ft.Xml.XLink.Processor
7: import Ft.Xml.XLink.XLinkElements
8:
9: from Ft.Xml import XPath
10: from Ft.Xml.XPath import Evaluate
11: from Ft.Xml.XLink import XLINK_NAMESPACE
12: from Ft.Xml.XLink import XLinkElements
13:
14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
15: #from Ft.Xml import EMPTY_NAMESPACE
16: from Ft.Lib import Uri
17:
# Map of XML paragraph/inline class names to (opening, closing) HTML tag pairs.
xml2html = {
    'WEB_normal':       ('<p>', '</p>'),
    'Normal':           ('<p>', '</p>'),
    'WEB_picture':      ('<p class="picture">', '</p>'),
    'WEB_figuretitle':  ('<p class="picturetitle">', '</p>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    'Web_kursiv':       ('<i>', '</i>'),
    'WEB_kursiv':       ('<i>', '</i>'),
    'WEB_hyperlink':    ('', ''),
    'Hyperlink':        ('', ''),
}
1.1 dwinter 19:
def addToDict(dict, name, value):
    """Append *value* to the list stored under *name* in *dict*.

    Entries are accumulated as lists (one list per name, created on
    first use).  Returns 1 on success, 0 when *name* is empty (nothing
    is stored in that case).
    """
    if name == "":
        return 0
    # setdefault creates the list on first use and appends thereafter;
    # replaces the Python-2-only dict.has_key() check.
    dict.setdefault(name, []).append(value)
    return 1
30:
def proj2hash(xmlstring):
    """Convert a project XML document (as a string) into a dict.

    Keys are paragraph/section class names, values are lists of the
    corresponding text/HTML content.  Also merges in the fields of the
    embedded html:table.
    """
    dom = xml.dom.minidom.parseString(xmlstring)
    result = {}
    part = dom.getElementsByTagName('part')[0]

    # Top-level paragraphs: class attribute -> text content.
    for par in Evaluate('par', part):
        result_class = par.getAttribute('class')
        result_text = getText(par.childNodes)
        addToDict(result, result_class, result_text)

    # Merge in the metadata table.
    result.update(parseTable(dom.getElementsByTagName('html:table')[0]))

    # Sections at level 1, then at level 2 (same handling for both).
    for xpath in ('section', 'section/section'):
        for section in Evaluate(xpath, part):
            sec = parseSection(section)
            if sec[0] == "WEB_project_header":
                # Special case "project": heading text is the title,
                # body paragraphs are the description.
                addToDict(result, 'WEB_project_header', sec[1])
                addToDict(result, 'WEB_project_description', sec[2])
            else:
                # No extra information in the heading itself.
                addToDict(result, sec[0], sec[2])

    return result
78:
79:
def parseSection(section):
    """Parse a section element into a (class, header, html-content) tuple.

    The class and header come from the section's heading child; if no
    heading exists, they are taken from the section's first par element
    instead (falls heading fehlt, pruefe ob erster par richtig).
    """
    sec_class = ""
    header = ""

    for child in section.childNodes:
        if getattr(child, 'tagName', '') == "heading":
            sec_class = child.getAttribute('class')
            header = getText(child.childNodes)

    if sec_class == "":
        # No usable heading: fall back to the first paragraph.
        first_par = section.getElementsByTagName('par')[0]
        sec_class = first_par.getAttribute('class')
        header = getText(first_par.childNodes)

    content = par2html(Evaluate('par', section))
    return (sec_class, header, content)
99:
def parseTable(table):
    """Parse an html:table element into a dict of field name -> HTML list.

    Each table row contributes one entry: the field name is the class
    attribute of the first par in the first cell, the value is the
    second cell's paragraphs rendered as HTML (joined with ";").
    """
    fields = {}
    rows = table.getElementsByTagName('html:tr')
    for row in rows:
        cols = row.getElementsByTagName('html:td')

        # Read the field name from the first cell; a row without cells
        # or a cell without a par keeps the row but files it under "".
        try:
            field = cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except IndexError:
            print("error")
            field = ""

        # Convert the entries of the second cell to HTML.
        pars = cols[1].getElementsByTagName('par')
        html = par2html(pars, tags=("", ";"))

        addToDict(fields, field, html)
    return fields
125:
def par2html(pars, tags=None):
    """Render a list of par DOM nodes as an HTML string.

    Each paragraph's text is wrapped in the tag pair looked up in
    xml2html by the par's class attribute (falling back to <p>...</p>
    for unknown classes), or in the explicit *tags* (open, close) pair
    when one is given.  Returns "" for an empty list.
    """
    html = ""
    for par in pars:
        if tags:
            tag = tags
        else:
            # Unknown classes render as a plain paragraph.
            tag = xml2html.get(par.getAttribute('class'), ('<p>', '</p>'))

        content = getText(par.childNodes)
        html = html + tag[0] + content + tag[1]

    return html
152:
def getXlink(nodes):
    """Search *nodes* for xlink elements and return them rendered as HTML."""
    fragments = []
    for node in nodes:
        attrs = node.attributes
        # Only element nodes carry attributes; an xlink:type attribute
        # marks the node as an xlink.
        if attrs and 'xlink:type' in attrs.keys():
            fragments.append(xlink2html(node))
    return "".join(fragments)
161:
162: def xlink2html(xlink):
163: ret=""
164: attributes=xlink.attributes
165:
166: if xlink.tagName.lower()=="image":
167: ret +="<img src=%s />"%xlink.getAttribute('xlink:href')
168: elif xlink.tagName.lower()=="link":
169: ret +="<a href='%s' >%s</a>"%(xlink.getAttribute('xlink:href'),getText(xlink.childNodes))
170:
171:
172:
173:
174: return ret
175:
def getText(nodelist):
    """Concatenate the text content of a list of DOM nodes.

    Text nodes contribute their data directly; "inline" elements are
    rendered through par2html; other elements carrying an xlink:type
    attribute are rendered through xlink2html.  Everything else
    (comments, elements without attributes) is ignored.
    """
    rc = u''
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            rc += node.data
        elif getattr(node, 'tagName', '') == "inline":
            # Inline markup is converted like a paragraph.
            rc += par2html([node])
        elif node.attributes:
            # An xlink:type attribute marks the element as an xlink.
            if 'xlink:type' in node.attributes.keys():
                rc += xlink2html(node)
    return rc
206:
207:
208: #filename=argv[1]
209: #fileString=file(filename).read()
210: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>