1:
2: from sys import argv
3:
4: import string
5: import xml.dom.minidom
6: #import Ft.Xml.XLink.Processor
7: #import Ft.Xml.XLink.XLinkElements
8: #
9: #from Ft.Xml import XPath
10: #from Ft.Xml.XPath import Evaluate
11: #from Ft.Xml.XLink import XLINK_NAMESPACE
12: #from Ft.Xml.XLink import XLinkElements
13:
14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
15: #from Ft.Xml import EMPTY_NAMESPACE
16:
17: #from Ft.Lib import Uri
18:
# Maps the 'class' attribute of a paragraph/inline element to the pair of
# (opening, closing) HTML fragments that par2html wraps around its text.
xml2html = {
    # block-level paragraphs
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p class="picture">', '</p>'),
    'WEB_figuretitle': ('<p class="picturetitle">', '</p>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    # italics (both spellings of the class name are listed)
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    # hyperlink classes add no markup of their own
    'WEB_hyperlink': ('', ''),
    'Hyperlink': ('', ''),
}
20:
def addToDict(dict, name, value):
    """Append value to the list stored under name in dict.

    The list is created on first use of a name.

    Returns 0 (and leaves dict untouched) when name is the empty string,
    otherwise 1.
    """
    if name == "":
        return 0

    # `in` replaces the removed dict.has_key(); works on Python 2 and 3.
    if name not in dict:
        dict[name] = []  # create the entry as a list

    dict[name].append(value)
    return 1
31:
def _childElements(node, tagName):
    """Return the direct child elements of node whose tag name is tagName."""
    return [child for child in node.childNodes
            if child.nodeType == child.ELEMENT_NODE
            and child.tagName == tagName]


def proj2hash(xmlstring):
    """Convert the XML of a project file into a hash (dict).

    The returned dict maps names to lists of values and is filled from:

    * the 'par' children of the first 'part' element (key: the par's
      'class' attribute, value: its text),
    * the first 'html:table' element, as parsed by parseTable (merged with
      dict.update, so table fields replace same-named par entries),
    * the 'section' children of the part, followed by the 'section'
      children of those sections, as parsed by parseSection.

    Raises IndexError when the document has no 'part' or no 'html:table'
    element.
    """
    dom = xml.dom.minidom.parseString(xmlstring)
    part = dom.getElementsByTagName('part')[0]

    result = {}

    # title paragraphs directly below the part
    for par in _childElements(part, 'par'):
        addToDict(result, par.getAttribute('class'), getText(par.childNodes))

    # parse the table
    result.update(parseTable(dom.getElementsByTagName('html:table')[0]))

    # Level-1 sections first, then the sections nested one level below them.
    # The child elements are selected directly from the DOM because the
    # XPath helper this code used to call is no longer imported.
    topSections = _childElements(part, 'section')
    nestedSections = [sub
                      for section in topSections
                      for sub in _childElements(section, 'section')]

    for section in topSections + nestedSections:
        kind, header, content = parseSection(section)
        if kind == "WEB_project_header":  # special case: project
            addToDict(result, 'WEB_project_header', header)  # store title
            addToDict(result, 'WEB_project_description', content)  # store description
        else:  # no information in heading
            addToDict(result, kind, content)

    return result
79:
80:
def parseSection(section):
    """Split a 'section' element into (type, header, content).

    type and header are the 'class' attribute and the text of the section's
    'heading' child (the last one wins if there are several).  When that
    yields an empty type, the first 'par' descendant supplies both values
    instead; if there is no 'par' either, both stay empty.

    content is the HTML produced by par2html for the 'par' elements that
    are direct children of the section.
    """
    kind = ""
    header = ""
    for child in section.childNodes:
        # text nodes have no tagName, hence getattr with a default
        if getattr(child, 'tagName', '') == "heading":
            kind = child.getAttribute('class')
            header = getText(child.childNodes)

    if kind == "":  # heading missing: fall back to the first par
        candidates = section.getElementsByTagName('par')
        if candidates:
            kind = candidates[0].getAttribute('class')
            header = getText(candidates[0].childNodes)

    # Direct 'par' children only; selected from the DOM because the XPath
    # helper this code used to call is no longer imported.
    pars = [child for child in section.childNodes
            if child.nodeType == child.ELEMENT_NODE
            and child.tagName == 'par']
    content = par2html(pars)

    return (kind, header, content)
100:
def parseTable(table):
    """Read a two-column 'html:table' element into a dict of lists.

    For each 'html:tr' row, the 'class' attribute of the first 'par' in the
    first 'html:td' cell names the field, and the 'par' elements of the
    second cell are converted with par2html (each entry followed by ';')
    and appended to that field's list via addToDict.

    Rows with fewer than two cells are skipped.  Rows whose first cell
    contains no 'par' print "error" and are skipped.
    """
    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')
        if len(cols) < 2:
            # no name/value pair in this row; nothing to read
            continue

        # read the name of the data field
        namePars = cols[0].getElementsByTagName('par')
        if not namePars:
            print("error")
            continue
        field = namePars[0].getAttribute('class')

        # convert the entries to HTML
        html = par2html(cols[1].getElementsByTagName('par'), tags=("", ";"))

        addToDict(fields, field, html)
    return fields
126:
def par2html(pars, tags=None):
    """Render a sequence of elements as one HTML string.

    Each element's text (via getText) is wrapped in an (opening, closing)
    pair.  If tags is given (and non-empty) that pair is used for every
    element; otherwise the pair is looked up in xml2html by the element's
    'class' attribute, falling back to ('<p>', '</p>') for unknown classes.

    Returns "" when pars is empty.
    """
    default = ('<p>', '</p>')
    pieces = []
    for par in pars:
        if tags:
            tag = tags
        else:
            tag = xml2html.get(par.getAttribute('class'), default)

        content = getText(par.childNodes)
        pieces.append(tag[0] + content + tag[1])

    # Joining once keeps all earlier fragments; nothing is reset on the way.
    return "".join(pieces)
153:
def getXlink(nodes):
    """Collect the HTML for every XLink node in nodes.

    A node counts as an XLink when it has an 'xlink:type' attribute; each
    such node is rendered with xlink2html and the results are concatenated
    in order.  Nodes without attributes are ignored.
    """
    fragments = []
    for candidate in nodes:
        attrs = candidate.attributes
        if not attrs:
            continue
        if 'xlink:type' in attrs.keys():  # is it an xlink?
            fragments.append(xlink2html(candidate))
    return "".join(fragments)
162:
def xlink2html(xlink):
    """Render an XLink element as HTML.

    An 'image' element (tag name compared case-insensitively) becomes an
    <img> whose src is the element's 'xlink:href'; a 'link' element becomes
    an <a> with that href around the element's text (via getText).  Any
    other element yields "".

    The href is inserted as-is; it is not HTML-escaped.
    """
    kind = xlink.tagName.lower()
    href = xlink.getAttribute('xlink:href')

    if kind == "image":
        # src is quoted like the href below, so that values containing
        # whitespace stay a single attribute value
        return "<img src='%s' />" % href
    if kind == "link":
        return "<a href='%s' >%s</a>" % (href, getText(xlink.childNodes))
    return ""
176:
def getText(nodelist):
    """Return the text/HTML content of nodelist as one unicode string.

    * text and CDATA nodes contribute their data,
    * 'inline' elements are rendered with par2html,
    * other elements that carry an 'xlink:type' attribute are rendered
      with xlink2html,
    * everything else (comments, processing instructions, elements that
      match none of the above) is skipped.
    """
    pieces = []
    for node in nodelist:
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
            pieces.append(node.data)
        elif node.nodeType != node.ELEMENT_NODE:
            # only elements have a tagName; skip comments and the like
            continue
        elif node.tagName == "inline":
            pieces.append(par2html([node]))
        elif node.attributes and 'xlink:type' in node.attributes.keys():
            # is an xlink
            pieces.append(xlink2html(node))
    return u''.join(pieces)
207:
208:
209: #filename=argv[1]
210: #fileString=file(filename).read()
211: #print proj2hash(fileString)
212:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>