Annotation of MPIWGWeb/xmlhelper.py, revision 1.1.1.1
1.1 dwinter 1:
2: from sys import argv
3:
4: import string
5: import xml.dom.minidom
6: import Ft.Xml.XLink.Processor
7: import Ft.Xml.XLink.XLinkElements
8:
9: from Ft.Xml import XPath
10: from Ft.Xml.XPath import Evaluate
11: from Ft.Xml.XLink import XLINK_NAMESPACE
12: from Ft.Xml.XLink import XLinkElements
13:
14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
15: #from Ft.Xml import EMPTY_NAMESPACE
16: from Ft.Lib import Uri
17:
# Maps the ``class`` attribute of a paragraph/inline element to the pair of
# (opening, closing) HTML fragments that par2html wraps around its content.
# Both 'Web_kursiv' and 'WEB_kursiv' spellings are listed on purpose.
xml2html = {
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p>', '</p>'),
    'WEB_figuretitle': ('<i>', '</i>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    'WEB_hyperlink': ('', ''),
}
19:
def addToDict(dict, name, value):
    """Append ``value`` to the list stored under ``name`` in ``dict``.

    The list is created on first use, so every value in ``dict`` that was
    written through this function is a list.

    Returns 1 if the value was stored and 0 if ``name`` is the empty
    string (in which case ``dict`` is left untouched).
    """
    if name == "":
        return 0

    # setdefault creates the list on first use; it replaces dict.has_key(),
    # which no longer exists in Python 3.
    dict.setdefault(name, []).append(value)
    return 1
30:
def proj2hash(xmlstring):
    """Convert the XML description of a project into a hash (dict).

    The string is parsed with minidom and the first ``part`` element is
    used as the root for everything except the table lookup.  The result
    is filled in this order:

    1. the ``par`` elements matched by the XPath ``par`` relative to the
       part, keyed by their ``class`` attribute;
    2. the fields of the first ``html:table`` element (these replace any
       entries of the same name collected in step 1);
    3. the sections matched by ``section`` and then by ``section/section``
       relative to the part.

    A section whose heading class is ``WEB_project_header`` is stored as
    two entries (``WEB_project_header`` for the heading text and
    ``WEB_project_description`` for the body); any other section is stored
    under its heading class with only the body.
    """
    document = xml.dom.minidom.parseString(xmlstring)
    part = document.getElementsByTagName('part')[0]
    result = {}

    # title paragraphs
    for par in Evaluate('par', part):
        addToDict(result, par.getAttribute('class'), getText(par.childNodes))

    # parse the table
    table = document.getElementsByTagName('html:table')[0]
    result.update(parseTable(table))

    # level 1 sections first, then the sections nested one level deeper
    for expression in ('section', 'section/section'):
        for section in Evaluate(expression, part):
            kind, title, body = parseSection(section)
            if kind == "WEB_project_header":  # special case: project
                addToDict(result, 'WEB_project_header', title)  # store title
                addToDict(result, 'WEB_project_description', body)  # store description
            else:  # no information in heading
                addToDict(result, kind, body)

    return result
76:
77:
def parseSection(section):
    """Split a section element into ``(class, heading text, body html)``.

    The class and the heading text come from the first ``heading`` element
    found below ``section``; the body is the HTML produced by par2html for
    the nodes matched by the XPath ``par`` relative to ``section``.

    Raises IndexError if the section contains no ``heading`` element.
    """
    first_heading = section.getElementsByTagName('heading')[0]
    heading_class = first_heading.getAttribute('class')
    heading_text = getText(first_heading.childNodes)
    body_html = par2html(Evaluate('par', section))
    return (heading_class, heading_text, body_html)
87:
def parseTable(table):
    """Collect the name/value rows of a table element into a dict.

    For every ``html:tr`` below ``table`` the field name is the ``class``
    attribute of the first ``par`` inside the first ``html:td``, and the
    value is the HTML built by par2html from the ``par`` elements of the
    second ``html:td`` (each entry terminated by ``;``).

    Rows that cannot be interpreted (no first cell, no ``par`` in it, no
    second cell) or whose field name is empty are skipped; the former are
    reported through the ``logging`` module instead of aborting the parse.

    Returns a dict mapping each field name to a list of HTML strings.
    """
    import logging

    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')

        # read the name of the data field
        try:
            field = cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except IndexError:
            logging.getLogger(__name__).warning(
                "parseTable: row without a field name cell, skipped")
            continue

        if field == "":
            # addToDict would reject an empty name anyway
            continue

        if len(cols) < 2:
            logging.getLogger(__name__).warning(
                "parseTable: row %r has no value cell, skipped", field)
            continue

        # convert the entries to HTML
        html = par2html(cols[1].getElementsByTagName('par'), tags=("", ";"))
        addToDict(fields, field, html)

    return fields
113:
def par2html(pars, tags=None):
    """Render a sequence of paragraph-like nodes as one HTML string.

    The content of every node (as returned by getText for its child
    nodes) is wrapped in an ``(opening, closing)`` pair of fragments:

    * if ``tags`` is given (and not empty) that pair is used for every
      node;
    * otherwise the pair is looked up in ``xml2html`` by the node's
      ``class`` attribute, falling back to ``('<p>', '</p>')`` when the
      class is unknown or the node has no ``getAttribute``.

    Returns the concatenation of all wrapped pieces, or ``""`` when
    ``pars`` is empty.
    """
    default_tag = ('<p>', '</p>')
    pieces = []

    for par in pars:
        if tags:
            tag = tags
        else:
            try:
                tag = xml2html[par.getAttribute('class')]
            except (KeyError, AttributeError):
                tag = default_tag

        pieces.append(tag[0] + getText(par.childNodes) + tag[1])

    # Explicit accumulation replaces the former reliance on catching the
    # unbound-variable error for the first piece and for empty input.
    return "".join(pieces)
140:
def getXlink(nodes):
    """Return the HTML for every XLink found among ``nodes``.

    A node counts as an XLink when it has attributes and one of them is
    named ``xlink:type``; such nodes are rendered with xlink2html and the
    results are concatenated in order.  Nodes are not searched
    recursively.  Returns ``""`` if nothing matches.
    """
    fragments = []
    for node in nodes:
        attrs = node.attributes
        if not attrs:
            # text nodes and attribute-less elements cannot be xlinks
            continue
        if 'xlink:type' in attrs.keys():  # is a xlink?
            fragments.append(xlink2html(node))
    return "".join(fragments)
149:
def xlink2html(xlink):
    """Render an XLink element as an HTML fragment.

    * an ``image`` element (tag name compared case-insensitively) becomes
      ``<img src='HREF' />``;
    * a ``link`` element becomes ``<a href='HREF' >CONTENT</a>`` where
      CONTENT is the result of getText on the element's child nodes;
    * any other element yields ``""``.

    HREF is the value of the ``xlink:href`` attribute.
    """
    # NOTE(review): href is inserted verbatim; a value containing a single
    # quote would still break the attribute -- confirm inputs are trusted.
    tag = xlink.tagName.lower()
    href = xlink.getAttribute('xlink:href')

    if tag == "image":
        # The value is quoted (like the href below) so that URLs with
        # spaces or '=' produce well-formed markup.
        return "<img src='%s' />" % href
    if tag == "link":
        return "<a href='%s' >%s</a>" % (href, getText(xlink.childNodes))
    return ""
163:
def getText(nodelist):
    """Return the textual/HTML content of ``nodelist`` as a unicode string.

    The nodes are processed in order, without recursing into ordinary
    elements:

    * text and CDATA nodes contribute their data;
    * ``inline`` elements are rendered through par2html;
    * other elements that carry an ``xlink:type`` attribute are rendered
      through xlink2html;
    * everything else (plain elements, comments, processing instructions)
      is ignored.
    """
    parts = []
    for node in nodelist:
        node_type = node.nodeType

        if node_type in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
            parts.append(node.data)
        elif node_type != node.ELEMENT_NODE:
            # comments etc. have no tagName; looking it up would raise
            continue
        elif node.tagName == "inline":
            parts.append(par2html([node]))
        elif node.attributes and 'xlink:type' in node.attributes.keys():  # is a xlink?
            parts.append(xlink2html(node))

    return u''.join(parts)
198:
199:
200: #filename=argv[1]
201: #fileString=file(filename).read()
202: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>