Annotation of ECHO_content/vlp_xmlhelpers.py, revision 1.3
1.1 dwinter 1: from sys import argv
2:
3: import string
4: import xml.dom.minidom
5: import Ft.Xml.XLink.Processor
6: import Ft.Xml.XLink.XLinkElements
7:
8: from Ft.Xml import XPath
9: from Ft.Xml.XPath import Evaluate
10: from Ft.Xml.XLink import XLINK_NAMESPACE
11: from Ft.Xml.XLink import XLinkElements
12:
13: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
14: #from Ft.Xml import EMPTY_NAMESPACE
15: from Ft.Lib import Uri
16: import urllib
17: import re
18:
# Regex capturing the body of a <page>...</page> element (case-insensitive,
# '.' matches newlines); used by xml2html to strip the XML wrapper.
patternPage=r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)

# Maps paragraph/inline class names from the source XML to (open, close)
# HTML tag pairs; used by par2html. Unknown classes fall back to <p>...</p>.
xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
23:
def addToDict(dict,name,value):
    """Append *value* to the list stored under *name* in *dict*.

    The list is created on first use.  Returns 1 on success, 0 when
    *name* is empty (in which case nothing is stored).
    """
    if name=="":
        return 0
    # setdefault replaces the deprecated dict.has_key() test (removed in
    # Python 3) and creates the per-key list on first use.
    dict.setdefault(name,[]).append(value)
    return 1
34:
def proj2hash(self,xmlstring):
    """Parse a project XML string into a dict of HTML fragments.

    The classed <par> children of the first <part> are stored under
    their class attribute.  <section> elements are then walked level by
    level (XPath 'section', 'section/section', ...): a section classed
    'WEB_project_header' fills the special header/description keys,
    every other section is appended to the 'text' key as an <h*>
    heading (level derived from the section's level attribute) followed
    by its rendered content.
    """
    dom=xml.dom.minidom.parseString(xmlstring)

    result={}  # renamed from 'list' to avoid shadowing the builtin

    # collect the classed paragraphs of the first <part>
    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
    for par in pars:
        className=par.getAttribute('class')
        content=getText(self,par.childNodes)
        addToDict(result,className,content)

    # walk the section tree one nesting level at a time by growing the
    # XPath expression until no more sections are found
    sectionXPath="section"
    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
    while sections:
        for section in sections:
            sec=parseSection(self,section)
            if sec[0]=="WEB_project_header": # special case: project header
                addToDict(result,'WEB_project_header',sec[1]) # store title
                addToDict(result,'WEB_project_description',sec[2]) # store description
            else: # ordinary section: heading + body go into 'text'
                level=int(sec[3])+2  # section level 1 -> <h3>, 2 -> <h4>, ...
                aTag="<h%i>"%level
                eTag="</h%i>"%level
                addToDict(result,"text",aTag+sec[1]+eTag)
                addToDict(result,"text",sec[2])
        sectionXPath+="/section"
        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
    return result
74:
75:
def parseSection(self,section):
    """Extract (class, heading text, HTML content, level) from a <section>.

    The class and heading text come from the section's <heading> child;
    if no heading exists, the first <par> is used instead.  The content
    is the section's child nodes rendered through par2html.
    """
    type=""
    header=""
    level=section.getAttribute('level')
    for heading in section.childNodes:
        if getattr(heading,'tagName','')=="heading":
            type=heading.getAttribute('class')
            header=getText(self,heading.childNodes)

    if type=="": # no heading found: check whether the first par supplies it
        par=section.getElementsByTagName('par')[0]
        type=par.getAttribute('class')
        # bug fix: getText requires self as its first argument (the old
        # call getText(par.childNodes) raised TypeError)
        header=getText(self,par.childNodes)

    pars=section.childNodes
    content=par2html(self,pars)
    return (type,header,content,level)
97:
def parseTable(table,self=None):
    """Read a two-column <html:tr>/<html:td> table into a dict of fields.

    Column 0 supplies the field name (the class attribute of its first
    <par>); column 1 is rendered to HTML (entries joined with ';') and
    appended to that field's list.  *self* is threaded through to
    par2html/getText -- the previous version referenced an undefined
    'self' and raised NameError for every row; pass it explicitly when
    link resolution is needed.
    """
    fields={}
    rows=table.getElementsByTagName('html:tr')
    for row in rows:
        cols=row.getElementsByTagName('html:td')

        # read the field name from the first column
        try:
            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except (IndexError,AttributeError):
            print("error")
            field=""

        # render the entries of the second column to HTML
        pars=cols[1].childNodes
        html=par2html(self,pars,tags=("",";"))

        addToDict(fields,field,html)
    return fields
123:
def par2html(self,pars,tags=None):
    """Render a list of <par>/<inline>/<pb>/<img> nodes to an HTML string.

    Without *tags*, the (open, close) tag pair is looked up per
    paragraph class in xml2htmlArray, defaulting to <p>...</p>; with
    *tags*, that pair is used for every paragraph (parseTable passes
    ("",";")).  <pb> becomes "<pb/>", <img> the placeholder "XXX";
    all other node types are ignored.
    """
    html=""
    for par in pars:
        tagName=getattr(par,'tagName','')
        if tagName in ["par","inline"]:
            if not tags:
                # unknown classes fall back to a plain paragraph
                tag=xml2htmlArray.get(par.getAttribute('class'),('<p>','</p>'))
            else:
                tag=tags
            content=getText(self,par.childNodes,par.getAttribute('class'))
            # bug fix: the old fallback branch read 'html=+...' (unary
            # plus on a string, a typo for '+='); html is always
            # initialized above, so a plain append is correct.
            html+=tag[0]+content+tag[1]
        elif tagName=="pb":
            html+="<pb/>"
        elif tagName=="img":
            html+="XXX"
    return html
160:
def getXlink(nodes,self=None):
    """Search *nodes* for xlink elements and return them rendered as HTML.

    A node counts as an xlink when it carries an 'xlink:type' attribute.
    *self* (new, optional, backward-compatible) is threaded through to
    xlink2html -- the previous version called xlink2html(node) with one
    argument and raised TypeError whenever an xlink was actually found.
    """
    ret=""
    for node in nodes:
        if node.attributes:
            if 'xlink:type' in node.attributes.keys(): #is a xlink?
                ret +=xlink2html(self,node)
    return ret
169:
def checkRef(self,ref):
    """Return a truthy search result if *ref* exists as a reference in
    one of the VLP databases, otherwise None/falsy.

    NOTE(review): the SQL is built by string interpolation from *ref*,
    which is injection-prone if ref can contain quotes -- switch to a
    parameterized query if self.search supports one.
    """
    # per-database extra WHERE clause (vl_literature is additionally
    # restricted to CDs matching '%lise%')
    dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
    res=None
    for db in dbs.keys():
        # 'or' short-circuits: once a database yields a hit, the
        # remaining databases are not queried
        res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
    return res
177:
def xml2html(self,str,quote="yes"):
    """Rewrite <link> elements in *str* into HTML <a> elements and return
    the body of the enclosing <page> element.

    Links whose ref resolves via checkRef get an href pointing at the
    vlp_coll view (with optional page number).  If no <page> wrapper is
    found, the XML declaration and newlines are stripped instead.
    NOTE(review): the VLP-specific link2html logic should eventually be
    factored out of here (per the original author's remark).
    """
    if str:
        if quote=="yes2":
            str=re.sub("\&","&",str)

        # add quotes around the ref attribute value in case they are missing
        str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)
        dom=xml.dom.minidom.parseString(str)
        links=dom.getElementsByTagName("link")

        for link in links:
            link.tagName="a"
            ref=link.getAttribute("ref")
            pn=link.getAttribute("page")

            # only rewrite links whose reference is known to a VLP database
            if checkRef(self,ref):
                if pn:
                    link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
                else:
                    link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)

        str= dom.toxml()

        # extract the content of the <page> element, if any
        retStr=regexpPage.search(str)

        try:
            return retStr.group(1)
        except:
            # no <page> wrapper: strip the XML declaration and newlines
            exStr="""<?xml version="1.0" ?>"""
            str=re.sub("\n","",str)
            return str.replace(exStr,'')
    return ""
217:
def xlink2html(self,xlink,parClass=None):
    """Render a single xlink element (<image> or <link>) to HTML.

    <image> becomes an <img> tag.  <link> becomes an <a> tag -- or an
    <img> tag when the enclosing paragraph class is 'Picture' -- if its
    href is already an http/file URL; otherwise the reference text is
    returned verbatim, with quotes patched around a bare ref= attribute.
    (The unused local 'attributes' was removed.)
    """
    ret=""

    if xlink.tagName.lower()=="image":
        ret +="""<img src="%s" />"""%xlink.getAttribute('href')
    elif xlink.tagName.lower()=="link":
        reference=urllib.unquote(xlink.getAttribute('href'))
        label=getText(self,xlink.childNodes)

        # check if href is already a correct url
        if reference.split(":")[0] in ['http','file']:
            if parClass=="Picture":
                ret +="""<img src="%s" />"""%(reference)
            else:
                ret +="""<a href="%s" >%s</a>"""%(reference,label)
        else: # not a URL: add quotes around the ref attribute if missing
            reference=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',reference)
            ret +=reference

    return ret
242:
def getText(self,nodelist,parClass=None):
    """Concatenate the textual content of *nodelist* into one string.

    Text nodes are appended directly; <inline> nodes are rendered via
    par2html, <pb> becomes '<pb/>', and any other node carrying a
    'type' attribute is treated as an xlink and rendered via
    xlink2html (encoded to utf-8 when possible).
    NOTE(review): nodes without a tagName attribute (comments,
    processing instructions) would raise AttributeError in the elif
    chain -- presumably the input never contains them; confirm.
    """

    rc = u''
    for node in nodelist:

        if node.nodeType == node.TEXT_NODE:

            try:
                try:
                    rc += node.data

                except:
                    # fallback: restart the buffer if appending failed
                    # (Python 2 unicode/str mixing)
                    rc=node.data
            except:
                # NOTE(review): this overwrites everything collected so
                # far, and the encode() result below is discarded --
                # looks like leftover debugging; kept as-is.
                rc="ERROR"
                node.data.encode('utf-8','ignore')
        elif node.tagName =="inline":

            rc+=par2html(self,[node])

        elif node.tagName =="pb":
            rc+="<pb/>"
        elif node.attributes:

            if 'type' in node.attributes.keys(): #is a xlink?

                try:
                    rc +=xlink2html(self,node,parClass).encode('utf-8')

                except:
                    rc +=xlink2html(self,node,parClass)

    return rc
282:
283:
284: #filename=argv[1]
285: #fileString=file(filename).read()
286: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>