Annotation of ECHO_content/vlp_xmlhelpers.py, revision 1.15
1.1 dwinter 1: from sys import argv
2:
3: import string
1.11 casties 4: import logging
1.1 dwinter 5: import xml.dom.minidom
6: import Ft.Xml.XLink.Processor
7: import Ft.Xml.XLink.XLinkElements
8:
9: from Ft.Xml import XPath
10: from Ft.Xml.XPath import Evaluate
11: from Ft.Xml.XLink import XLINK_NAMESPACE
12: from Ft.Xml.XLink import XLinkElements
1.5 dwinter 13: import cStringIO
14: from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
15: from Ft.Xml import EMPTY_NAMESPACE
1.1 dwinter 16: from Ft.Lib import Uri
17: import urllib
18: import re
1.11 casties 19: from ECHO_collection import unicodify,utf8ify
1.1 dwinter 20:
# Captures everything between the first <txt ...> start tag and the next
# </txt>; matching ignores case and '.' also spans newlines.
patternTXT = r"<\s*txt.*?>(.*?)</txt>"
regexpTXT = re.compile(patternTXT, re.IGNORECASE | re.DOTALL)

# Same shape of pattern for <page ...> ... </page>.
patternPage = r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE | re.DOTALL)
25:
1.14 casties 26: #xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
27: #
28: #def addToDict(dict,name,value):
29: # if name=="":
30: # return 0
31: # else:
32: #
33: # if not dict.has_key(name):
34: # dict[name]=[] # als array anlegen
35: #
36: # dict[name].append(value)
37: # return 1
38: #
39: #def proj2hash(self,xmlstring):
40: # """wandelt xml-files fuer die projekte in ein hash"""
41: #
42: # dom=xml.dom.minidom.parseString(xmlstring)
43: #
44: #
45: # list={}
46: #
47: # #gettitle
48: # pars=Evaluate('par',dom.getElementsByTagName('part')[0])
49: # for par in pars:
50: # className=par.getAttribute('class')
51: # content=getText(self,par.childNodes)
52: # addToDict(list,className,content)
53: #
54: #
55: # sectionXPath="section"
56: #
57: #
58: # sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
59: #
60: # while sections:
61: #
62: # for section in sections:
63: #
64: # sec=parseSection(self,section)
65: #
66: # if sec[0]=="WEB_project_header": # Sonderfall project
67: # addToDict(list,'WEB_project_header',sec[1]) # store title
68: # addToDict(list,'WEB_project_description',sec[2]) #store description
69: # else: # no information in heading
70: # level=int(sec[3])+2
71: # aTag="<h%i>"%level
72: # eTag="</h%i>"%level
73: # addToDict(list,"text",aTag+sec[1]+eTag)
74: # addToDict(list,"text",sec[2])
75: # sectionXPath+="/section"
76: # sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
77: # return list
78: #
79: #
80: #def parseSection(self,section):
81: # type=""
82: # header=""
83: # level=section.getAttribute('level')
84: # for heading in section.childNodes:
85: # if getattr(heading,'tagName','')=="heading":
86: #
87: # type=heading.getAttribute('class')
88: # header=getText(self,heading.childNodes)
89: #
90: # if type=="": # falls heading fehlt, pruefe ob erster par richtig
91: # par=section.getElementsByTagName('par')[0]
92: # type=par.getAttribute('class')
93: # header=getText(par.childNodes)
94: #
95: # #print section.childNodes
96: # #pars=Evaluate('par',section)
97: # pars=section.childNodes
98: # content=par2html(self,pars)
99: # #print "CONTENT",repr(content)
100: # return (type,header,content,level)
101: #
102: #def parseTable(table):
103: # fields={}
104: # rows=table.getElementsByTagName('html:tr')
105: # for row in rows:
106: # #print "ROW"
107: # cols=row.getElementsByTagName('html:td')
108: #
109: # #Name des Datenfeldes einlesen
110: # try:
111: # field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
112: # #print "field",field
113: # except:
114: # print "error"
115: # field=""
116: #
117: # #Wandeln der Eintraege in HTML
118: #
119: # #pars=cols[1].getElementsByTagName('par')
120: # pars=cols[1].childNodes
121: #
122: # html=par2html(self,pars,tags=("",";"))
123: #
124: # addToDict(fields,field,html)
125: # #print fields
126: # return fields
127: #
128: #def par2html(self,pars,tags=None):
129: # html=""
130: #
131: # for par in pars:
132: # tagName=getattr(par,'tagName','')
133: # if tagName in ["par","inline"]:
134: # #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
135: # #print "par",par
136: # if not tags:
137: # try:
138: # tag=xml2htmlArray[par.getAttribute('class')]
139: # except:
140: # tag=('<p>','</p>')
141: # else:
142: # tag=tags
143: # #print "TAG",tag
144: # content=getText(self,par.childNodes,par.getAttribute('class'))
145: #
146: #
147: #
148: # #print par.getAttribute('class'),node
149: # try:
150: # html+=tag[0]+content+tag[1]
151: # except:
152: # html=+tag[0]+content+tag[1]
153: #
154: # elif tagName=="pb":
155: # html+="<pb/>"
156: #
157: #
158: # try:
159: #
160: # return html
161: # except:
162: # return ""
1.1 dwinter 163:
def getXlink(nodes):
    """Search *nodes* for xlinks and return them rendered as HTML.

    Nodes without attributes, or without an ``xlink:type`` attribute, are
    skipped.  The HTML of all matching nodes is concatenated; an empty
    string is returned when nothing matches.
    """
    ret = ""
    for node in nodes:
        attributes = node.attributes
        if attributes and 'xlink:type' in attributes.keys():  # is a xlink?
            # xlink2html is declared as (self, xlink, parClass=None).  The
            # previous call xlink2html(node) bound the node to `self` and
            # raised a TypeError.  This function has no instance to pass
            # on, so None is handed over in its place.
            ret += xlink2html(None, node)
    return ret
172:
def checkRef(self,ref):
    """Test whether a reference should be displayed.

    Each table below is queried through ``self.search(var=<sql>)`` for a row
    whose ``reference`` column equals *ref*, using the table's extra
    condition.  Querying stops at the first truthy result.

    Returns the first truthy search result, otherwise a falsy value
    (``None`` for an empty *ref*, else the last falsy search result).
    """
    if not ref:
        # An absent/empty reference cannot identify an item; skip the queries.
        return None

    # table name -> extra SQL condition appended to the query
    dbs={'vl_literature':'AND online = \'1\'',
         'vl_technology':'AND complete =\'yes\'',
         'vl_people':'AND complete =\'yes\'',
         'vl_sites':'AND complete =\'yes\'',
         'vl_transcript':'AND complete =\'yes\'',
         'vl_essays':'AND online =\'yes\'',
         'vl_categories':''
         }

    # ref is taken from document markup and ends up inside a quoted SQL
    # literal; double embedded single quotes (standard SQL escaping) so it
    # cannot terminate the literal.
    # NOTE(review): confirm the backend behind self.search uses standard
    # quote doubling.
    quoted = ref.replace("'", "''")

    res=None
    for db in dbs:
        searchStr=str("select reference from %s where reference =\'%s\' %s"%(db,quoted,dbs[db]))
        res=res or self.search(var=searchStr)
        if res:
            break
    return res
188:
def link2html(self,str):
    """Convert <link> elements of an XML fragment into HTML <a> elements.

    The fragment is wrapped in a <txt> root and parsed with minidom.  Every
    <link> is renamed to <a>.  Links carrying an ``href`` attribute get
    ``class="external"``.  Links whose ``ref`` passes ``self.checkRef`` get
    an ``href`` pointing at ``<SERVER_URL>/references?id=<ref>``, extended
    by ``&page=`` / ``&mk=`` when those attributes are set.

    Returns the serialized content of the <txt> wrapper as unicode, or
    ``u""`` for empty input.
    """
    if not str:
        return u""

    # The former re.sub("\&","&",str) replaced '&' with itself.  Escape
    # ampersands that do not already start an entity reference, so bare
    # '&' characters no longer make the XML parser fail.
    str=re.sub(r"&(?!#?\w+;)","&amp;",str)
    dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")

    for link in dom.getElementsByTagName("link"):
        # minidom serializes using tagName, so this renames the element
        link.tagName="a"
        ref=link.getAttribute("ref")
        pn=link.getAttribute("page")
        mk=link.getAttribute("mk")

        if link.getAttribute("href"):
            link.setAttribute("class","external")

        # Only look up links that actually carry a reference; otherwise an
        # external href could be overwritten with "/references?id=".
        if ref and self.checkRef(ref):
            more = ""
            if pn:
                more += "&page=%s"%pn
            if mk:
                more += "&mk=%s"%mk
            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more)

    newxml=dom.toxml('utf-8')

    match=regexpTXT.search(newxml)
    if match is None:
        # nothing between <txt> and </txt> (e.g. serialized as <txt/>)
        return u""
    return match.group(1).decode('utf-8') # we return unicode
1.7 casties 230:
def related2html(self,str):
    """Convert <link> elements for related library items into HTML.

    Every <link> is renamed to <a>.  For links with a ``ref`` attribute,
    ``vl_literature`` is queried through ``self.search`` for an authorized
    row with that reference:

    * row with ``online == 1``: the link gets an ``href`` to
      ``<SERVER_URL>/references?id=<ref>`` (plus ``&page=`` if set), a
      "click to view" title, loses its ``ref`` attribute and is preceded by
      an en-dash prefix;
    * any other row: the full reference is stored in ``alt``, the link
      becomes an expandable "x_offline" link and its text is prefixed
      with "+ ".

    Returns the serialized content as unicode, or ``u""`` for empty input.
    """
    if not str:
        return u""

    # The former re.sub("\&","&",str) replaced '&' with itself.  Escape
    # ampersands that do not already start an entity reference, so bare
    # '&' characters no longer make the XML parser fail.
    str=re.sub(r"&(?!#?\w+;)","&amp;",str)
    dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")

    for link in dom.getElementsByTagName("link"):
        link.tagName = "a"
        ref = link.getAttribute("ref")
        pn = link.getAttribute("page")

        if not ref:
            # nothing to look up; leave the link as a plain <a>
            continue

        # ref comes from the document and is placed inside a quoted SQL
        # literal: double embedded single quotes (standard SQL escaping).
        # NOTE(review): confirm the backend behind self.search uses
        # standard quote doubling.
        searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref.replace("'","''"))
        res = self.search(var=searchStr)
        if not res:
            continue

        if res[0]['online'] == 1:
            # item is available online
            href = self.REQUEST['SERVER_URL']+"/references?id="+ref
            if pn:
                href += "&page="+pn
            link.setAttribute("href",href)
            link.setAttribute("title", "click to view")
            link.removeAttribute("ref")

            # prefix preceding the link; inserted via the link's own parent
            # so that links nested below the root element work as well
            prefix = dom.createTextNode(u"\u2013\u0020") # = ndash + space
            link.parentNode.insertBefore(prefix, link)
        else:
            # item exists only as a bibliographic entry
            link.setAttribute("alt", unicodify(res[0]['fullreference']))
            link.setAttribute("title", "click to expand")
            link.setAttribute("onclick", "return toggle(this);")
            link.setAttribute("class", "x_offline")

            # prefix inside link text; an empty link or one starting with
            # an element gets a fresh text node instead of failing
            first = link.firstChild
            if first is not None and first.nodeType in (first.TEXT_NODE, first.CDATA_SECTION_NODE):
                first.data = '+ ' + first.data
            else:
                link.insertBefore(dom.createTextNode('+ '), first)

    newxml=dom.toxml('utf-8')

    match=regexpTXT.search(newxml)
    if match is None:
        # nothing between <txt> and </txt> (e.g. serialized as <txt/>)
        return u""
    return match.group(1).decode('utf-8') # we return unicode
1.7 casties 281:
1.5 dwinter 282:
1.1 dwinter 283:
284:
def xml2html(self,str,quote="yes"):
    """Convert <link> elements of an XML document into HTML <a> elements.

    The document is parsed with 4Suite's NonvalidatingReader.  Each <link>
    is replaced by a new <a> element that takes over the link's child
    nodes; when ``self.checkRef`` accepts the link's ``ref``, the <a> gets
    an ``href`` to ``<SERVER_URL>/references?id=<ref>`` (plus ``&page=``
    if set).

    quote -- ampersands are escaped only when this equals "yes2"; the
             default "yes" leaves the input untouched.
             NOTE(review): "yes2" looks like a deliberate way of switching
             the escaping off -- confirm before changing the default.

    Returns the content of the <page> element as unicode.  If the
    pretty-printed document contains no <page> element, or it cannot be
    decoded, the whole serialized document is returned as a byte string.
    Empty input yields "".
    """
    if not str:
        return ""

    if quote=="yes2":
        # The former re.sub("\&","&",str) replaced '&' with itself; escape
        # ampersands that do not already start an entity reference.
        str=re.sub(r"&(?!#?\w+;)","&amp;",str)

    dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
    links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
    for link in links:
        ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref")
        pn=link.getAttributeNS(EMPTY_NAMESPACE,"page")

        # iterate over a copy: appendChild moves each node out of `link`
        newLink=dom.createElementNS(EMPTY_NAMESPACE,"a")
        for child in link.childNodes[0:]:
            newLink.appendChild(child)

        link.parentNode.replaceChild(newLink,link)

        # skip the lookup for links without a reference
        if ref and self.checkRef(ref):
            href=self.REQUEST['SERVER_URL']+"/references?id="+ref
            if pn:
                href+="&page="+pn
            newLink.setAttributeNS(EMPTY_NAMESPACE,"href",href)

    buf = cStringIO.StringIO()
    try:
        PrettyPrint(dom, stream=buf)
        str = buf.getvalue()
    finally:
        buf.close()

    match=regexpPage.search(str)
    if match is None:
        # sometimes there is no <page> element: fall back to the whole document
        return str
    try:
        return match.group(1).decode('utf-8')
    except UnicodeError:
        return str
1.5 dwinter 331:
1.1 dwinter 332:
def xlink2html(self,xlink,parClass=None):
    """Render a single xlink element as HTML.

    * <image>: an <img> whose ``src`` is the element's ``href``.
    * <link>: the ``href`` is URL-unquoted.  If its scheme is http, https
      or file, the result is an <img> (when *parClass* is "Picture") or an
      <a> labelled with the element's text.  Any other ``href`` is emitted
      as is, after quoting an unquoted ``ref=`` value.
    * any other tag: an empty string.

    self     -- passed through to getText, not used otherwise
    parClass -- class of the enclosing paragraph
    """
    ret=""
    tag=xlink.tagName.lower()

    if tag=="image":
        ret +="""<img src="%s" />"""%xlink.getAttribute('href')
    elif tag=="link":
        reference=urllib.unquote(xlink.getAttribute('href'))
        label=getText(self,xlink.childNodes)

        # check if href is already a correct url
        if reference.split(":")[0] in ('http','https','file'):
            if parClass=="Picture":
                ret +="""<img src="%s" />"""%(reference)
            else:
                ret +="""<a href="%s" >%s</a>"""%(reference,label)
        else: # transform
            # Insert quotes around the value of the ref attribute if they
            # are missing.  The lookahead leaves values that are already
            # quoted alone; before, they were wrapped a second time.
            reference=re.sub(r"ref=(?![\"'])([^>]*)>",r'ref="\g<1>">',reference)
            ret +=reference

    return ret
357:
def getText(self,nodelist,parClass=None):
    """Concatenate the text/HTML content of *nodelist* into a unicode string.

    * text nodes contribute their data;
    * <inline> elements are rendered with par2html;
    * <pb> elements become the literal "<pb/>";
    * other nodes with a ``type`` attribute are treated as xlinks and
      rendered with xlink2html;
    * everything else (comments, plain elements, ...) is skipped.

    self     -- passed through to par2html / xlink2html
    parClass -- paragraph class, passed through to xlink2html

    NOTE(review): par2html is only present as commented-out code in this
    module, so an <inline> node raises NameError unless par2html is
    provided elsewhere -- confirm.
    NOTE(review): getXlink tests for 'xlink:type', this function for
    'type' -- confirm which attribute name is intended.
    """
    rc = u''
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            data = node.data
            if not isinstance(data, type(u'')):
                # Byte data cannot be safely appended to unicode; decode
                # it rather than discarding the text collected so far.
                data = data.decode('utf-8','replace')
            rc += data
            continue

        # comment/CDATA/PI nodes have no tagName; treat them as "no tag"
        tagName = getattr(node,'tagName','')
        if tagName =="inline":
            rc+=par2html(self,[node])
        elif tagName =="pb":
            rc+="<pb/>"
        elif node.attributes and 'type' in node.attributes.keys(): #is a xlink?
            rc +=xlink2html(self,node,parClass)

    return rc
397:
398:
399: #filename=argv[1]
400: #fileString=file(filename).read()
401: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>