Annotation of ECHO_content/vlp_xmlhelpers.py, revision 1.16
1.1 dwinter 1: from sys import argv
2:
3: import string
1.11 casties 4: import logging
1.1 dwinter 5: import xml.dom.minidom
6: import Ft.Xml.XLink.Processor
7: import Ft.Xml.XLink.XLinkElements
8:
9: from Ft.Xml import XPath
10: from Ft.Xml.XPath import Evaluate
11: from Ft.Xml.XLink import XLINK_NAMESPACE
12: from Ft.Xml.XLink import XLinkElements
1.5 dwinter 13: import cStringIO
14: from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
15: from Ft.Xml import EMPTY_NAMESPACE
1.1 dwinter 16: from Ft.Lib import Uri
17: import urllib
18: import re
1.11 casties 19: from ECHO_collection import unicodify,utf8ify
1.1 dwinter 20:
1.5 dwinter 21: patternTXT=r"<\s*txt.*?>(.*?)</txt>"
22: regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
1.1 dwinter 23: patternPage=r"<\s*page.*?>(.*?)</page>"
24: regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
25:
1.14 casties 26: #xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
27: #
28: #def addToDict(dict,name,value):
29: # if name=="":
30: # return 0
31: # else:
32: #
33: # if not dict.has_key(name):
34: # dict[name]=[] # als array anlegen
35: #
36: # dict[name].append(value)
37: # return 1
38: #
39: #def proj2hash(self,xmlstring):
40: # """wandelt xml-files fuer die projekte in ein hash"""
41: #
42: # dom=xml.dom.minidom.parseString(xmlstring)
43: #
44: #
45: # list={}
46: #
47: # #gettitle
48: # pars=Evaluate('par',dom.getElementsByTagName('part')[0])
49: # for par in pars:
50: # className=par.getAttribute('class')
51: # content=getText(self,par.childNodes)
52: # addToDict(list,className,content)
53: #
54: #
55: # sectionXPath="section"
56: #
57: #
58: # sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
59: #
60: # while sections:
61: #
62: # for section in sections:
63: #
64: # sec=parseSection(self,section)
65: #
66: # if sec[0]=="WEB_project_header": # Sonderfall project
67: # addToDict(list,'WEB_project_header',sec[1]) # store title
68: # addToDict(list,'WEB_project_description',sec[2]) #store description
69: # else: # no information in heading
70: # level=int(sec[3])+2
71: # aTag="<h%i>"%level
72: # eTag="</h%i>"%level
73: # addToDict(list,"text",aTag+sec[1]+eTag)
74: # addToDict(list,"text",sec[2])
75: # sectionXPath+="/section"
76: # sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
77: # return list
78: #
79: #
80: #def parseSection(self,section):
81: # type=""
82: # header=""
83: # level=section.getAttribute('level')
84: # for heading in section.childNodes:
85: # if getattr(heading,'tagName','')=="heading":
86: #
87: # type=heading.getAttribute('class')
88: # header=getText(self,heading.childNodes)
89: #
90: # if type=="": # falls heading fehlt, pruefe ob erster par richtig
91: # par=section.getElementsByTagName('par')[0]
92: # type=par.getAttribute('class')
93: # header=getText(par.childNodes)
94: #
95: # #print section.childNodes
96: # #pars=Evaluate('par',section)
97: # pars=section.childNodes
98: # content=par2html(self,pars)
99: # #print "CONTENT",repr(content)
100: # return (type,header,content,level)
101: #
102: #def parseTable(table):
103: # fields={}
104: # rows=table.getElementsByTagName('html:tr')
105: # for row in rows:
106: # #print "ROW"
107: # cols=row.getElementsByTagName('html:td')
108: #
109: # #Name des Datenfeldes einlesen
110: # try:
111: # field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
112: # #print "field",field
113: # except:
114: # print "error"
115: # field=""
116: #
117: #	#Wandeln der Einträge in HTML
118: #
119: # #pars=cols[1].getElementsByTagName('par')
120: # pars=cols[1].childNodes
121: #
122: # html=par2html(self,pars,tags=("",";"))
123: #
124: # addToDict(fields,field,html)
125: # #print fields
126: # return fields
127: #
128: #def par2html(self,pars,tags=None):
129: # html=""
130: #
131: # for par in pars:
132: # tagName=getattr(par,'tagName','')
133: # if tagName in ["par","inline"]:
134: # #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
135: # #print "par",par
136: # if not tags:
137: # try:
138: # tag=xml2htmlArray[par.getAttribute('class')]
139: # except:
140: # tag=('<p>','</p>')
141: # else:
142: # tag=tags
143: # #print "TAG",tag
144: # content=getText(self,par.childNodes,par.getAttribute('class'))
145: #
146: #
147: #
148: # #print par.getAttribute('class'),node
149: # try:
150: # html+=tag[0]+content+tag[1]
151: # except:
152: # html=+tag[0]+content+tag[1]
153: #
154: # elif tagName=="pb":
155: # html+="<pb/>"
156: #
157: #
158: # try:
159: #
160: # return html
161: # except:
162: # return ""
1.1 dwinter 163:
def getXlink(nodes):
    """Search *nodes* for xlink elements and return them rendered as HTML.

    nodes -- iterable of DOM nodes; only element nodes with an
             'xlink:type' attribute are rendered.
    Returns the concatenated HTML string ("" if no xlinks found).
    """
    ret=""
    for node in nodes:
        if node.attributes:
            if 'xlink:type' in node.attributes.keys(): #is a xlink?
                # BUGFIX: xlink2html has signature (self, xlink, parClass=None);
                # the old single-argument call always raised TypeError.
                # self is never dereferenced in xlink2html, so pass None.
                ret += xlink2html(None, node)
    return ret
172:
def checkRef(self,ref):
    """Check whether a reference should be displayed.

    Looks *ref* up in every VLP database table; each table carries an
    extra WHERE clause restricting results to published/complete entries.
    Returns the first truthy search result, otherwise None.
    """
    # per-table visibility condition
    conditions = {
        'vl_literature': "AND online = '1'",
        'vl_technology': "AND complete ='yes'",
        'vl_people':     "AND complete ='yes'",
        'vl_sites':      "AND complete ='yes'",
        'vl_transcript': "AND complete ='yes'",
        'vl_essays':     "AND online ='yes'",
        'vl_categories': '',
        }
    found = None
    for table in conditions.keys():
        query = str("select reference from %s where reference =\'%s\' %s" % (table, ref, conditions[table]))
        # keep the first hit, but still query every table (as before)
        found = found or self.search(var=query)
    return found
188:
def link2html(self,str):
    """Convert <link> elements in an XML fragment into HTML <a> anchors.

    Links whose ref passes self.checkRef get an href pointing at the
    /references view (with optional page/mk query args); links that carry
    an explicit href are marked class="external".
    Returns the converted fragment as unicode; empty input yields u"".
    """
    if not str:
        return u""

    # NOTE: the pattern "\&" is just a literal '&', so this sub is a no-op;
    # kept for byte-compatibility with the original behaviour.
    text = re.sub("\&", "&", str)
    document = xml.dom.minidom.parseString(
        "<?xml version='1.0' encoding='utf-8'?><txt>" + utf8ify(text) + "</txt>")

    for anchor in document.getElementsByTagName("link"):
        anchor.tagName = "a"
        target = anchor.getAttribute("ref")
        page = anchor.getAttribute("page")
        marker = anchor.getAttribute("mk")

        if anchor.getAttribute("href"):
            # explicit href -> treat as external link
            anchor.setAttribute("class", "external")

        if self.checkRef(target):
            extra = ""
            if page:
                extra += "&page=%s" % page
            if marker:
                extra += "&mk=%s" % marker
            anchor.setAttribute("href",
                self.REQUEST['SERVER_URL'] + "/references?id=" + target + extra)

    serialized = document.toxml('utf-8')
    fragment = regexpTXT.search(serialized)
    return fragment.group(1).decode('utf-8')  # we return unicode
1.7 casties 230:
def related2html(self,str):
    """related library items: xlinks in html wandeln / mb 22.11.2006

    Converts <link ref="..."> elements into HTML anchors for related
    database objects. The three-letter prefix of the ref selects the
    database table (lit/sit/per/tec/exp). Returns unicode; empty input
    yields u"".
    """
    if str:

        str=re.sub("\&","&",str)
        dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
        links=dom.getElementsByTagName("link")

        for link in links:
            link.tagName = "a"
            ref = link.getAttribute("ref")
            pn = link.getAttribute("page")
            obj = ref[0:3]   # three-letter prefix selects the database table

            # extension of related items from literature to further database
            # objects, mb 05.06.2009
            # NOTE(review): ref is interpolated into SQL directly -- potential
            # SQL injection if refs can ever come from untrusted input.
            # BUGFIX: searchStr used to stay unbound (NameError) on the first
            # unknown prefix and stale (wrong query reused) on later ones;
            # reset it per link and skip the lookup when there is no query.
            searchStr = None
            if obj == 'lit':
                searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
            elif obj == 'sit':
                searchStr="select reference from vl_sites where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'per':
                searchStr="select reference from vl_people where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'tec':
                searchStr="select reference from vl_technology where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'exp':
                searchStr="select reference from vl_experiments where reference =\'%s\' and complete = 'yes'"%(ref)

            if searchStr:
                res = self.search(var=searchStr)
            else:
                # unknown prefix (e.g. plain web link with href): no lookup
                res = None

            if res:
                if obj == 'lit':
                    if res[0]['online'] == 1:
                        # literature item available online
                        if pn:
                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
                        else:
                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)

                        link.setAttribute("title", "click to view!")
                        link.removeAttribute("ref")

                        # prefix preceding the link
                        prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                        dom.documentElement.insertBefore(prefix, link)

                    else:
                        # literature item only available as bibliographic entry
                        link.setAttribute("alt", unicodify(res[0]['fullreference']))
                        link.setAttribute("title", "click to expand")
                        link.setAttribute("onclick", "return toggle(this);")
                        link.setAttribute("class", "x_offline")

                        # prefix inside link text
                        link.firstChild.data = '+ ' + link.firstChild.data
                else:
                    # links to the other database objects
                    link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
                    link.setAttribute("title", "click to view")
                    link.removeAttribute("ref")

                    # prefix preceding the link
                    prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                    dom.documentElement.insertBefore(prefix, link)

            else:
                # object not available/released, or (web)link with href instead of ref

                #if ref != '':
                #    link.removeAttribute("ref")
                #    link.setAttribute("title", ref)

                # prefix preceding the link
                prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                dom.documentElement.insertBefore(prefix, link)


        newxml=dom.toxml('utf-8')

        retStr=regexpTXT.search(newxml)
        retStr = retStr.group(1)
        #logging.debug("related2html out=%s"%repr(retStr))
        return retStr.decode('utf-8') # we return unicode

    return u""
1.7 casties 315:
1.5 dwinter 316:
1.1 dwinter 317:
318:
def xml2html(self,str,quote="yes"):
    """Convert <link> elements of an XML page to HTML <a> anchors (VLP variant).

    link2html duplication for VLP -- should be factored out of here
    (translated from the original German docstring).

    self  -- object providing checkRef() and REQUEST['SERVER_URL']
    str   -- XML source text (expected to contain a <page> element)
    quote -- only the value "yes2" triggers the &-substitution below;
             NOTE(review): the default "yes" never matches -- confirm intent.

    Returns the content of the <page> element as unicode, or the whole
    pretty-printed document if no <page> match is found, or "" for empty input.
    """
    if str:
        if quote=="yes2":
            # NOTE(review): pattern "\&" is a literal '&', so this replaces
            # '&' with '&' -- effectively a no-op.
            str=re.sub("\&","&",str)
        #dom=xml.dom.minidom.parseString(str)
        # parse with 4Suite (Ft.Xml) Domlette; base URI is only used for
        # relative-reference resolution
        dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
        #links=dom.getElementsByTagName("link")
        links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
        for link in links:
            #link.tagName="a"

            ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref")
            pn=link.getAttributeNS(EMPTY_NAMESPACE,"page")

            # copy the child list before moving the children into the new node
            cns=link.childNodes[0:]

            # replace <link> by <a>, carrying the children over
            newLink=dom.createElementNS(EMPTY_NAMESPACE,"a")
            for x in cns:
                newLink.appendChild(x)



            link.parentNode.replaceChild(newLink,link)

            # only make it a working hyperlink if the reference is public
            if self.checkRef(ref):
                if pn:
                    newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
                else:
                    newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref)

        #str= dom.toxml('utf-8')
        # serialize the modified DOM back to a UTF-8 byte string
        buf = cStringIO.StringIO()
        PrettyPrint(dom, stream=buf)
        str = buf.getvalue()
        buf.close()
        #str=PrettyPrint(dom.documentElement,encoding='UTF-8')
        #print link.toxml('utf-8')
        #print type(str)
        # extract just the <page> content from the serialized document
        retStr=regexpPage.search(str)

        try: # hack: why is <page> sometimes missing?? (original comment translated)
            return retStr.group(1).decode('utf-8')
        except:
            return str
    return ""
1.5 dwinter 365:
1.1 dwinter 366:
def xlink2html(self,xlink,parClass=None):
    """Render a single xlink element (<image> or <link>) as HTML.

    self     -- unused, kept for call-compatibility with the other helpers
    xlink    -- DOM element to convert
    parClass -- class of the surrounding paragraph; "Picture" renders
                http/file links as inline images instead of anchors

    Returns an HTML string ("" for unknown tag names).
    """
    # (removed unused local: attributes = xlink.attributes)
    ret=""

    if xlink.tagName.lower()=="image":
        ret +="""<img src="%s" />"""%xlink.getAttribute('href')
    elif xlink.tagName.lower()=="link":
        reference=urllib.unquote(xlink.getAttribute('href'))
        label=getText(self,xlink.childNodes)

        # check if href is already a correct url
        if reference.split(":")[0] in ['http','file']:
            if parClass=="Picture":
                ret +="""<img src="%s" />"""%(reference)
            else:

                ret +="""<a href="%s" >%s</a>"""%(reference,label)
        else: # transform
            #href=xml2html(self,reference)
            #print "refer",reference
            # add quotes around a bare ref attribute if they are missing
            # (translated from the original German comment)
            reference=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',reference)
            ret +=reference

    return ret
391:
def getText(self,nodelist,parClass=None):
    """Collect the text content of *nodelist* as (mostly) unicode.

    self     -- passed through to xlink2html / par2html
    nodelist -- DOM nodes to walk
    parClass -- paragraph class, forwarded to xlink2html

    Text nodes are appended verbatim, <inline> is delegated to par2html,
    <pb> becomes "<pb/>", and elements carrying a 'type' attribute are
    rendered via xlink2html. Other elements are ignored.
    """
    rc = u''
    for node in nodelist:

        if node.nodeType == node.TEXT_NODE:
            try:
                rc += node.data
            except Exception:
                # BUGFIX: the old fallback replaced rc wholesale
                # (rc = node.data / rc = "ERROR"), silently discarding all
                # text collected so far, and threw away the encode() result.
                # Append a best-effort value instead.
                try:
                    rc += node.data.encode('utf-8','ignore')
                except Exception:
                    rc += "ERROR"

        elif node.tagName =="inline":
            # NOTE(review): par2html is commented out in this module -- this
            # branch raises NameError unless par2html is provided elsewhere;
            # confirm against callers before relying on <inline> handling.
            rc+=par2html(self,[node])

        elif node.tagName =="pb":
            rc+="<pb/>"
        elif node.attributes:

            if 'type' in node.attributes.keys(): #is a xlink?

                try:
                    rc +=xlink2html(self,node,parClass).encode('utf-8')

                except Exception:
                    rc +=xlink2html(self,node,parClass)

    return rc
431:
432:
433: #filename=argv[1]
434: #fileString=file(filename).read()
435: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>