--- ECHO_content/vlp_xmlhelpers.py 2005/10/26 11:18:19 1.5 +++ ECHO_content/vlp_xmlhelpers.py 2012/08/29 07:53:31 1.18.2.2 @@ -1,6 +1,7 @@ from sys import argv import string +import logging import xml.dom.minidom import Ft.Xml.XLink.Processor import Ft.Xml.XLink.XLinkElements @@ -15,149 +16,150 @@ from Ft.Xml import EMPTY_NAMESPACE from Ft.Lib import Uri import urllib import re +from ECHO_collection import unicodify,utf8ify patternTXT=r"<\s*txt.*?>(.*?)" regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL) patternPage=r"<\s*page.*?>(.*?)" regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL) -xml2htmlArray={'WEB_normal':('

','

'),'Normal':('

','

'),'WEB_picture':('

','

'),'WEB_figuretitle':('

','

'),'WEB_bibliography':('

','

'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('

','

'),'FigureTitle':('

','

')} - -def addToDict(dict,name,value): - if name=="": - return 0 - else: - - if not dict.has_key(name): - dict[name]=[] # als array anlegen - - dict[name].append(value) - return 1 - -def proj2hash(self,xmlstring): - """wandelt xml-files fuer die projekte in ein hash""" - - dom=xml.dom.minidom.parseString(xmlstring) - - - list={} - - #gettitle - pars=Evaluate('par',dom.getElementsByTagName('part')[0]) - for par in pars: - className=par.getAttribute('class') - content=getText(self,par.childNodes) - addToDict(list,className,content) - - - sectionXPath="section" - - - sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) - - while sections: - - for section in sections: - - sec=parseSection(self,section) - - if sec[0]=="WEB_project_header": # Sonderfall project - addToDict(list,'WEB_project_header',sec[1]) # store title - addToDict(list,'WEB_project_description',sec[2]) #store description - else: # no information in heading - level=int(sec[3])+2 - aTag=""%level - eTag=""%level - addToDict(list,"text",aTag+sec[1]+eTag) - addToDict(list,"text",sec[2]) - sectionXPath+="/section" - sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) - return list - - -def parseSection(self,section): - type="" - header="" - level=section.getAttribute('level') - for heading in section.childNodes: - if getattr(heading,'tagName','')=="heading": - - type=heading.getAttribute('class') - header=getText(self,heading.childNodes) - - if type=="": # falls heading fehlt, pruefe ob erster par richtig - par=section.getElementsByTagName('par')[0] - type=par.getAttribute('class') - header=getText(par.childNodes) - - #print section.childNodes - #pars=Evaluate('par',section) - pars=section.childNodes - content=par2html(self,pars) - #print "CONTENT",repr(content) - return (type,header,content,level) - -def parseTable(table): - fields={} - rows=table.getElementsByTagName('html:tr') - for row in rows: - #print "ROW" - cols=row.getElementsByTagName('html:td') - - #Name des Datenfeldes einlesen - try: - field=cols[0].getElementsByTagName('par')[0].getAttribute('class') - #print "field",field - except: - print "error" - field="" - - #Wandeln der Eintrge in HTML - - #pars=cols[1].getElementsByTagName('par') - pars=cols[1].childNodes - - html=par2html(self,pars,tags=("",";")) - - addToDict(fields,field,html) - #print fields - return fields - -def par2html(self,pars,tags=None): - html="" - - for par in pars: - tagName=getattr(par,'tagName','') - if tagName in ["par","inline"]: - #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND') - #print "par",par - if not tags: - try: - tag=xml2htmlArray[par.getAttribute('class')] - except: - tag=('

','

') - else: - tag=tags - #print "TAG",tag - content=getText(self,par.childNodes,par.getAttribute('class')) - - - - #print par.getAttribute('class'),node - try: - html+=tag[0]+content+tag[1] - except: - html=+tag[0]+content+tag[1] - - elif tagName=="pb": - html+="" - - - try: - - return html - except: - return "" +#xml2htmlArray={'WEB_normal':('

','

'),'Normal':('

','

'),'WEB_picture':('

','

'),'WEB_figuretitle':('

','

'),'WEB_bibliography':('

','

'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('

','

'),'FigureTitle':('

','

')} +# +#def addToDict(dict,name,value): +# if name=="": +# return 0 +# else: +# +# if not dict.has_key(name): +# dict[name]=[] # als array anlegen +# +# dict[name].append(value) +# return 1 +# +#def proj2hash(self,xmlstring): +# """wandelt xml-files fuer die projekte in ein hash""" +# +# dom=xml.dom.minidom.parseString(xmlstring) +# +# +# list={} +# +# #gettitle +# pars=Evaluate('par',dom.getElementsByTagName('part')[0]) +# for par in pars: +# className=par.getAttribute('class') +# content=getText(self,par.childNodes) +# addToDict(list,className,content) +# +# +# sectionXPath="section" +# +# +# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) +# +# while sections: +# +# for section in sections: +# +# sec=parseSection(self,section) +# +# if sec[0]=="WEB_project_header": # Sonderfall project +# addToDict(list,'WEB_project_header',sec[1]) # store title +# addToDict(list,'WEB_project_description',sec[2]) #store description +# else: # no information in heading +# level=int(sec[3])+2 +# aTag=""%level +# eTag=""%level +# addToDict(list,"text",aTag+sec[1]+eTag) +# addToDict(list,"text",sec[2]) +# sectionXPath+="/section" +# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0]) +# return list +# +# +#def parseSection(self,section): +# type="" +# header="" +# level=section.getAttribute('level') +# for heading in section.childNodes: +# if getattr(heading,'tagName','')=="heading": +# +# type=heading.getAttribute('class') +# header=getText(self,heading.childNodes) +# +# if type=="": # falls heading fehlt, pruefe ob erster par richtig +# par=section.getElementsByTagName('par')[0] +# type=par.getAttribute('class') +# header=getText(par.childNodes) +# +# #print section.childNodes +# #pars=Evaluate('par',section) +# pars=section.childNodes +# content=par2html(self,pars) +# #print "CONTENT",repr(content) +# return (type,header,content,level) +# +#def parseTable(table): +# fields={} +# rows=table.getElementsByTagName('html:tr') +# for row in rows: +# #print "ROW" +# cols=row.getElementsByTagName('html:td') +# +# #Name des Datenfeldes einlesen +# try: +# field=cols[0].getElementsByTagName('par')[0].getAttribute('class') +# #print "field",field +# except: +# print "error" +# field="" +# +# #Wandeln der Eintrge in HTML +# +# #pars=cols[1].getElementsByTagName('par') +# pars=cols[1].childNodes +# +# html=par2html(self,pars,tags=("",";")) +# +# addToDict(fields,field,html) +# #print fields +# return fields +# +#def par2html(self,pars,tags=None): +# html="" +# +# for par in pars: +# tagName=getattr(par,'tagName','') +# if tagName in ["par","inline"]: +# #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND') +# #print "par",par +# if not tags: +# try: +# tag=xml2htmlArray[par.getAttribute('class')] +# except: +# tag=('

','

') +# else: +# tag=tags +# #print "TAG",tag +# content=getText(self,par.childNodes,par.getAttribute('class')) +# +# +# +# #print par.getAttribute('class'),node +# try: +# html+=tag[0]+content+tag[1] +# except: +# html=+tag[0]+content+tag[1] +# +# elif tagName=="pb": +# html+="" +# +# +# try: +# +# return html +# except: +# return "" def getXlink(nodes): """searches xlinks and gives them back as html""" @@ -175,7 +177,8 @@ def checkRef(self,ref): 'vl_people':'AND complete =\'yes\'', 'vl_sites':'AND complete =\'yes\'', 'vl_transcript':'AND complete =\'yes\'', - 'vl_essays':'AND online =\'yes\'' + 'vl_essays':'AND online =\'yes\'', + 'vl_categories':'' } res=None for db in dbs.keys(): @@ -184,11 +187,13 @@ def checkRef(self,ref): return res def link2html(self,str): - """link2html liks in html wandeln""" + """link2html links in html wandeln""" if str: str=re.sub("\&","&",str) - dom=xml.dom.minidom.parseString(""+str+"") + dom=xml.dom.minidom.parseString(""+utf8ify(str)+"") + + links=dom.getElementsByTagName("link") @@ -196,23 +201,121 @@ def link2html(self,str): link.tagName="a" ref=link.getAttribute("ref") pn=link.getAttribute("page") - + mk=link.getAttribute("mk") + href= link.getAttribute("href") + if href: + link.setAttribute("class","external") + if self.checkRef(ref): - if pn: - link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn) - else: - link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref) + more = "" + if pn: + more += "&page=%s"%pn + + if mk: + more += "&mk=%s"%mk + + link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more) newxml=dom.toxml('utf-8') + + retStr=regexpTXT.search(newxml) + retStr = retStr.group(1) - return retStr.group(1) + return retStr.decode('utf-8') # we return unicode + + return u"" + +def related2html(self,str): + """related library items: xlinks in html wandeln / mb 22.11.2006""" + if str: + + str=re.sub("\&","&",str) + dom=xml.dom.minidom.parseString(""+utf8ify(str)+"") + links=dom.getElementsByTagName("link") + + for link in links: + link.tagName = "a" + ref = link.getAttribute("ref") + pn = link.getAttribute("page") + obj = ref[0:3] + + """erweiterung der related items von literatur auf weitere datenbankobjekte, mb 09.06.2009""" + searchStr = '' + if obj == 'lit': + searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref) + elif obj == 'sit': + searchStr="select reference from vl_sites where reference =\'%s\' and complete = 'yes'"%(ref) + elif obj == 'per': + searchStr="select reference from vl_people where reference =\'%s\' and complete = 'yes'"%(ref) + elif obj == 'tec': + searchStr="select reference from vl_technology where reference =\'%s\' and complete = 'yes'"%(ref) + elif obj == 'exp': + searchStr="select reference from vl_experiments where reference =\'%s\' and complete = 'yes'"%(ref) + + res = self.search(var=searchStr) + + if res: + if obj == 'lit': + if res[0]['online'] == 1: + # literatur item online verfuegbar + if pn: + link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn) + else: + link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref) + + link.setAttribute("title", "click to view!") + link.removeAttribute("ref") + + # prefix preceding the link + prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space + dom.documentElement.insertBefore(prefix, link) + + else: + # literatur item nur als bibliographische angabe vorhanden + link.setAttribute("alt", unicodify(res[0]['fullreference'])) + link.setAttribute("title", "click to expand") + link.setAttribute("onclick", "return toggle(this);") + link.setAttribute("class", "x_offline") + + # prefix inside link text + link.firstChild.data = '+ ' + link.firstChild.data + else: + # links zu den anderen datenbankobjekten + link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref) + link.setAttribute("title", "click to view") + link.removeAttribute("ref") + + # prefix preceding the link + prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space + dom.documentElement.insertBefore(prefix, link) + + else: + # objekt nicht verfuegbar/freigegeben oder (web)link mit href statt ref + + try: + link.removeAttribute("ref") + link.setAttribute("title", ref) + except: + pass + + + # prefix preceding the link + prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space + dom.documentElement.insertBefore(prefix, link) + + + newxml=dom.toxml('utf-8') + + retStr=regexpTXT.search(newxml) + retStr = retStr.group(1) + #logging.debug("related2html out=%s"%repr(retStr)) + return retStr.decode('utf-8') # we return unicode + + return u"" - - return "" - def xml2html(self,str,quote="yes"): @@ -221,6 +324,7 @@ def xml2html(self,str,quote="yes"): if quote=="yes2": str=re.sub("\&","&",str) #dom=xml.dom.minidom.parseString(str) + logging.debug(str) dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/") #links=dom.getElementsByTagName("link") links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom) @@ -242,7 +346,7 @@ def xml2html(self,str,quote="yes"): if self.checkRef(ref): if pn: - newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&p="+pn) + newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn) else: newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref) @@ -257,7 +361,7 @@ def xml2html(self,str,quote="yes"): retStr=regexpPage.search(str) try: # hack warum fehtl manchmal page?? - return retStr.group(1) + return retStr.group(1).decode('utf-8') except: return str return ""