version 1.5, 2005/10/26 11:18:19
|
version 1.18.2.2, 2012/08/29 07:53:31
|
Line 1
|
Line 1
|
from sys import argv |
from sys import argv |
|
|
import string |
import string |
|
import logging |
import xml.dom.minidom |
import xml.dom.minidom |
import Ft.Xml.XLink.Processor |
import Ft.Xml.XLink.Processor |
import Ft.Xml.XLink.XLinkElements |
import Ft.Xml.XLink.XLinkElements |
Line 15 from Ft.Xml import EMPTY_NAMESPACE
|
Line 16 from Ft.Xml import EMPTY_NAMESPACE
|
from Ft.Lib import Uri |
from Ft.Lib import Uri |
import urllib |
import urllib |
import re |
import re |
|
from ECHO_collection import unicodify,utf8ify |
|
|
# Regular expressions used to cut the payload back out of a serialized
# DOM tree: the <txt>...</txt> wrapper added around link fragments and
# the <page>...</page> wrapper used by xml2html.
patternTXT=r"<\s*txt.*?>(.*?)</txt>"
regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
patternPage=r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)

# Mapping: class attribute of a <par>/<inline> node -> (open tag, close tag)
# used by par2html to render the node as HTML.
xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
|
# |
def addToDict(dict,name,value):
    """Append value to the list stored under name in dict.

    The entry is created as a list on first use ("als array anlegen").
    Returns 1 on success, 0 if name is empty (nothing is stored then).
    """
    if name=="":
        return 0
    # setdefault replaces the deprecated has_key() check + manual
    # list creation of the original (Python 2/3 compatible)
    dict.setdefault(name,[]).append(value)
    return 1
|
# |
def proj2hash(self,xmlstring):
    """wandelt xml-files fuer die projekte in ein hash

    Parse a project XML string and return a dict mapping paragraph
    class names (and the keys 'WEB_project_header',
    'WEB_project_description', 'text') to lists of HTML fragments.
    """
    dom=xml.dom.minidom.parseString(xmlstring)
    list={}  # result dict; name kept for backward compatibility

    # collect the title paragraphs directly below the first <part>
    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
    for par in pars:
        className=par.getAttribute('class')
        content=getText(self,par.childNodes)
        addToDict(list,className,content)

    # walk the <section> tree level by level:
    # "section", then "section/section", ... until a level is empty
    sectionXPath="section"
    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
    while sections:
        for section in sections:
            sec=parseSection(self,section)  # (type, header, content, level)
            if sec[0]=="WEB_project_header": # Sonderfall project
                addToDict(list,'WEB_project_header',sec[1]) # store title
                addToDict(list,'WEB_project_description',sec[2]) # store description
            else: # no information in heading
                # render the heading as <h3>..<hN> according to nesting level
                level=int(sec[3])+2
                aTag="<h%i>"%level
                eTag="</h%i>"%level
                addToDict(list,"text",aTag+sec[1]+eTag)
                addToDict(list,"text",sec[2])
        sectionXPath+="/section"
        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
    return list
|
# |
|
# |
def parseSection(self,section):
    """Parse a <section> node.

    Returns a tuple (type, header, content, level): the class and text
    of the section's <heading>, the section body rendered as HTML, and
    the section's level attribute.
    """
    type=""
    header=""
    level=section.getAttribute('level')
    for heading in section.childNodes:
        if getattr(heading,'tagName','')=="heading":
            type=heading.getAttribute('class')
            header=getText(self,heading.childNodes)

    if type=="": # falls heading fehlt, pruefe ob erster par richtig
        # no heading: fall back to the first <par> of the section
        par=section.getElementsByTagName('par')[0]
        type=par.getAttribute('class')
        # BUGFIX: the self argument was missing here, unlike every
        # other getText(self, ...) call in this module
        header=getText(self,par.childNodes)

    pars=section.childNodes
    content=par2html(self,pars)
    return (type,header,content,level)
|
# |
def parseTable(table):
    """Parse an XML table of html:tr/html:td rows.

    The first cell of each row names the data field (class attribute of
    its first <par>), the second cell holds the entries, which are
    converted to HTML.  Returns a dict field name -> list of HTML strings.
    """
    fields={}
    rows=table.getElementsByTagName('html:tr')
    for row in rows:
        cols=row.getElementsByTagName('html:td')

        # Name des Datenfeldes einlesen (read the field name)
        try:
            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except Exception:
            # row without a usable first cell; was a bare print "error"
            logging.error("parseTable: could not read field name")
            field=""

        # Wandeln der Eintraege in HTML (convert the entries to HTML)
        pars=cols[1].childNodes
        # NOTE(review): `self` is not defined in this function's scope --
        # this presumably was a method (def parseTable(self, table));
        # confirm the intended signature against the callers.
        html=par2html(self,pars,tags=("",";"))

        addToDict(fields,field,html)
    return fields
|
# |
def par2html(self,pars,tags=None):
    """Render a list of DOM nodes (<par>, <inline>, <pb>) as HTML.

    tags -- optional (open, close) tag pair applied to every paragraph;
    if None, the pair is looked up per paragraph class in xml2htmlArray,
    falling back to ('<p>', '</p>').
    """
    html=""
    for par in pars:
        tagName=getattr(par,'tagName','')
        if tagName in ["par","inline"]:
            if not tags:
                # dict lookup with default replaces the original
                # try/except KeyError around xml2htmlArray[...]
                tag=xml2htmlArray.get(par.getAttribute('class'),('<p>','</p>'))
            else:
                tag=tags
            content=getText(self,par.childNodes,par.getAttribute('class'))
            # html is always initialized above, so the original fallback
            # branch (a broken `html=+...`) was dead code and is removed
            html+=tag[0]+content+tag[1]
        elif tagName=="pb":
            html+="<pb/>"  # page break marker
    return html
|
|
def getXlink(nodes): |
def getXlink(nodes): |
"""searches xlinks and gives them back as html""" |
"""searches xlinks and gives them back as html""" |
Line 175 def checkRef(self,ref):
|
Line 177 def checkRef(self,ref):
|
'vl_people':'AND complete =\'yes\'', |
'vl_people':'AND complete =\'yes\'', |
'vl_sites':'AND complete =\'yes\'', |
'vl_sites':'AND complete =\'yes\'', |
'vl_transcript':'AND complete =\'yes\'', |
'vl_transcript':'AND complete =\'yes\'', |
'vl_essays':'AND online =\'yes\'' |
'vl_essays':'AND online =\'yes\'', |
|
'vl_categories':'' |
} |
} |
res=None |
res=None |
for db in dbs.keys(): |
for db in dbs.keys(): |
Line 184 def checkRef(self,ref):
|
Line 187 def checkRef(self,ref):
|
return res |
return res |
|
|
def link2html(self,str):
    """link2html: links in html wandeln (convert <link> elements to <a>).

    Wraps str in a <txt> root, turns every <link> into an <a>: external
    links (href attribute) get class="external"; internal references
    (ref attribute) that checkRef() accepts get an href pointing at the
    /references view, carrying page/mk parameters when present.
    Returns the converted content as unicode, or u"" for empty input.
    """
    if str:
        str=re.sub("\&","&",str)
        dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")

        links=dom.getElementsByTagName("link")
        for link in links:
            link.tagName="a"
            ref=link.getAttribute("ref")
            pn=link.getAttribute("page")
            mk=link.getAttribute("mk")
            href= link.getAttribute("href")
            if href:
                # plain web link: just mark it as external
                link.setAttribute("class","external")

            if self.checkRef(ref):
                # build the extra query parameters for the reference URL
                more = ""
                if pn:
                    more += "&page=%s"%pn
                if mk:
                    more += "&mk=%s"%mk
                link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more)

        newxml=dom.toxml('utf-8')
        # cut the payload back out of the <txt> wrapper
        retStr=regexpTXT.search(newxml)
        retStr = retStr.group(1)
        return retStr.decode('utf-8') # we return unicode
    return u""
|
|
|
def related2html(self,str):
    """related library items: xlinks in html wandeln / mb 22.11.2006

    Like link2html, but for "related items": each <link ref="..."> is
    looked up in the database (literature, sites, people, technology,
    experiments -- erweiterung der related items von literatur auf
    weitere datenbankobjekte, mb 09.06.2009).  Available items become
    links to the /references view, offline literature items become
    expandable entries, unavailable refs are rendered inactive.
    Returns unicode, or u"" for empty input.
    """
    if str:
        str=re.sub("\&","&",str)
        dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
        links=dom.getElementsByTagName("link")

        for link in links:
            link.tagName = "a"
            ref = link.getAttribute("ref")
            pn = link.getAttribute("page")
            # the first three characters of ref encode the object type
            obj = ref[0:3]

            searchStr = ''
            if obj == 'lit':
                searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
            elif obj == 'sit':
                searchStr="select reference from vl_sites where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'per':
                searchStr="select reference from vl_people where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'tec':
                searchStr="select reference from vl_technology where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'exp':
                searchStr="select reference from vl_experiments where reference =\'%s\' and complete = 'yes'"%(ref)

            res = self.search(var=searchStr)

            if res:
                if obj == 'lit':
                    if res[0]['online'] == 1:
                        # literatur item online verfuegbar
                        if pn:
                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
                        else:
                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)

                        link.setAttribute("title", "click to view!")
                        link.removeAttribute("ref")

                        # prefix preceding the link
                        prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                        dom.documentElement.insertBefore(prefix, link)
                    else:
                        # literatur item nur als bibliographische angabe vorhanden
                        link.setAttribute("alt", unicodify(res[0]['fullreference']))
                        link.setAttribute("title", "click to expand")
                        link.setAttribute("onclick", "return toggle(this);")
                        link.setAttribute("class", "x_offline")

                        # prefix inside link text
                        link.firstChild.data = '+ ' + link.firstChild.data
                else:
                    # links zu den anderen datenbankobjekten
                    link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
                    link.setAttribute("title", "click to view")
                    link.removeAttribute("ref")

                    # prefix preceding the link
                    prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                    dom.documentElement.insertBefore(prefix, link)
            else:
                # objekt nicht verfuegbar/freigegeben oder (web)link mit href statt ref
                try:
                    link.removeAttribute("ref")
                    link.setAttribute("title", ref)
                except:
                    pass

                # prefix preceding the link
                prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                dom.documentElement.insertBefore(prefix, link)

        newxml=dom.toxml('utf-8')

        # cut the payload back out of the <txt> wrapper
        retStr=regexpTXT.search(newxml)
        retStr = retStr.group(1)
        #logging.debug("related2html out=%s"%repr(retStr))
        return retStr.decode('utf-8') # we return unicode
    return u""
|
|
|
|
|
|
Line 221 def xml2html(self,str,quote="yes"):
|
Line 324 def xml2html(self,str,quote="yes"):
|
if quote=="yes2": |
if quote=="yes2": |
str=re.sub("\&","&",str) |
str=re.sub("\&","&",str) |
#dom=xml.dom.minidom.parseString(str) |
#dom=xml.dom.minidom.parseString(str) |
|
logging.debug(str) |
dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/") |
dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/") |
#links=dom.getElementsByTagName("link") |
#links=dom.getElementsByTagName("link") |
links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom) |
links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom) |
Line 242 def xml2html(self,str,quote="yes"):
|
Line 346 def xml2html(self,str,quote="yes"):
|
|
|
if self.checkRef(ref): |
if self.checkRef(ref): |
if pn: |
if pn: |
newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&p="+pn) |
newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn) |
else: |
else: |
newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref) |
newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref) |
|
|
Line 257 def xml2html(self,str,quote="yes"):
|
Line 361 def xml2html(self,str,quote="yes"):
|
retStr=regexpPage.search(str) |
retStr=regexpPage.search(str) |
|
|
try: # hack warum fehtl manchmal page?? |
try: # hack warum fehtl manchmal page?? |
return retStr.group(1) |
return retStr.group(1).decode('utf-8') |
except: |
except: |
return str |
return str |
return "" |
return "" |