from sys import argv
import string
import logging
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements
from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements
import cStringIO
from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri
import urllib
import re
from ECHO_collection import unicodify,utf8ify
# Group 1 captures the content of a <txt>...</txt> element in serialized XML.
# The closing tag has to be part of the pattern: with a bare non-greedy
# "(.*?)" at the end the group would always match the empty string.
patternTXT=r"<\s*txt.*?>(.*?)</txt>"
regexpTXT = re.compile(patternTXT, re.IGNORECASE | re.DOTALL)
# Group 1 captures the content of a <page>...</page> element.
patternPage=r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE | re.DOTALL)
# NOTE: every HTML tag string in this disabled mapping is empty; the tags have
# to be filled in again before the table can be re-enabled.
#xml2htmlArray={'WEB_normal':('',''),'Normal':('',''),'WEB_picture':('',''),'WEB_figuretitle':('',''),'WEB_bibliography':('',''),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('',''),'FigureTitle':('','')}
#
#def addToDict(dict,name,value):
# if name=="":
# return 0
# else:
#
# if not dict.has_key(name):
# dict[name]=[] # als array anlegen
#
# dict[name].append(value)
# return 1
#
#def proj2hash(self,xmlstring):
# """wandelt xml-files fuer die projekte in ein hash"""
#
# dom=xml.dom.minidom.parseString(xmlstring)
#
#
# list={}
#
# #gettitle
# pars=Evaluate('par',dom.getElementsByTagName('part')[0])
# for par in pars:
# className=par.getAttribute('class')
# content=getText(self,par.childNodes)
# addToDict(list,className,content)
#
#
# sectionXPath="section"
#
#
# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
#
# while sections:
#
# for section in sections:
#
# sec=parseSection(self,section)
#
# if sec[0]=="WEB_project_header": # Sonderfall project
# addToDict(list,'WEB_project_header',sec[1]) # store title
# addToDict(list,'WEB_project_description',sec[2]) #store description
# else: # no information in heading
# level=int(sec[3])+2
# aTag=""%level
# eTag=""%level
# addToDict(list,"text",aTag+sec[1]+eTag)
# addToDict(list,"text",sec[2])
# sectionXPath+="/section"
# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
# return list
#
#
#def parseSection(self,section):
# type=""
# header=""
# level=section.getAttribute('level')
# for heading in section.childNodes:
# if getattr(heading,'tagName','')=="heading":
#
# type=heading.getAttribute('class')
# header=getText(self,heading.childNodes)
#
# if type=="": # falls heading fehlt, pruefe ob erster par richtig
# par=section.getElementsByTagName('par')[0]
# type=par.getAttribute('class')
# header=getText(par.childNodes)
#
# #print section.childNodes
# #pars=Evaluate('par',section)
# pars=section.childNodes
# content=par2html(self,pars)
# #print "CONTENT",repr(content)
# return (type,header,content,level)
#
#def parseTable(table):
# fields={}
# rows=table.getElementsByTagName('html:tr')
# for row in rows:
# #print "ROW"
# cols=row.getElementsByTagName('html:td')
#
# #Name des Datenfeldes einlesen
# try:
# field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
# #print "field",field
# except:
# print "error"
# field=""
#
# #Wandeln der Eintrge in HTML
#
# #pars=cols[1].getElementsByTagName('par')
# pars=cols[1].childNodes
#
# html=par2html(self,pars,tags=("",";"))
#
# addToDict(fields,field,html)
# #print fields
# return fields
#
#def par2html(self,pars,tags=None):
# html=""
#
# for par in pars:
# tagName=getattr(par,'tagName','')
# if tagName in ["par","inline"]:
# #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
# #print "par",par
# if not tags:
# try:
# tag=xml2htmlArray[par.getAttribute('class')]
# except:
# tag=('','')
# else:
# tag=tags
# #print "TAG",tag
# content=getText(self,par.childNodes,par.getAttribute('class'))
#
#
#
# #print par.getAttribute('class'),node
# try:
# html+=tag[0]+content+tag[1]
# except:
# html=+tag[0]+content+tag[1]
#
# elif tagName=="pb":
# html+=""
#
#
# try:
#
# return html
# except:
# return ""
def getXlink(nodes):
    """Search *nodes* for XLinks and return them rendered as HTML.

    A node is treated as an XLink when it has an ``xlink:type`` attribute.

    Returns the concatenated output of xlink2html() for all such nodes, or
    "" when there is none.
    """
    ret = ""
    for node in nodes:
        attrs = node.attributes
        if attrs and 'xlink:type' in attrs.keys():  # is it an xlink?
            # xlink2html() is declared as (self, xlink, parClass=None); no
            # owning object exists in this function, so None stands in for it.
            ret += xlink2html(None, node)
    return ret
def checkRef(self,ref):
    """Test whether a reference should be displayed.

    Each table below is queried through ``self.search(var=<sql>)`` for a row
    whose ``reference`` equals *ref* and which also satisfies the table's
    extra condition.

    Returns the first truthy search result, otherwise the (falsy) result of
    the last query.
    """
    # table name -> additional SQL condition appended to the query
    dbs={'vl_literature':'AND online = \'1\'',
         'vl_technology':'AND complete =\'yes\'',
         'vl_people':'AND complete =\'yes\'',
         'vl_sites':'AND complete =\'yes\'',
         'vl_transcript':'AND complete =\'yes\'',
         'vl_essays':'AND online =\'yes\'',
         'vl_categories':''
         }
    # The query is assembled as text, so single quotes inside the reference
    # are doubled to keep them inside the SQL string literal.
    quoted = ("%s" % ref).replace("'", "''")

    res = None
    for db in dbs.keys():
        searchStr = str("select reference from %s where reference ='%s' %s" % (db, quoted, dbs[db]))
        res = self.search(var=searchStr)
        if res:
            # first hit wins; the remaining tables need not be queried
            break
    return res
def link2html(self,str):
    """Convert the <link> elements of an XML fragment into HTML <a> elements.

    The fragment is wrapped in a <txt> element, parsed, and every <link> is
    renamed to <a>.  A link with an ``href`` attribute gets
    ``class="external"``.  When ``self.checkRef(ref)`` is truthy the link's
    ``href`` is set to ``<SERVER_URL>/references?id=<ref>``, followed by
    ``&page=`` / ``&mk=`` when those attributes are present.

    Returns the converted fragment as unicode, or u"" for empty input.
    """
    if not str:
        return u""

    # Escape ampersands so that the fragment is well-formed XML.
    text = str.replace("&", "&amp;")
    # A single root element is required for parsing; regexpTXT strips it
    # again from the serialized result.
    dom = xml.dom.minidom.parseString("<txt>" + utf8ify(text) + "</txt>")

    for link in dom.getElementsByTagName("link"):
        link.tagName = "a"
        ref = link.getAttribute("ref")
        pn = link.getAttribute("page")
        mk = link.getAttribute("mk")

        if link.getAttribute("href"):
            link.setAttribute("class", "external")

        if self.checkRef(ref):
            more = ""
            if pn:
                more += "&page=%s" % pn
            if mk:
                more += "&mk=%s" % mk
            link.setAttribute("href", self.REQUEST['SERVER_URL'] + "/references?id=" + ref + more)

    newxml = dom.toxml('utf-8')
    match = regexpTXT.search(newxml)
    if match is None:
        # e.g. the wrapper was serialized as an empty element
        return u""
    return match.group(1).decode('utf-8')  # we return unicode
def related2html(self,str):
    """Convert the <link> elements of "related library items" into HTML.

    The fragment is wrapped in a <txt> element and parsed; every <link> is
    renamed to <a>.  The first three characters of the ``ref`` attribute
    select the table queried through ``self.search(var=<sql>)``:

    * hit, literature item with ``online == 1``: ``href`` points to
      ``<SERVER_URL>/references?id=<ref>`` (plus ``&page=`` if given);
    * hit, literature item not online: the full reference is stored in
      ``alt``, the link gets an ``onclick`` toggle and the ``x_offline``
      class, and "+ " is put in front of the link text;
    * hit in one of the other tables: ``href`` points to the reference page;
    * no hit (or no known prefix): a ``ref`` attribute, if present, is
      replaced by a ``title`` attribute holding its value.

    In every case except the offline literature item an en dash plus space is
    inserted in front of the link.

    Returns the converted fragment as unicode, or u"" for empty input.
    """
    if not str:
        return u""

    # reference prefix -> query template
    # (related items extended from literature to other database objects)
    queries = {
        'lit': "select fullreference, online from vl_literature where reference ='%s' and authorized = 1",
        'sit': "select reference from vl_sites where reference ='%s' and complete = 'yes'",
        'per': "select reference from vl_people where reference ='%s' and complete = 'yes'",
        'tec': "select reference from vl_technology where reference ='%s' and complete = 'yes'",
        'exp': "select reference from vl_experiments where reference ='%s' and complete = 'yes'",
    }

    # Escape ampersands so that the fragment is well-formed XML.
    text = str.replace("&", "&amp;")
    # A single root element is required for parsing; regexpTXT strips it
    # again from the serialized result.
    dom = xml.dom.minidom.parseString("<txt>" + utf8ify(text) + "</txt>")

    for link in dom.getElementsByTagName("link"):
        link.tagName = "a"
        ref = link.getAttribute("ref")
        pn = link.getAttribute("page")
        obj = ref[0:3]

        res = None
        template = queries.get(obj)
        if template:
            # The query is assembled as text: double single quotes so the
            # reference stays inside the SQL string literal.
            res = self.search(var=template % ref.replace("'", "''"))

        if res and obj == 'lit' and res[0]['online'] != 1:
            # literature item exists only as a bibliographic entry
            link.setAttribute("alt", unicodify(res[0]['fullreference']))
            link.setAttribute("title", "click to expand")
            link.setAttribute("onclick", "return toggle(this);")
            link.setAttribute("class", "x_offline")
            # prefix inside the link text (also works for an empty link)
            link.insertBefore(dom.createTextNode(u"+ "), link.firstChild)
            continue

        if res:
            href = self.REQUEST['SERVER_URL'] + "/references?id=" + ref
            if obj == 'lit':
                # literature item available online
                if pn:
                    href += "&page=" + pn
                title = "click to view!"
            else:
                # links to the other database objects
                title = "click to view"
            link.setAttribute("href", href)
            link.setAttribute("title", title)
            link.removeAttribute("ref")
        elif link.hasAttribute("ref"):
            # object not available/released; a (web) link that carries href
            # instead of ref is left untouched
            link.removeAttribute("ref")
            link.setAttribute("title", ref)

        # prefix preceding the link: en dash + space
        prefix = dom.createTextNode(u"\u2013\u0020")
        link.parentNode.insertBefore(prefix, link)

    newxml = dom.toxml('utf-8')
    match = regexpTXT.search(newxml)
    if match is None:
        # e.g. the wrapper was serialized as an empty element
        return u""
    return match.group(1).decode('utf-8')  # we return unicode
def xml2html(self,str,quote="yes"):
    """Convert the <link> elements of an XML document into HTML <a> elements.

    TODO: the VLP-specific link handling still has to be moved out of here.

    The document is parsed with the 4Suite non-validating reader.  Every
    <link> is replaced by an <a> holding the same child nodes; when
    ``self.checkRef(ref)`` is truthy the anchor's ``href`` is set to
    ``<SERVER_URL>/references?id=<ref>`` (plus ``&page=`` if given).

    str   -- the XML source
    quote -- ampersands are escaped before parsing only when this is "yes2"

    Returns the content of the <page> element as unicode.  If the serialized
    document contains no <page> element the whole serialized document is
    returned instead; empty input yields "".
    """
    if not str:
        return ""

    if quote == "yes2":
        # Escape ampersands so that the input is well-formed XML.
        str = str.replace("&", "&amp;")

    logging.debug(str)
    dom = NonvalidatingReader.parseString(str, "http://www.mpiwg-berlin.mpg.de/")
    links = Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
    for link in links:
        ref = link.getAttributeNS(EMPTY_NAMESPACE, "ref")
        pn = link.getAttributeNS(EMPTY_NAMESPACE, "page")

        newLink = dom.createElementNS(EMPTY_NAMESPACE, "a")
        # iterate over a copy of the child list while the children are
        # appended to the new element
        for child in link.childNodes[0:]:
            newLink.appendChild(child)
        link.parentNode.replaceChild(newLink, link)

        if self.checkRef(ref):
            href = self.REQUEST['SERVER_URL'] + "/references?id=" + ref
            if pn:
                href += "&page=" + pn
            newLink.setAttributeNS(EMPTY_NAMESPACE, "href", href)

    buf = cStringIO.StringIO()
    try:
        PrettyPrint(dom, stream=buf)
        serialized = buf.getvalue()
    finally:
        buf.close()

    match = regexpPage.search(serialized)
    if match is None:
        # hack: <page> is sometimes missing -- hand back the whole document
        return serialized
    return match.group(1).decode('utf-8')
def xlink2html(self,xlink,parClass=None):
    """Render a single xlink element as HTML.

    xlink    -- DOM element; only ``image`` and ``link`` elements (tag name
                compared case-insensitively) produce output
    parClass -- class of the enclosing paragraph; "Picture" makes a link to
                an http/file URL come out as an image instead of an anchor

    An ``image`` becomes an <img> whose source is the ``href`` attribute.  A
    ``link`` whose unquoted ``href`` starts with the scheme ``http`` or
    ``file`` becomes an <img> or an <a> labelled with the element's text.
    Any other ``href`` is emitted as is, after quotes have been put around an
    unquoted ``ref=...>`` attribute value.

    Returns the HTML string, "" for other elements.
    """
    ret = ""
    tag = xlink.tagName.lower()
    if tag == "image":
        ret += '<img src="%s" />' % xlink.getAttribute('href')
    elif tag == "link":
        reference = urllib.unquote(xlink.getAttribute('href'))
        # check if href is already a correct url
        if reference.split(":")[0] in ['http', 'file']:
            if parClass == "Picture":
                ret += '<img src="%s" />' % (reference,)
            else:
                label = getText(self, xlink.childNodes)
                ret += '<a href="%s">%s</a>' % (reference, label)
        else:  # transform
            # Add quotes around the value of the ref attribute if they are
            # missing; the lookahead leaves already quoted values alone.
            ret += re.sub(r'ref=(?!["\'])([^>]*)>', r'ref="\g<1>">', reference)
    return ret
def getText(self,nodelist,parClass=None):
    """Return the text/HTML content of the nodes in *nodelist* as unicode.

    * text nodes contribute their data (byte strings are decoded as UTF-8);
    * ``inline`` elements contribute the text of their children;
    * ``pb`` elements contribute nothing;
    * any other node that has a ``type`` attribute is treated as an xlink and
      rendered with xlink2html(), which receives *parClass*;
    * everything else (comments, other elements) is skipped.
    """
    parts = []
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            data = node.data
            if not isinstance(data, type(u'')):
                # byte string: decode it rather than failing on (and then
                # discarding) the text collected so far
                data = data.decode('utf-8', 'ignore')
            parts.append(data)
            continue

        # comments and processing instructions have no tagName
        tagName = getattr(node, 'tagName', None)
        if tagName == "inline":
            # The par2html() renderer is disabled in this module, so an
            # inline element is reduced to the text of its children.
            parts.append(getText(self, node.childNodes, node.getAttribute('class')))
        elif tagName == "pb":
            continue
        elif node.attributes and 'type' in node.attributes.keys():  # is it an xlink?
            parts.append(xlink2html(self, node, parClass))
    return u''.join(parts)
#filename=argv[1]
#fileString=file(filename).read()
#print proj2hash(fileString)