version 1.4, 2004/10/06 13:02:56
|
version 1.11, 2007/07/24 09:11:46
|
Line 1
|
Line 1
|
from sys import argv |
from sys import argv |
|
|
import string |
import string |
|
import logging |
import xml.dom.minidom |
import xml.dom.minidom |
import Ft.Xml.XLink.Processor |
import Ft.Xml.XLink.Processor |
import Ft.Xml.XLink.XLinkElements |
import Ft.Xml.XLink.XLinkElements |
Line 9 from Ft.Xml import XPath
|
Line 10 from Ft.Xml import XPath
|
from Ft.Xml.XPath import Evaluate |
from Ft.Xml.XPath import Evaluate |
from Ft.Xml.XLink import XLINK_NAMESPACE |
from Ft.Xml.XLink import XLINK_NAMESPACE |
from Ft.Xml.XLink import XLinkElements |
from Ft.Xml.XLink import XLinkElements |
|
import cStringIO |
#from Ft.Xml.Domlette import NonvalidatingReader,InputSource |
from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print |
#from Ft.Xml import EMPTY_NAMESPACE |
from Ft.Xml import EMPTY_NAMESPACE |
from Ft.Lib import Uri |
from Ft.Lib import Uri |
import urllib |
import urllib |
import re |
import re |
|
from ECHO_collection import unicodify,utf8ify |
|
|
|
patternTXT=r"<\s*txt.*?>(.*?)</txt>" |
|
regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL) |
patternPage=r"<\s*page.*?>(.*?)</page>" |
patternPage=r"<\s*page.*?>(.*?)</page>" |
regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL) |
regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL) |
|
|
Line 167 def getXlink(nodes):
|
Line 171 def getXlink(nodes):
|
return ret |
return ret |
|
|
def checkRef(self,ref): |
def checkRef(self,ref): |
dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''} |
"""teste ob reference angezeigt werden sollen""" |
|
dbs={'vl_literature':'AND online = \'1\'', |
|
'vl_technology':'AND complete =\'yes\'', |
|
'vl_people':'AND complete =\'yes\'', |
|
'vl_sites':'AND complete =\'yes\'', |
|
'vl_transcript':'AND complete =\'yes\'', |
|
'vl_essays':'AND online =\'yes\'', |
|
'vl_categories':'' |
|
} |
res=None |
res=None |
for db in dbs.keys(): |
for db in dbs.keys(): |
|
searchStr=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])) |
res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db]))) |
res=res or self.search(var=searchStr) |
return res |
return res |
|
|
def xml2html(self,str,quote="yes"): |
def link2html(self,str): |
"""link2html fuer VLP muss hier noch raus""" |
"""link2html liks in html wandeln""" |
|
if str: |
|
|
|
str=re.sub("\&","&",str) |
|
dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>") |
|
links=dom.getElementsByTagName("link") |
|
|
|
|
|
for link in links: |
|
link.tagName="a" |
|
ref=link.getAttribute("ref") |
|
pn=link.getAttribute("page") |
|
|
|
if self.checkRef(ref): |
|
if pn: |
|
link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn) |
|
else: |
|
link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref) |
|
|
|
|
|
newxml=dom.toxml('utf-8') |
|
|
|
retStr=regexpTXT.search(newxml) |
|
retStr = retStr.group(1) |
|
|
|
return retStr.decode('utf-8') # we return unicode |
|
|
|
return u"" |
|
|
|
def related2html(self,str): |
|
"""related library items: xlinks in html wandeln / mb 22.11.2006""" |
if str: |
if str: |
if quote=="yes2": |
|
str=re.sub("\&","&",str) |
|
|
|
str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt. |
str=re.sub("\&","&",str) |
#str=re.sub("ref\=([.[*^[>]]])",'XX',str) |
dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+str+"</txt>") |
#print "STR::",str |
|
dom=xml.dom.minidom.parseString(str) |
|
links=dom.getElementsByTagName("link") |
links=dom.getElementsByTagName("link") |
|
|
for link in links: |
for link in links: |
Line 193 def xml2html(self,str,quote="yes"):
|
Line 229 def xml2html(self,str,quote="yes"):
|
ref=link.getAttribute("ref") |
ref=link.getAttribute("ref") |
pn=link.getAttribute("page") |
pn=link.getAttribute("page") |
|
|
if checkRef(self,ref): |
searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref) |
|
res = self.search(var=searchStr) |
|
|
|
if res: |
|
if res[0]['online'] == 1: |
|
# item online verfuegbar |
if pn: |
if pn: |
link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn) |
link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn) |
else: |
else: |
link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref) |
link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref) |
|
|
str= dom.toxml() |
link.setAttribute("title", "click to view") |
|
link.removeAttribute("ref") |
|
|
|
# prefix preceding the link |
|
prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space |
|
dom.documentElement.insertBefore(prefix, link) |
|
|
|
else: |
|
# item nur als bibliographische angabe vorhanden |
|
link.setAttribute("alt", res[0]['fullreference'].decode('utf-8')) |
|
link.setAttribute("title", "click to expand") |
|
link.setAttribute("onclick", "return toggle(this);") |
|
link.setAttribute("class", "x_offline") |
|
|
|
# prefix inside link text |
|
link.firstChild.data = '+ ' + link.firstChild.data |
|
|
|
|
|
newxml=dom.toxml('utf-8') |
|
|
|
retStr=regexpTXT.search(newxml) |
|
retStr = retStr.group(1) |
|
|
|
return retStr.decode('utf-8') # we return unicode |
|
|
|
return u"" |
|
|
|
|
|
|
|
|
|
def xml2html(self,str,quote="yes"): |
|
"""link2html fuer VLP muss hier noch raus""" |
|
if str: |
|
if quote=="yes2": |
|
str=re.sub("\&","&",str) |
|
#dom=xml.dom.minidom.parseString(str) |
|
dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/") |
|
#links=dom.getElementsByTagName("link") |
|
links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom) |
|
for link in links: |
|
#link.tagName="a" |
|
|
|
ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref") |
|
pn=link.getAttributeNS(EMPTY_NAMESPACE,"page") |
|
|
|
cns=link.childNodes[0:] |
|
|
|
newLink=dom.createElementNS(EMPTY_NAMESPACE,"a") |
|
for x in cns: |
|
newLink.appendChild(x) |
|
|
|
|
|
|
|
link.parentNode.replaceChild(newLink,link) |
|
|
|
if self.checkRef(ref): |
|
if pn: |
|
newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn) |
|
else: |
|
newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref) |
|
|
|
#str= dom.toxml('utf-8') |
|
buf = cStringIO.StringIO() |
|
PrettyPrint(dom, stream=buf) |
|
str = buf.getvalue() |
|
buf.close() |
|
#str=PrettyPrint(dom.documentElement,encoding='UTF-8') |
#print link.toxml('utf-8') |
#print link.toxml('utf-8') |
|
#print type(str) |
retStr=regexpPage.search(str) |
retStr=regexpPage.search(str) |
|
|
try: |
try: # hack warum fehtl manchmal page?? |
return retStr.group(1) |
return retStr.group(1).decode('utf-8') |
except: |
except: |
exStr="""<?xml version="1.0" ?>""" |
return str |
str=re.sub("\n","",str) |
|
#str= |
|
#print repr(str) |
|
return str.replace(exStr,'') |
|
return "" |
return "" |
|
|
|
|
def xlink2html(self,xlink,parClass=None): |
def xlink2html(self,xlink,parClass=None): |
ret="" |
ret="" |
attributes=xlink.attributes |
attributes=xlink.attributes |