--- ECHO_content/vlp_xmlhelpers.py 2004/10/05 14:58:56 1.2
+++ ECHO_content/vlp_xmlhelpers.py 2009/06/09 14:05:20 1.16
@@ -1,6 +1,7 @@
from sys import argv
import string
+import logging
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements
@@ -9,211 +10,359 @@ from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements
-
-#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
-#from Ft.Xml import EMPTY_NAMESPACE
+import cStringIO
+from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
+from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri
import urllib
import re
+from ECHO_collection import unicodify,utf8ify
+# Regular expressions used by the *2html helpers to pull the payload out of a
+# re-serialized document: group(1) is the text following the opening
+# <txt ...> or <page ...> tag (case-insensitive, "." also matches newlines).
+# NOTE(review): as listed, both patterns end in a lazy (.*?) with nothing
+# after it, so group(1) can only ever be the empty string; a closing-tag
+# anchor may have been lost from this listing -- confirm against the file.
+patternTXT=r"<\s*txt.*?>(.*?)"
+regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
patternPage=r"<\s*page.*?>(.*?)"
regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
-xml2htmlArray={'WEB_normal':('
','
'),'Normal':('','
'),'WEB_picture':('','
'),'WEB_figuretitle':('','
'),'WEB_bibliography':('','
'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('','
'),'FigureTitle':('','
')}
-
-def addToDict(dict,name,value):
- if name=="":
- return 0
- else:
-
- if not dict.has_key(name):
- dict[name]=[] # als array anlegen
+#xml2htmlArray={'WEB_normal':('','
'),'Normal':('','
'),'WEB_picture':('','
'),'WEB_figuretitle':('','
'),'WEB_bibliography':('','
'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('','
'),'FigureTitle':('','
')}
+#
+#def addToDict(dict,name,value):
+# if name=="":
+# return 0
+# else:
+#
+# if not dict.has_key(name):
+# dict[name]=[] # als array anlegen
+#
+# dict[name].append(value)
+# return 1
+#
+#def proj2hash(self,xmlstring):
+# """wandelt xml-files fuer die projekte in ein hash"""
+#
+# dom=xml.dom.minidom.parseString(xmlstring)
+#
+#
+# list={}
+#
+# #gettitle
+# pars=Evaluate('par',dom.getElementsByTagName('part')[0])
+# for par in pars:
+# className=par.getAttribute('class')
+# content=getText(self,par.childNodes)
+# addToDict(list,className,content)
+#
+#
+# sectionXPath="section"
+#
+#
+# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+#
+# while sections:
+#
+# for section in sections:
+#
+# sec=parseSection(self,section)
+#
+# if sec[0]=="WEB_project_header": # Sonderfall project
+# addToDict(list,'WEB_project_header',sec[1]) # store title
+# addToDict(list,'WEB_project_description',sec[2]) #store description
+# else: # no information in heading
+# level=int(sec[3])+2
+# aTag=""%level
+# eTag=""%level
+# addToDict(list,"text",aTag+sec[1]+eTag)
+# addToDict(list,"text",sec[2])
+# sectionXPath+="/section"
+# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+# return list
+#
+#
+#def parseSection(self,section):
+# type=""
+# header=""
+# level=section.getAttribute('level')
+# for heading in section.childNodes:
+# if getattr(heading,'tagName','')=="heading":
+#
+# type=heading.getAttribute('class')
+# header=getText(self,heading.childNodes)
+#
+# if type=="": # falls heading fehlt, pruefe ob erster par richtig
+# par=section.getElementsByTagName('par')[0]
+# type=par.getAttribute('class')
+# header=getText(par.childNodes)
+#
+# #print section.childNodes
+# #pars=Evaluate('par',section)
+# pars=section.childNodes
+# content=par2html(self,pars)
+# #print "CONTENT",repr(content)
+# return (type,header,content,level)
+#
+#def parseTable(table):
+# fields={}
+# rows=table.getElementsByTagName('html:tr')
+# for row in rows:
+# #print "ROW"
+# cols=row.getElementsByTagName('html:td')
+#
+# #Name des Datenfeldes einlesen
+# try:
+# field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
+# #print "field",field
+# except:
+# print "error"
+# field=""
+#
+# #Wandeln der Eintrge in HTML
+#
+# #pars=cols[1].getElementsByTagName('par')
+# pars=cols[1].childNodes
+#
+# html=par2html(self,pars,tags=("",";"))
+#
+# addToDict(fields,field,html)
+# #print fields
+# return fields
+#
+#def par2html(self,pars,tags=None):
+# html=""
+#
+# for par in pars:
+# tagName=getattr(par,'tagName','')
+# if tagName in ["par","inline"]:
+# #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
+# #print "par",par
+# if not tags:
+# try:
+# tag=xml2htmlArray[par.getAttribute('class')]
+# except:
+# tag=('','
')
+# else:
+# tag=tags
+# #print "TAG",tag
+# content=getText(self,par.childNodes,par.getAttribute('class'))
+#
+#
+#
+# #print par.getAttribute('class'),node
+# try:
+# html+=tag[0]+content+tag[1]
+# except:
+# html=+tag[0]+content+tag[1]
+#
+# elif tagName=="pb":
+# html+=""
+#
+#
+# try:
+#
+# return html
+# except:
+# return ""
- dict[name].append(value)
- return 1
+def getXlink(nodes):
+ """Collect the HTML rendering of the XLink elements found in *nodes*.
+
+ A node is treated as an XLink when it has attributes and one of them is
+ named 'xlink:type'.  Each such node is handed to xlink2html() and the
+ returned strings are appended to one result string.
+
+ Returns the accumulated string ("" when no node qualifies).
+ """
+ ret=""
+ for node in nodes:
+ if node.attributes:
+ if 'xlink:type' in node.attributes.keys(): #is a xlink?
+ # NOTE(review): xlink2html is declared in this file as
+ # xlink2html(self,xlink,parClass=None); this call passes only the
+ # node -- verify the call matches the signature.
+ ret +=xlink2html(node)
+ return ret
-def proj2hash(self,xmlstring):
- """wandelt xml-files fuer die projekte in ein hash"""
-
- dom=xml.dom.minidom.parseString(xmlstring)
+def checkRef(self,ref):
+ """Test whether the reference *ref* should be displayed.
+
+ Builds one "select reference from <table> ..." statement per entry of
+ dbs, each with that table's extra condition appended, and passes it to
+ self.search(var=...).  The first truthy result is kept.
+
+ Returns that result, or the falsy value left in res (None, or whatever
+ the last query returned) when no query returns a truthy value.
+ """
+ # table name -> extra SQL condition appended after the reference test
+ dbs={'vl_literature':'AND online = \'1\'',
+ 'vl_technology':'AND complete =\'yes\'',
+ 'vl_people':'AND complete =\'yes\'',
+ 'vl_sites':'AND complete =\'yes\'',
+ 'vl_transcript':'AND complete =\'yes\'',
+ 'vl_essays':'AND online =\'yes\'',
+ 'vl_categories':''
+ }
+ res=None
+ # NOTE(review): ref is interpolated straight into the SQL text; if it can
+ # originate from untrusted markup this is open to SQL injection -- confirm
+ # how self.search() treats its argument.
+ for db in dbs.keys():
+ searchStr=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db]))
+ # "or" short-circuits: once res is truthy no further query is issued
+ res=res or self.search(var=searchStr)
+ return res
-
- list={}
+def link2html(self,str):
+ """Convert the <link> elements in *str* into HTML <a> elements.
+
+ The input is passed through utf8ify() and parsed with minidom; every
+ "link" element is renamed to "a" and may receive class/href attributes.
+ The document is then serialized as UTF-8 and the part captured by
+ regexpTXT is returned, decoded to unicode.
+
+ Returns u"" when *str* is empty/false.
+ """
+ if str:
- #gettitle
- pars=Evaluate('par',dom.getElementsByTagName('part')[0])
- for par in pars:
- className=par.getAttribute('class')
- content=getText(self,par.childNodes)
- addToDict(list,className,content)
-
+ # NOTE(review): as listed, the next line replaces "&" by "&" (a no-op)
+ # and the parse call wraps the text in empty strings; entity/wrapper
+ # markup may be missing from this listing -- confirm against the file.
+ str=re.sub("\&","&",str)
+ dom=xml.dom.minidom.parseString(""+utf8ify(str)+"")
+
+
+ links=dom.getElementsByTagName("link")
+
- sectionXPath="section"
+ for link in links:
+ link.tagName="a"
+ ref=link.getAttribute("ref")
+ pn=link.getAttribute("page")
+ mk=link.getAttribute("mk")
+ href= link.getAttribute("href")
+ # a link that already carries an href is marked with class "external"
+ if href:
+ link.setAttribute("class","external")
+
+ # when checkRef() reports a hit, point href at /references on this
+ # server, appending the optional page and mk parameters
+ if self.checkRef(ref):
+ more = ""
+ if pn:
+ more += "&page=%s"%pn
+
+ if mk:
+ more += "&mk=%s"%mk
+
+ link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more)
-
- sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-
- while sections:
-
- for section in sections:
+
+ newxml=dom.toxml('utf-8')
+
- sec=parseSection(self,section)
- if sec[0]=="WEB_project_header": # Sonderfall project
- addToDict(list,'WEB_project_header',sec[1]) # store title
- addToDict(list,'WEB_project_description',sec[2]) #store description
- else: # no information in heading
- level=sec[3]
- aTag=""%level
- eTag=""%level
- addToDict(list,"text",aTag+sec[1]+eTag)
- addToDict(list,"text",sec[2])
- sectionXPath+="/section"
- sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
- return list
-
-
-def parseSection(self,section):
- type=""
- header=""
- level=section.getAttribute('level')
- for heading in section.childNodes:
- if getattr(heading,'tagName','')=="heading":
-
- type=heading.getAttribute('class')
- header=getText(self,heading.childNodes)
-
- if type=="": # falls heading fehlt, pruefe ob erster par richtig
- par=section.getElementsByTagName('par')[0]
- type=par.getAttribute('class')
- header=getText(par.childNodes)
-
- #print section.childNodes
- #pars=Evaluate('par',section)
- pars=section.childNodes
- content=par2html(self,pars)
- #print "CONTENT",repr(content)
- return (type,header,content,level)
-
-def parseTable(table):
- fields={}
- rows=table.getElementsByTagName('html:tr')
- for row in rows:
- #print "ROW"
- cols=row.getElementsByTagName('html:td')
-
- #Name des Datenfeldes einlesen
- try:
- field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
- #print "field",field
- except:
- print "error"
- field=""
+ # NOTE(review): search() returns None when nothing matches, in which
+ # case the following .group(1) raises AttributeError -- confirm that a
+ # match is guaranteed for every serialized document.
+ retStr=regexpTXT.search(newxml)
+ retStr = retStr.group(1)
- #Wandeln der Eintrge in HTML
+ return retStr.decode('utf-8') # we return unicode
- #pars=cols[1].getElementsByTagName('par')
- pars=cols[1].childNodes
-
- html=par2html(self,pars,tags=("",";"))
-
- addToDict(fields,field,html)
- #print fields
- return fields
-
-def par2html(self,pars,tags=None):
- html=""
-
- for par in pars:
- tagName=getattr(par,'tagName','')
- if tagName in ["par","inline"]:
- #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
- #print "par",par
- if not tags:
- try:
- tag=xml2htmlArray[par.getAttribute('class')]
- except:
- tag=('','
')
- else:
- tag=tags
- #print "TAG",tag
- content=getText(self,par.childNodes,par.getAttribute('class'))
+ return u""
+
+def related2html(self,str):
+ """Convert the xlinks of related library items into HTML (mb 22.11.2006).
+
+ The input is passed through utf8ify() and parsed with minidom.  Every
+ "link" element is renamed to "a"; the first three characters of its ref
+ attribute select which table is queried through self.search(), and the
+ query result decides which attributes and prefix text the link gets.
+ The document is serialized as UTF-8 and the part captured by regexpTXT
+ is returned, decoded to unicode.
+
+ Returns u"" when *str* is empty/false.
+ """
+ if str:
+
+ # NOTE(review): as listed, the next line replaces "&" by "&" (a no-op)
+ # and the parse call wraps the text in empty strings; entity/wrapper
+ # markup may be missing from this listing -- confirm against the file.
+ str=re.sub("\&","&",str)
+ dom=xml.dom.minidom.parseString(""+utf8ify(str)+"")
+ links=dom.getElementsByTagName("link")
+
+ for link in links:
+ link.tagName = "a"
+ ref = link.getAttribute("ref")
+ pn = link.getAttribute("page")
+ obj = ref[0:3]
+ # English for the bare string below: extension of the related items
+ # from literature to further database objects (mb 05.06.2009)
+ """erweiterung der related items von literatur auf weitere datenbankobjekte, mb 05.06.2009"""
+ # NOTE(review): searchStr is only assigned for the five prefixes below;
+ # any other prefix (e.g. an empty ref) leaves it unset or stale from a
+ # previous link.  ref is also interpolated directly into the SQL text.
+ if obj == 'lit':
+ searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
+ elif obj == 'sit':
+ searchStr="select reference from vl_sites where reference =\'%s\' and complete = 'yes'"%(ref)
+ elif obj == 'per':
+ searchStr="select reference from vl_people where reference =\'%s\' and complete = 'yes'"%(ref)
+ elif obj == 'tec':
+ searchStr="select reference from vl_technology where reference =\'%s\' and complete = 'yes'"%(ref)
+ elif obj == 'exp':
+ searchStr="select reference from vl_experiments where reference =\'%s\' and complete = 'yes'"%(ref)
+
+ res = self.search(var=searchStr)
+
+ if res:
+ if obj == 'lit':
+ if res[0]['online'] == 1:
+ # literature item is available online
+ if pn:
+ link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
+ else:
+ link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+
+ link.setAttribute("title", "click to view!")
+ link.removeAttribute("ref")
+
+ # prefix preceding the link
+ # NOTE(review): insertBefore on documentElement presumably requires
+ # the link to be a direct child of the root -- confirm.
+ prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+ dom.documentElement.insertBefore(prefix, link)
+
+ else:
+ # literature item exists only as a bibliographic reference
+ link.setAttribute("alt", unicodify(res[0]['fullreference']))
+ link.setAttribute("title", "click to expand")
+ link.setAttribute("onclick", "return toggle(this);")
+ link.setAttribute("class", "x_offline")
+
+ # prefix inside link text
+ link.firstChild.data = '+ ' + link.firstChild.data
+ else:
+ # links to the other database objects
+ link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+ link.setAttribute("title", "click to view")
+ link.removeAttribute("ref")
-
- #print par.getAttribute('class'),node
- try:
- html+=tag[0]+content+tag[1]
- except:
- html=+tag[0]+content+tag[1]
+ # prefix preceding the link
+ prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+ dom.documentElement.insertBefore(prefix, link)
- elif tagName=="pb":
- html+=""
- elif tagName=="img":
- html+="XXX"
-
- try:
-
- return html
- except:
- return ""
+ else:
+ # object not available/released, or a (web) link with href instead of ref
+
+ #if ref != '':
+ # link.removeAttribute("ref")
+ # link.setAttribute("title", ref)
+
+
+ # prefix preceding the link
+ prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+ dom.documentElement.insertBefore(prefix, link)
+
+
+ newxml=dom.toxml('utf-8')
+
+ # NOTE(review): search() returns None when nothing matches, in which
+ # case the following .group(1) raises AttributeError -- confirm.
+ retStr=regexpTXT.search(newxml)
+ retStr = retStr.group(1)
+ #logging.debug("related2html out=%s"%repr(retStr))
+ return retStr.decode('utf-8') # we return unicode
-def getXlink(nodes):
- """searches xlinks and gives them back as html"""
- ret=""
- for node in nodes:
- if node.attributes:
- if 'xlink:type' in node.attributes.keys(): #is a xlink?
- ret +=xlink2html(node)
- return ret
+ return u""
-def checkRef(self,ref):
- dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
- res=None
- for db in dbs.keys():
+
- res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
- return res
def xml2html(self,str,quote="yes"):
"""link2html for VLP; still has to be moved out of here.

Parses *str* with NonvalidatingReader, replaces every "link" element by a
new "a" element holding the same children, and sets href on it when
self.checkRef(ref) is truthy.  The document is pretty-printed into a
buffer and the part captured by regexpPage is returned, decoded from UTF-8.

quote: only the value "yes2" triggers the "&" substitution below.
Returns "" when *str* is empty/false.
"""
-
-
if str:
if quote=="yes2":
str=re.sub("\&","&",str)
-
- str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
- #str=re.sub("ref\=([.[*^[>]]])",'XX',str)
- #print "STR::",str
- dom=xml.dom.minidom.parseString(str)
- links=dom.getElementsByTagName("link")
-
+ #dom=xml.dom.minidom.parseString(str)
+ dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
+ #links=dom.getElementsByTagName("link")
+ links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
for link in links:
- link.tagName="a"
- ref=link.getAttribute("ref")
- pn=link.getAttribute("page")
-
- if checkRef(self,ref):
- if pn:
- link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
- else:
- link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)
-
- str= dom.toxml()
-
- #print link.toxml('utf-8')
- retStr=regexpPage.search(str)
+ #link.tagName="a"
+
+ ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref")
+ pn=link.getAttributeNS(EMPTY_NAMESPACE,"page")
- try:
- return retStr.group(1)
+ # slice of the child list taken before the children are appended to
+ # newLink -- presumably a copy so the move does not disturb iteration
+ cns=link.childNodes[0:]
+
+ newLink=dom.createElementNS(EMPTY_NAMESPACE,"a")
+ for x in cns:
+ newLink.appendChild(x)
+
+
+
+ link.parentNode.replaceChild(newLink,link)
+
+ if self.checkRef(ref):
+ if pn:
+ newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
+ else:
+ newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+
+ #str= dom.toxml('utf-8')
+ # serialize the document into an in-memory buffer
+ buf = cStringIO.StringIO()
+ PrettyPrint(dom, stream=buf)
+ str = buf.getvalue()
+ buf.close()
+ #str=PrettyPrint(dom.documentElement,encoding='UTF-8')
+ #print link.toxml('utf-8')
+ #print type(str)
+ retStr=regexpPage.search(str)
+
+ # the bare except below catches every error (including retStr being None)
+ # and falls back to returning the whole serialized string undecoded
+ try: # hack: why is page sometimes missing??
+ return retStr.group(1).decode('utf-8')
except:
- exStr=""""""
- str=re.sub("\n","",str)
- #str=
- #print repr(str)
- return str.replace(exStr,'')
+ return str
return ""
+
def xlink2html(self,xlink,parClass=None):
ret=""