--- ECHO_content/vlp_xmlhelpers.py 2004/10/06 13:02:56 1.4
+++ ECHO_content/vlp_xmlhelpers.py 2008/08/05 16:17:46 1.14
@@ -1,6 +1,7 @@
from sys import argv
import string
+import logging
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements
@@ -9,153 +10,156 @@ from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements
-
-#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
-#from Ft.Xml import EMPTY_NAMESPACE
+import cStringIO
+from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
+from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri
import urllib
import re
+from ECHO_collection import unicodify,utf8ify
+patternTXT=r"<\s*txt.*?>(.*?)</txt>"
+regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
patternPage=r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
-xml2htmlArray={'WEB_normal':('
','
'),'Normal':('','
'),'WEB_picture':('','
'),'WEB_figuretitle':('','
'),'WEB_bibliography':('','
'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('','
'),'FigureTitle':('','
')}
-
-def addToDict(dict,name,value):
- if name=="":
- return 0
- else:
-
- if not dict.has_key(name):
- dict[name]=[] # als array anlegen
-
- dict[name].append(value)
- return 1
-
-def proj2hash(self,xmlstring):
- """wandelt xml-files fuer die projekte in ein hash"""
-
- dom=xml.dom.minidom.parseString(xmlstring)
-
-
- list={}
-
- #gettitle
- pars=Evaluate('par',dom.getElementsByTagName('part')[0])
- for par in pars:
- className=par.getAttribute('class')
- content=getText(self,par.childNodes)
- addToDict(list,className,content)
-
-
- sectionXPath="section"
-
-
- sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
-
- while sections:
-
- for section in sections:
-
- sec=parseSection(self,section)
-
- if sec[0]=="WEB_project_header": # Sonderfall project
- addToDict(list,'WEB_project_header',sec[1]) # store title
- addToDict(list,'WEB_project_description',sec[2]) #store description
- else: # no information in heading
- level=int(sec[3])+2
- aTag=""%level
- eTag=""%level
- addToDict(list,"text",aTag+sec[1]+eTag)
- addToDict(list,"text",sec[2])
- sectionXPath+="/section"
- sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
- return list
-
-
-def parseSection(self,section):
- type=""
- header=""
- level=section.getAttribute('level')
- for heading in section.childNodes:
- if getattr(heading,'tagName','')=="heading":
-
- type=heading.getAttribute('class')
- header=getText(self,heading.childNodes)
-
- if type=="": # falls heading fehlt, pruefe ob erster par richtig
- par=section.getElementsByTagName('par')[0]
- type=par.getAttribute('class')
- header=getText(par.childNodes)
-
- #print section.childNodes
- #pars=Evaluate('par',section)
- pars=section.childNodes
- content=par2html(self,pars)
- #print "CONTENT",repr(content)
- return (type,header,content,level)
-
-def parseTable(table):
- fields={}
- rows=table.getElementsByTagName('html:tr')
- for row in rows:
- #print "ROW"
- cols=row.getElementsByTagName('html:td')
-
- #Name des Datenfeldes einlesen
- try:
- field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
- #print "field",field
- except:
- print "error"
- field=""
-
- #Wandeln der Eintrge in HTML
-
- #pars=cols[1].getElementsByTagName('par')
- pars=cols[1].childNodes
-
- html=par2html(self,pars,tags=("",";"))
-
- addToDict(fields,field,html)
- #print fields
- return fields
-
-def par2html(self,pars,tags=None):
- html=""
-
- for par in pars:
- tagName=getattr(par,'tagName','')
- if tagName in ["par","inline"]:
- #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
- #print "par",par
- if not tags:
- try:
- tag=xml2htmlArray[par.getAttribute('class')]
- except:
- tag=('','
')
- else:
- tag=tags
- #print "TAG",tag
- content=getText(self,par.childNodes,par.getAttribute('class'))
-
-
-
- #print par.getAttribute('class'),node
- try:
- html+=tag[0]+content+tag[1]
- except:
- html=+tag[0]+content+tag[1]
-
- elif tagName=="pb":
- html+=""
-
-
- try:
-
- return html
- except:
- return ""
+#xml2htmlArray={'WEB_normal':('','
'),'Normal':('','
'),'WEB_picture':('','
'),'WEB_figuretitle':('','
'),'WEB_bibliography':('','
'),'Web_kursiv':('',''),'WEB_kursiv':('',''),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('','
'),'FigureTitle':('','
')}
+#
+#def addToDict(dict,name,value):
+# if name=="":
+# return 0
+# else:
+#
+# if not dict.has_key(name):
+# dict[name]=[] # als array anlegen
+#
+# dict[name].append(value)
+# return 1
+#
+#def proj2hash(self,xmlstring):
+# """wandelt xml-files fuer die projekte in ein hash"""
+#
+# dom=xml.dom.minidom.parseString(xmlstring)
+#
+#
+# list={}
+#
+# #gettitle
+# pars=Evaluate('par',dom.getElementsByTagName('part')[0])
+# for par in pars:
+# className=par.getAttribute('class')
+# content=getText(self,par.childNodes)
+# addToDict(list,className,content)
+#
+#
+# sectionXPath="section"
+#
+#
+# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+#
+# while sections:
+#
+# for section in sections:
+#
+# sec=parseSection(self,section)
+#
+# if sec[0]=="WEB_project_header": # Sonderfall project
+# addToDict(list,'WEB_project_header',sec[1]) # store title
+# addToDict(list,'WEB_project_description',sec[2]) #store description
+# else: # no information in heading
+# level=int(sec[3])+2
+# aTag=""%level
+# eTag=""%level
+# addToDict(list,"text",aTag+sec[1]+eTag)
+# addToDict(list,"text",sec[2])
+# sectionXPath+="/section"
+# sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
+# return list
+#
+#
+#def parseSection(self,section):
+# type=""
+# header=""
+# level=section.getAttribute('level')
+# for heading in section.childNodes:
+# if getattr(heading,'tagName','')=="heading":
+#
+# type=heading.getAttribute('class')
+# header=getText(self,heading.childNodes)
+#
+# if type=="": # falls heading fehlt, pruefe ob erster par richtig
+# par=section.getElementsByTagName('par')[0]
+# type=par.getAttribute('class')
+# header=getText(par.childNodes)
+#
+# #print section.childNodes
+# #pars=Evaluate('par',section)
+# pars=section.childNodes
+# content=par2html(self,pars)
+# #print "CONTENT",repr(content)
+# return (type,header,content,level)
+#
+#def parseTable(table):
+# fields={}
+# rows=table.getElementsByTagName('html:tr')
+# for row in rows:
+# #print "ROW"
+# cols=row.getElementsByTagName('html:td')
+#
+# #Name des Datenfeldes einlesen
+# try:
+# field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
+# #print "field",field
+# except:
+# print "error"
+# field=""
+#
+#    #Wandeln der Einträge in HTML
+#
+# #pars=cols[1].getElementsByTagName('par')
+# pars=cols[1].childNodes
+#
+# html=par2html(self,pars,tags=("",";"))
+#
+# addToDict(fields,field,html)
+# #print fields
+# return fields
+#
+#def par2html(self,pars,tags=None):
+# html=""
+#
+# for par in pars:
+# tagName=getattr(par,'tagName','')
+# if tagName in ["par","inline"]:
+# #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
+# #print "par",par
+# if not tags:
+# try:
+# tag=xml2htmlArray[par.getAttribute('class')]
+# except:
+# tag=('','
')
+# else:
+# tag=tags
+# #print "TAG",tag
+# content=getText(self,par.childNodes,par.getAttribute('class'))
+#
+#
+#
+# #print par.getAttribute('class'),node
+# try:
+# html+=tag[0]+content+tag[1]
+# except:
+# html=+tag[0]+content+tag[1]
+#
+# elif tagName=="pb":
+# html+=""
+#
+#
+# try:
+#
+# return html
+# except:
+# return ""
def getXlink(nodes):
"""searches xlinks and gives them back as html"""
@@ -167,52 +171,156 @@ def getXlink(nodes):
return ret
def checkRef(self,ref):
- dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
+ """teste ob reference angezeigt werden sollen"""
+ dbs={'vl_literature':'AND online = \'1\'',
+ 'vl_technology':'AND complete =\'yes\'',
+ 'vl_people':'AND complete =\'yes\'',
+ 'vl_sites':'AND complete =\'yes\'',
+ 'vl_transcript':'AND complete =\'yes\'',
+ 'vl_essays':'AND online =\'yes\'',
+ 'vl_categories':''
+ }
res=None
for db in dbs.keys():
-
- res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
+ searchStr=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db]))
+ res=res or self.search(var=searchStr)
return res
-
-def xml2html(self,str,quote="yes"):
- """link2html fuer VLP muss hier noch raus"""
-
-
+
+def link2html(self,str):
+ """link2html links in html wandeln"""
if str:
- if quote=="yes2":
- str=re.sub("\&","&",str)
-
- str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
- #str=re.sub("ref\=([.[*^[>]]])",'XX',str)
- #print "STR::",str
- dom=xml.dom.minidom.parseString(str)
+
+        str=re.sub("\&","&amp;",str)
+        dom=xml.dom.minidom.parseString("<txt>"+utf8ify(str)+"</txt>")
links=dom.getElementsByTagName("link")
+
for link in links:
link.tagName="a"
ref=link.getAttribute("ref")
- pn=link.getAttribute("page")
+ pn=link.getAttribute("page")
+ mk=link.getAttribute("mk")
+
+ if self.checkRef(ref):
+ more = ""
+ if pn:
+ more += "&page=%s"%pn
+
+ if mk:
+ more += "&mk=%s"%mk
+
+ link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+more)
+
+ newxml=dom.toxml('utf-8')
+
+ retStr=regexpTXT.search(newxml)
+ retStr = retStr.group(1)
+
+ return retStr.decode('utf-8') # we return unicode
+
+ return u""
+
+def related2html(self,str):
+ """related library items: xlinks in html wandeln / mb 22.11.2006"""
+ if str:
+
+        str=re.sub("\&","&amp;",str)
+        dom=xml.dom.minidom.parseString("<txt>"+utf8ify(str)+"</txt>")
+ links=dom.getElementsByTagName("link")
+
+ for link in links:
+ link.tagName = "a"
+ ref = link.getAttribute("ref")
+ pn = link.getAttribute("page")
+
+ searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
+ res = self.search(var=searchStr)
+
+ if res:
+ if res[0]['online'] == 1:
+ # item online verfuegbar
+ if pn:
+ link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
+ else:
+ link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+
+ link.setAttribute("title", "click to view")
+ link.removeAttribute("ref")
+
+ # prefix preceding the link
+ prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
+ dom.documentElement.insertBefore(prefix, link)
+
+ else:
+ # item nur als bibliographische angabe vorhanden
+ link.setAttribute("alt", unicodify(res[0]['fullreference']))
+ link.setAttribute("title", "click to expand")
+ link.setAttribute("onclick", "return toggle(this);")
+ link.setAttribute("class", "x_offline")
+
+ # prefix inside link text
+ link.firstChild.data = '+ ' + link.firstChild.data
+
+
+ newxml=dom.toxml('utf-8')
+
+ retStr=regexpTXT.search(newxml)
+ retStr = retStr.group(1)
+ #logging.debug("related2html out=%s"%repr(retStr))
+ return retStr.decode('utf-8') # we return unicode
+
+ return u""
+
+
- if checkRef(self,ref):
- if pn:
- link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
- else:
- link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)
-
- str= dom.toxml()
-
- #print link.toxml('utf-8')
- retStr=regexpPage.search(str)
- try:
- return retStr.group(1)
+def xml2html(self,str,quote="yes"):
+ """link2html fuer VLP muss hier noch raus"""
+ if str:
+ if quote=="yes2":
+            str=re.sub("\&","&amp;",str)
+ #dom=xml.dom.minidom.parseString(str)
+ dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
+ #links=dom.getElementsByTagName("link")
+ links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
+ for link in links:
+ #link.tagName="a"
+
+ ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref")
+ pn=link.getAttributeNS(EMPTY_NAMESPACE,"page")
+
+ cns=link.childNodes[0:]
+
+ newLink=dom.createElementNS(EMPTY_NAMESPACE,"a")
+ for x in cns:
+ newLink.appendChild(x)
+
+
+
+ link.parentNode.replaceChild(newLink,link)
+
+ if self.checkRef(ref):
+ if pn:
+ newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
+ else:
+ newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
+
+ #str= dom.toxml('utf-8')
+ buf = cStringIO.StringIO()
+ PrettyPrint(dom, stream=buf)
+ str = buf.getvalue()
+ buf.close()
+ #str=PrettyPrint(dom.documentElement,encoding='UTF-8')
+ #print link.toxml('utf-8')
+ #print type(str)
+ retStr=regexpPage.search(str)
+
+        try: # hack warum fehlt manchmal page??
+ return retStr.group(1).decode('utf-8')
except:
- exStr=""""""
- str=re.sub("\n","",str)
- #str=
- #print repr(str)
- return str.replace(exStr,'')
+ return str
return ""
+
def xlink2html(self,xlink,parClass=None):
ret=""