Annotation of ECHO_content/vlp_xmlhelpers.py, revision 1.1
1.1 ! dwinter 1: from sys import argv
! 2:
! 3: import string
! 4: import xml.dom.minidom
! 5: import Ft.Xml.XLink.Processor
! 6: import Ft.Xml.XLink.XLinkElements
! 7:
! 8: from Ft.Xml import XPath
! 9: from Ft.Xml.XPath import Evaluate
! 10: from Ft.Xml.XLink import XLINK_NAMESPACE
! 11: from Ft.Xml.XLink import XLinkElements
! 12:
! 13: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
! 14: #from Ft.Xml import EMPTY_NAMESPACE
! 15: from Ft.Lib import Uri
! 16: import urllib
! 17: import re
! 18:
# Matches a <page ...>...</page> element and captures its inner markup.
# Matching is case-insensitive and "." also matches newlines.
patternPage = r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE | re.DOTALL)

# Maps the "class" attribute of a <par>/<inline> element to the
# (opening, closing) HTML fragments that are wrapped around its content.
xml2htmlArray = {
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p class="picture">', '</p>'),
    'WEB_figuretitle': ('<p class="picturetitle">', '</p>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    'WEB_hyperlink': ('', ''),
    'Hyperlink': ('', ''),
    'Picture': ('<p class="picture">', '</p>'),
    'FigureTitle': ('<p class="picturetitle">', '</p>'),
}
! 23:
def addToDict(dict, name, value):
    """Append *value* to the list stored under *name* in *dict*.

    The entry is created as an empty list on first use, so every stored
    value is a list. An empty *name* is ignored.

    Returns 0 if *name* is the empty string (nothing is stored), else 1.
    """
    if name == "":
        return 0

    # Membership test instead of dict.has_key(), which no longer exists
    # in Python 3.
    if name not in dict:
        dict[name] = []  # create the entry as a list

    dict[name].append(value)
    return 1
! 34:
def proj2hash(self, xmlstring):
    """Convert a project XML document into a dictionary of lists.

    The document is parsed with minidom and all queries are evaluated
    relative to its first <part> element:

    * each ``par`` child is stored under the value of its ``class``
      attribute, with the text produced by getText();
    * sections are visited level by level (``section``,
      ``section/section``, ...) until a level yields no nodes. A section
      whose type is ``WEB_project_header`` contributes its header and
      content under ``WEB_project_header`` / ``WEB_project_description``;
      any other section appends ``<hLEVEL>header</hLEVEL>`` followed by its
      content under the key ``text``.

    Returns the dictionary (values are lists, see addToDict()).
    Raises IndexError if the document contains no <part> element.
    """
    dom = xml.dom.minidom.parseString(xmlstring)

    # Every XPath below is rooted at the first <part>; look it up once
    # instead of on every evaluation.
    part = dom.getElementsByTagName('part')[0]

    result = {}

    # paragraphs directly below <part>
    for par in Evaluate('par', part):
        className = par.getAttribute('class')
        content = getText(self, par.childNodes)
        addToDict(result, className, content)

    sectionXPath = "section"
    sections = Evaluate(sectionXPath, part)

    while sections:
        for section in sections:
            secType, header, content, level = parseSection(self, section)

            if secType == "WEB_project_header":  # special case: project
                addToDict(result, 'WEB_project_header', header)  # store title
                addToDict(result, 'WEB_project_description', content)  # store description
            else:  # no information in heading
                aTag = "<h%s>" % level
                eTag = "</h%s>" % level
                addToDict(result, "text", aTag + header + eTag)
                addToDict(result, "text", content)

        # descend one nesting level
        sectionXPath += "/section"
        sections = Evaluate(sectionXPath, part)

    return result
! 74:
! 75:
def parseSection(self, section):
    """Split a <section> element into type, header, content and level.

    The type and header are taken from the ``class`` attribute and the
    text of a ``heading`` child (the last one wins if there are several).
    If that leaves the type empty, the first ``par`` element found below
    the section is used instead; if there is none, type and header stay
    as they are.

    Returns a tuple ``(type, header, content, level)`` where *content* is
    par2html() applied to the section's child nodes and *level* is the
    section's ``level`` attribute.
    """
    secType = ""
    header = ""
    level = section.getAttribute('level')

    for child in section.childNodes:
        # text and comment nodes have no tagName, hence getattr
        if getattr(child, 'tagName', '') == "heading":
            secType = child.getAttribute('class')
            header = getText(self, child.childNodes)

    if secType == "":  # heading missing: check whether the first par fits
        firstPars = section.getElementsByTagName('par')
        if firstPars:
            par = firstPars[0]
            secType = par.getAttribute('class')
            # getText() takes self as its first argument; the original
            # call omitted it and raised TypeError.
            header = getText(self, par.childNodes)

    content = par2html(self, section.childNodes)
    return (secType, header, content, level)
! 97:
def parseTable(table, self=None):
    """Convert a two-column table into a dictionary of HTML fragments.

    For every ``html:tr`` row, the field name is the ``class`` attribute
    of the first ``par`` in the first ``html:td`` cell, and the value is
    the second cell's child nodes rendered by par2html() with an empty
    opening tag and ";" as closing tag.

    *table* is the DOM element to scan. *self* is optional and is only
    forwarded to par2html(); the original body referenced an undefined
    ``self`` and raised NameError.

    Rows with fewer than two cells are skipped. Rows whose first cell
    contains no ``par`` get an empty field name, which addToDict()
    ignores.

    Returns the dictionary (values are lists, see addToDict()).
    """
    fields = {}

    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')
        if len(cols) < 2:
            # no name/value pair in this row
            continue

        # read the name of the data field
        try:
            field = cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except IndexError:
            print("error")
            field = ""

        # convert the entries to HTML
        html = par2html(self, cols[1].childNodes, tags=("", ";"))

        addToDict(fields, field, html)

    return fields
! 123:
def par2html(self, pars, tags=None):
    """Render a sequence of DOM nodes as an HTML string.

    * ``par`` and ``inline`` elements are rendered as opening tag +
      getText() of their children + closing tag. The tag pair is *tags*
      if given, otherwise the xml2htmlArray entry for the element's
      ``class`` attribute, falling back to ``('<p>', '</p>')``.
    * ``pb`` elements are rendered as ``<pb/>``.
    * All other nodes (including nodes without a tagName) are ignored.

    *self* is only forwarded to getText().
    Returns the concatenated HTML, or "" if nothing was rendered.
    """
    parts = []

    for par in pars:
        # text and comment nodes have no tagName, hence getattr
        tagName = getattr(par, 'tagName', '')

        if tagName in ("par", "inline"):
            parClass = par.getAttribute('class')
            if tags:
                tag = tags
            else:
                tag = xml2htmlArray.get(parClass, ('<p>', '</p>'))

            content = getText(self, par.childNodes, parClass)
            # The original wrapped this in try/except whose handler used
            # "html=+..." (unary plus on a string) and could only raise
            # TypeError; plain concatenation is what was meant.
            parts.append(tag[0] + content + tag[1])

        elif tagName == "pb":
            parts.append("<pb/>")

    return "".join(parts)
! 158:
def getXlink(nodes):
    """Search *nodes* for xlinks and return them rendered as HTML.

    A node counts as an xlink when it has an ``xlink:type`` attribute.
    Each such node is rendered with xlink2html() and the results are
    concatenated. Returns "" when no node qualifies.
    """
    parts = []
    for node in nodes:
        attrs = node.attributes
        if attrs and 'xlink:type' in attrs.keys():  # is it an xlink?
            # xlink2html() expects (self, xlink); the original call passed
            # only the node and raised TypeError. There is no self here,
            # and xlink2html() only forwards it, so None is passed.
            parts.append(xlink2html(None, node))
    return "".join(parts)
! 167:
def checkRef(self, ref):
    """Look up *ref* in the reference tables.

    Runs ``self.search(var=<sql>)`` against vl_literature (restricted by
    an extra ``CD LIKE`` clause), vl_technology, vl_people and vl_sites,
    in that order, and stops at the first table that yields a true result.

    Returns that result, or the (false) result of the last query if no
    table matched.
    """
    # (table, extra WHERE clause); a tuple keeps the lookup order fixed
    tables = (
        ('vl_literature', "AND CD LIKE '%lise%'"),
        ('vl_technology', ''),
        ('vl_people', ''),
        ('vl_sites', ''),
    )

    # ref comes from document markup and is pasted into the SQL text, so
    # double any single quote to keep it from closing the string literal.
    # NOTE(review): a bound parameter would be safer if self.search
    # supports one - confirm against its implementation.
    safeRef = ("%s" % ref).replace("'", "''")

    res = None
    for table, extra in tables:
        res = self.search(var=str("select reference from %s where reference ='%s' %s" % (table, safeRef, extra)))
        if res:
            return res
    return res
! 175:
def xml2html(self, str, quote="yes"):
    """Convert an XML fragment with <link> elements into HTML.

    (Original note: the VLP-specific link2html part still has to be moved
    out of here.)

    Steps:

    1. An unquoted ``ref=`` attribute value is wrapped in double quotes so
       that the fragment can be parsed.
    2. Every <link> element is renamed to <a>. If checkRef() finds its
       ``ref``, an ``href`` pointing to ``<parent url>/vlp_coll?id=<ref>``
       is set, with ``&p=<page>`` appended when a ``page`` attribute is
       present.
    3. If the serialised result contains a <page> element, its inner
       markup is returned; otherwise the whole document is returned with
       newlines and the XML declaration removed.

    Returns "" when *str* is empty or None.
    Parse errors from minidom propagate to the caller.
    """
    if not str:
        return ""

    if quote == "yes2":
        # NOTE(review): as written this replaces "&" with "&" and changes
        # nothing; presumably "&amp;" was intended - confirm against the
        # repository before changing it.
        str = re.sub(r"\&", "&", str)

    # Put quotes around the ref attribute if they are missing. The
    # lookahead skips values that are already quoted (and the "ref=" inside
    # href="..."), which the original pattern turned into malformed XML.
    str = re.sub(r"""ref\=(?!["'])([^>]*)\>""", r'ref="\g<1>">', str)

    dom = xml.dom.minidom.parseString(str)

    for link in dom.getElementsByTagName("link"):
        link.tagName = "a"
        ref = link.getAttribute("ref")
        pn = link.getAttribute("page")

        if checkRef(self, ref):
            href = self.aq_parent.absolute_url() + "/vlp_coll?id=" + ref
            if pn:
                href += "&p=" + pn
            link.setAttribute("href", href)

    str = dom.toxml()

    match = regexpPage.search(str)
    if match:
        return match.group(1)

    # no <page> wrapper: return the document without newlines and without
    # the XML declaration
    exStr = """<?xml version="1.0" ?>"""
    str = re.sub("\n", "", str)
    return str.replace(exStr, '')
! 215:
def xlink2html(self, xlink, parClass=None):
    """Render a single xlink element as HTML.

    * An ``image`` element becomes ``<img src="HREF" />``.
    * A ``link`` element whose URL-unquoted ``href`` starts with the
      scheme ``http``, ``https`` or ``file`` becomes an <img> when
      *parClass* is "Picture", otherwise an <a> whose label is the
      getText() of the element's children.
    * Any other ``link`` is treated as embedded markup: the unquoted href
      is returned with unquoted ``ref=`` attribute values wrapped in
      double quotes.
    * Every other element yields "".

    Tag names are compared case-insensitively. *self* is only forwarded
    to getText().
    """
    tagName = xlink.tagName.lower()

    if tagName == "image":
        return """<img src="%s" />""" % xlink.getAttribute('href')

    if tagName != "link":
        return ""

    # urllib.unquote on Python 2; it lives in urllib.parse on Python 3
    unquote = getattr(urllib, 'unquote', None)
    if unquote is None:
        from urllib.parse import unquote
    reference = unquote(xlink.getAttribute('href'))

    # check if href is already a correct url
    if reference.split(":")[0] in ('http', 'https', 'file'):
        if parClass == "Picture":
            return """<img src="%s" />""" % (reference)
        label = getText(self, xlink.childNodes)
        return """<a href="%s" >%s</a>""" % (reference, label)

    # transform: put quotes around the ref attribute if they are missing.
    # The lookahead leaves values that are already quoted untouched.
    return re.sub(r"""ref\=(?!["'])([^>]*)\>""", r'ref="\g<1>">', reference)
! 240:
def getText(self, nodelist, parClass=None):
    """Concatenate the text and inline markup of *nodelist*.

    * Text nodes contribute their data.
    * ``inline`` elements are rendered with par2html().
    * ``pb`` elements contribute ``<pb/>``.
    * Any other node that has a ``type`` attribute is rendered with
      xlink2html(), which receives *parClass*.
    * Everything else (comments, elements without that attribute) is
      skipped.

    *self* is only forwarded to par2html() / xlink2html().
    Returns the collected string (unicode on Python 2).
    """
    rc = u''

    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            data = node.data
            try:
                rc += data
            except UnicodeError:
                # Python 2 byte string with non-ASCII content: decode it
                # leniently. The original handler replaced rc with the
                # node data and so dropped everything collected so far.
                rc += data.decode('utf-8', 'ignore')
            continue

        # comment nodes and the like have no tagName; the original
        # attribute access raised AttributeError on them
        tagName = getattr(node, 'tagName', None)

        if tagName == "inline":
            rc += par2html(self, [node])
        elif tagName == "pb":
            rc += "<pb/>"
        elif node.attributes and 'type' in node.attributes.keys():
            # NOTE(review): the original comment calls this an xlink test,
            # but getXlink() checks for 'xlink:type' - confirm which
            # attribute name the documents actually use.
            rc += xlink2html(self, node, parClass)

    return rc
! 280:
! 281:
! 282: #filename=argv[1]
! 283: #fileString=file(filename).read()
! 284: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>