Annotation of MPIWGWeb/xmlhelper.py, revision 1.1
1.1 ! dwinter 1:
! 2: from sys import argv
! 3:
! 4: import string
! 5: import xml.dom.minidom
! 6: import Ft.Xml.XLink.Processor
! 7: import Ft.Xml.XLink.XLinkElements
! 8:
! 9: from Ft.Xml import XPath
! 10: from Ft.Xml.XPath import Evaluate
! 11: from Ft.Xml.XLink import XLINK_NAMESPACE
! 12: from Ft.Xml.XLink import XLinkElements
! 13:
! 14: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
! 15: #from Ft.Xml import EMPTY_NAMESPACE
! 16: from Ft.Lib import Uri
! 17:
# Lookup table: paragraph ``class`` attribute -> (opening, closing) HTML
# fragments that are wrapped around the paragraph's content.
xml2html = {
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p>', '</p>'),
    'WEB_figuretitle': ('<i>', '</i>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    'WEB_hyperlink': ('', ''),
}
! 19:
def addToDict(dict, name, value):
    """Append *value* to the list stored under *name* in *dict*.

    The list is created the first time a name is seen.  An empty *name*
    is ignored and leaves *dict* untouched.

    Returns 1 if the value was stored, 0 if *name* was the empty string.
    """
    if name == "":
        return 0

    # setdefault() creates the list on first use; it replaces the
    # deprecated has_key() test, which no longer exists in Python 3.
    dict.setdefault(name, []).append(value)
    return 1
! 30:
def proj2hash(xmlstring):
    """Convert the XML description of a project into a dict.

    The first ``part`` element of the document is processed in three steps:

    1. its ``par`` children are stored under their ``class`` attribute,
    2. the first ``html:table`` of the document is merged in through
       parseTable(), replacing any entry that has the same key,
    3. the ``section`` and then the ``section/section`` elements are added.

    Returns a dict that maps names to lists of values.
    Raises IndexError if the document contains no ``part`` or no
    ``html:table`` element.
    """
    dom = xml.dom.minidom.parseString(xmlstring)
    part = dom.getElementsByTagName('part')[0]

    result = {}

    # Title paragraphs directly below the part.
    for par in Evaluate('par', part):
        addToDict(result, par.getAttribute('class'), getText(par.childNodes))

    # Parse the table.
    result.update(parseTable(dom.getElementsByTagName('html:table')[0]))

    # Level-1 sections first, then the sections nested one level deeper.
    for path in ('section', 'section/section'):
        for section in Evaluate(path, part):
            kind, header, content = parseSection(section)
            if kind == "WEB_project_header":
                # Special case "project": the heading carries the title.
                addToDict(result, 'WEB_project_header', header)
                addToDict(result, 'WEB_project_description', content)
            else:
                # No information in the heading; keep only the content.
                addToDict(result, kind, content)

    return result
! 76:
! 77:
def parseSection(section):
    """Split a section element into its heading and body.

    Returns a tuple ``(class of the first heading, text of that heading,
    HTML built from the section's ``par`` elements)``.
    """
    first_heading = section.getElementsByTagName('heading')[0]
    return (
        first_heading.getAttribute('class'),
        getText(first_heading.childNodes),
        par2html(Evaluate('par', section)),
    )
! 87:
def parseTable(table):
    """Read a two-column table into a dict of lists.

    For every ``html:tr`` row, the ``class`` attribute of the first ``par``
    in the first ``html:td`` is used as the field name, and the ``par``
    elements of the second ``html:td`` are converted to a string in which
    each entry is followed by ";".

    Rows that lack a name paragraph or a second cell are reported with
    "error" on stdout and skipped; rows whose field name is empty are
    skipped silently.

    Returns a dict that maps field names to lists of strings.
    """
    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')

        # Read the name of the data field and locate the value cell.
        try:
            field = cols[0].getElementsByTagName('par')[0].getAttribute('class')
            value_cell = cols[1]
        except IndexError:
            # Malformed row: skip it instead of aborting the whole table.
            print("error")
            continue

        if field == "":
            # addToDict() ignores empty names, so skip the conversion too.
            continue

        # Convert the entries to HTML.
        html = par2html(value_cell.getElementsByTagName('par'), tags=("", ";"))
        addToDict(fields, field, html)

    return fields
! 113:
def par2html(pars, tags=None):
    """Concatenate paragraph elements into one HTML string.

    pars -- iterable of elements; the text of each is read with getText().
    tags -- optional (opening, closing) pair used for every paragraph.
            When it is omitted (or empty), the pair is looked up in
            xml2html by the paragraph's ``class`` attribute and falls
            back to ``('<p>', '</p>')`` for unknown classes.

    Returns the concatenated string, or "" if *pars* is empty.
    """
    default_tag = ('<p>', '</p>')
    pieces = []
    for par in pars:
        if tags:
            tag = tags
        else:
            tag = xml2html.get(par.getAttribute('class'), default_tag)
        pieces.append(tag[0] + getText(par.childNodes) + tag[1])

    # Collecting the pieces avoids the former reliance on a bare except to
    # initialise the accumulator, which also hid genuine errors.
    return "".join(pieces)
! 140:
def getXlink(nodes):
    """Collect the HTML for every node that carries an ``xlink:type`` attribute.

    Nodes without attributes, or without ``xlink:type``, contribute nothing.
    Returns the concatenated output of xlink2html() for the matching nodes.
    """
    fragments = []
    for candidate in nodes:
        attrs = candidate.attributes
        if not attrs:
            continue
        if 'xlink:type' in attrs.keys():  # is it an xlink?
            fragments.append(xlink2html(candidate))
    return "".join(fragments)
! 149:
def xlink2html(xlink):
    """Render an xlink element as an HTML fragment.

    An ``image`` element becomes an ``<img>`` tag and a ``link`` element
    becomes an ``<a>`` tag whose content is the element's text as returned
    by getText().  The tag name is compared case-insensitively.  Any other
    element yields "".

    The ``xlink:href`` value is escaped and single-quoted, because the XML
    parser hands it over with entities already decoded.
    """
    from xml.sax.saxutils import escape

    href = escape(xlink.getAttribute('xlink:href'), {"'": "&#39;"})
    tag = xlink.tagName.lower()

    if tag == "image":
        return "<img src='%s' />" % href
    if tag == "link":
        return "<a href='%s' >%s</a>" % (href, getText(xlink.childNodes))
    return ""
! 163:
def getText(nodelist):
    """Return the text and inline markup of *nodelist* as one unicode string.

    Text and CDATA nodes contribute their data unchanged.  ``inline``
    elements are rendered with par2html(), other elements that carry an
    ``xlink:type`` attribute are rendered with xlink2html(), and all
    remaining nodes (comments, processing instructions, elements without
    an xlink) are ignored.
    """
    pieces = []
    for node in nodelist:
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
            pieces.append(node.data)
        elif node.nodeType != node.ELEMENT_NODE:
            # Comments and similar nodes have no tagName; skip them.
            continue
        elif node.tagName == "inline":
            pieces.append(par2html([node]))
        elif node.attributes and 'xlink:type' in node.attributes.keys():
            # The element is an xlink.
            pieces.append(xlink2html(node))
    return u''.join(pieces)
! 198:
! 199:
! 200: #filename=argv[1]
! 201: #fileString=file(filename).read()
! 202: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>