MPIWGWeb/xmlhelper.py - view

File: [Repository] / MPIWGWeb / xmlhelper.py
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Wed Sep 1 09:35:12 2004 UTC (20 years, 10 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD

bug fixed in parseSection

"""Helpers that turn project XML files into dictionaries of HTML fragments.

proj2hash() is the main entry point; the remaining functions convert
individual DOM nodes (paragraphs, table rows, sections, xlinks) to text.
"""

from sys import argv

import string
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements

from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements

#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
#from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri

# Maps the 'class' attribute of a paragraph to an (opening, closing) pair
# that par2html() wraps around the paragraph text.  Every class currently
# maps to a pair of empty strings.
xml2html = {
    'WEB_normal': ('', ''),
    'Normal': ('', ''),
    'WEB_picture': ('', ''),
    'WEB_figuretitle': ('', ''),
    'WEB_bibliography': ('', ''),
    'Web_kursiv': ('', ''),
    'WEB_kursiv': ('', ''),
    'WEB_hyperlink': ('', ''),
    'Hyperlink': ('', ''),
}


def addToDict(dict, name, value):
    """Append value to the list stored under name in dict.

    The list is created on first use.  An empty name is ignored.

    Returns 1 if the value was stored and 0 if name was empty.
    """
    if name == "":
        return 0
    # Every entry is kept as a list so that repeated names accumulate.
    dict.setdefault(name, []).append(value)
    return 1


def _storeSection(result, section):
    """Parse one section element and file its content in result."""
    kind, header, content = parseSection(section)
    if kind == "WEB_project_header":
        # Special case: the project header supplies a title and a description.
        addToDict(result, 'WEB_project_header', header)
        addToDict(result, 'WEB_project_description', content)
    else:
        # The heading itself carries no information; keep only the content.
        addToDict(result, kind, content)


def proj2hash(xmlstring):
    """Convert a project XML document into a dictionary.

    The direct 'par' children of the first 'part' element, the rows of the
    first 'html:table' element and the first two levels of 'section'
    elements are collected.  Keys are the 'class' attribute values and
    values are lists of text/HTML fragments.

    Raises IndexError if the document has no 'part' or no 'html:table'
    element.
    """
    dom = xml.dom.minidom.parseString(xmlstring)
    part = dom.getElementsByTagName('part')[0]
    result = {}

    # Title paragraphs directly below the part element.
    for par in Evaluate('par', part):
        addToDict(result, par.getAttribute('class'), getText(par.childNodes))

    # Table entries replace title entries that use the same key.
    result.update(parseTable(dom.getElementsByTagName('html:table')[0]))

    # Top-level sections first, then the sections nested one level deeper.
    for path in ('section', 'section/section'):
        for section in Evaluate(path, part):
            _storeSection(result, section)

    return result


def parseSection(section):
    """Split a section element into (type, header, content).

    type and header come from the last direct 'heading' child.  If there is
    no heading (or its class is empty), the first 'par' descendant is used
    instead.  content is the HTML of all direct 'par' children.  A section
    with neither a heading nor a par yields empty type and header instead
    of raising IndexError.
    """
    kind = ""
    header = ""
    for child in section.childNodes:
        # Text and comment nodes have no tagName, hence getattr.
        if getattr(child, 'tagName', '') == "heading":
            kind = child.getAttribute('class')
            header = getText(child.childNodes)

    if kind == "":
        # Heading missing: fall back to the first paragraph, if any.
        pars = section.getElementsByTagName('par')
        if pars:
            kind = pars[0].getAttribute('class')
            header = getText(pars[0].childNodes)

    content = par2html(Evaluate('par', section))
    return (kind, header, content)


def parseTable(table):
    """Read a two-column table into a dictionary of lists.

    For each 'html:tr' row, the class of the first 'par' in the first
    'html:td' cell is the field name and the paragraphs of the second cell
    are the value (each paragraph followed by ';').  Rows with fewer than
    two cells are skipped.
    """
    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')

        # Read the name of the data field.
        try:
            field = cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except IndexError:
            print("error")
            field = ""

        if len(cols) < 2:
            # No value cell: nothing to store for this row.
            continue

        # Convert the entries to HTML.
        html = par2html(cols[1].getElementsByTagName('par'), tags=("", ";"))
        addToDict(fields, field, html)
    return fields


def par2html(pars, tags=None):
    """Concatenate the text of the given paragraph elements.

    Each paragraph is wrapped in an (opening, closing) pair: tags if given,
    otherwise the xml2html entry for the paragraph's class, or a pair of
    empty strings for unknown classes.

    Returns "" when pars is empty.
    """
    parts = []
    for par in pars:
        if tags:
            tag = tags
        else:
            tag = xml2html.get(par.getAttribute('class'), ('', ''))
        parts.append(tag[0] + getText(par.childNodes) + tag[1])
    return "".join(parts)


def getXlink(nodes):
    """Search nodes for xlinks and return them as HTML."""
    ret = ""
    for node in nodes:
        # Nodes without attributes (e.g. text nodes) cannot be xlinks.
        if node.attributes and 'xlink:type' in node.attributes.keys():
            ret += xlink2html(node)
    return ret


def xlink2html(xlink):
    """Render an xlink element as HTML.

    'image' elements become an img tag and 'link' elements an anchor whose
    text is the element content.  Other elements yield "".
    """
    # NOTE(review): the href is inserted unquoted (img) and unescaped (both);
    # confirm that the input is trusted before changing the output format.
    name = xlink.tagName.lower()
    if name == "image":
        return "<img src=%s />" % xlink.getAttribute('xlink:href')
    if name == "link":
        return "<a href='%s' >%s</a>" % (xlink.getAttribute('xlink:href'),
                                         getText(xlink.childNodes))
    return ""


def _asUnicode(value):
    """Return value as text, decoding byte strings as UTF-8 (errors ignored)."""
    if isinstance(value, bytes):
        return value.decode('utf-8', 'ignore')
    return value


def getText(nodelist):
    """Return the text of nodelist as one unicode string.

    Text and CDATA nodes contribute their data, 'inline' elements are
    rendered with par2html() and elements carrying an 'xlink:type'
    attribute with xlink2html().  All other nodes (comments, unknown
    elements) are skipped.
    """
    parts = []
    for node in nodelist:
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
            parts.append(node.data)
        elif getattr(node, 'tagName', None) == "inline":
            # getattr: comment and similar nodes have no tagName.
            parts.append(par2html([node]))
        elif node.attributes and 'xlink:type' in node.attributes.keys():
            parts.append(xlink2html(node))
    # Joining once at the end keeps every fragment; byte strings are decoded
    # first so that mixed input cannot raise a UnicodeDecodeError.
    return u''.join([_asUnicode(part) for part in parts])


# Manual test driver, left disabled:
#filename=argv[1]
#fileString=file(filename).read()
#print proj2hash(fileString)