MPIWGWeb/xmlhelper.py - view

File: [Repository] / MPIWGWeb / xmlhelper.py
Revision 1.6.2.2: download - view: text, annotated - select for diffs - revision graph
Mon Jan 9 07:33:31 2012 UTC (13 years, 5 months ago) by dwinter
Branches: r2

Errors in template and helper fixed

"""Convert project XML documents into dictionaries of text/HTML fragments.

The entry point is :func:`proj2hash`; the remaining functions are the
building blocks it uses to walk an ``xml.dom.minidom`` tree.
"""

from sys import argv

import string
import xml.dom.minidom

# The 4Suite (Ft.Xml XPath / XLink / Domlette) imports that used to stand here
# are disabled in this revision; the module works on xml.dom.minidom only.

# Maps the ``class`` attribute of a <par>/<inline> element to the
# (opening, closing) markup wrapped around its text.
# NOTE(review): every pair is empty in this revision -- confirm whether the
# intended markup was lost or whether plain text output is deliberate.
xml2html = {
    'WEB_normal': ('', ''),
    'Normal': ('', ''),
    'WEB_picture': ('', ''),
    'WEB_figuretitle': ('', ''),
    'WEB_bibliography': ('', ''),
    'Web_kursiv': ('', ''),
    'WEB_kursiv': ('', ''),
    'WEB_hyperlink': ('', ''),
    'Hyperlink': ('', ''),
}


def addToDict(dict, name, value):
    """Append *value* to the list stored under *name* in *dict*.

    The list is created on first use.  An empty *name* is ignored.

    Returns 1 if the value was stored, 0 if *name* was empty.
    """
    if name == "":
        return 0
    # ``in`` instead of ``has_key`` so the helper also runs on Python 3.
    if name not in dict:
        dict[name] = []
    dict[name].append(value)
    return 1


def _select_children(path, context):
    """Return the elements reached from *context* by a '/'-separated child path.

    Stands in for the XPath ``Evaluate`` call whose import is disabled in this
    module: ``'par'`` yields the direct ``par`` children of *context*,
    ``'section/section'`` the ``section`` children of its ``section``
    children.  Only plain element names are supported; they are compared with
    ``tagName``.
    """
    nodes = [context]
    for step in path.split('/'):
        nodes = [
            child
            for node in nodes
            for child in node.childNodes
            if child.nodeType == child.ELEMENT_NODE and child.tagName == step
        ]
    return nodes


def _store_section(result, section):
    """Parse *section* and file its content into *result*."""
    kind, header, content = parseSection(section)
    if kind == "WEB_project_header":
        # Special case: the project header contributes a title and a description.
        addToDict(result, 'WEB_project_header', header)
        addToDict(result, 'WEB_project_description', content)
    else:
        # The heading carries no information of its own; it only names the key.
        addToDict(result, kind, content)


def proj2hash(xmlstring):
    """Convert a project XML document into a dict of lists.

    Collected, in this order:

    * every ``par`` that is a direct child of the first ``part`` element,
      keyed by its ``class`` attribute;
    * the fields of the first ``html:table`` (see :func:`parseTable`), which
      replace any entry already stored under the same key;
    * every first-level and second-level ``section`` of that ``part``
      (see :func:`parseSection`).

    Raises ``IndexError`` if the document has no ``part`` or no ``html:table``
    element.
    """
    dom = xml.dom.minidom.parseString(xmlstring)
    part = dom.getElementsByTagName('part')[0]

    result = {}

    # Title paragraphs directly below <part>.
    for par in _select_children('par', part):
        addToDict(result, par.getAttribute('class'), getText(par.childNodes))

    result.update(parseTable(dom.getElementsByTagName('html:table')[0]))

    # First-level sections, then the sections nested one level deeper.
    for path in ('section', 'section/section'):
        for section in _select_children(path, part):
            _store_section(result, section)

    return result


def parseSection(section):
    """Return ``(type, header, content)`` for a ``section`` element.

    *type* and *header* come from the ``class`` attribute and text of the
    section's ``heading`` child (the last one if there are several).  If
    no heading supplies a type, the first ``par`` found anywhere inside the
    section is used instead; if there is none either, both stay empty.
    *content* is the rendered text of the section's direct ``par`` children.
    """
    kind = ""
    header = ""
    for child in section.childNodes:
        # Text nodes have no tagName, hence the getattr default.
        if getattr(child, 'tagName', '') == "heading":
            kind = child.getAttribute('class')
            header = getText(child.childNodes)

    if kind == "":
        # Heading missing: fall back to the first paragraph, if any.
        pars = section.getElementsByTagName('par')
        if pars:
            kind = pars[0].getAttribute('class')
            header = getText(pars[0].childNodes)

    content = par2html(_select_children('par', section))
    return (kind, header, content)


def parseTable(table):
    """Read the name/value rows of an ``html:table`` element into a dict.

    For each ``html:tr`` the field name is the ``class`` attribute of the
    first ``par`` in the first ``html:td``; the value is the text of all
    ``par`` elements in the second ``html:td``, each followed by ``;``.
    Rows with fewer than two cells are skipped; rows whose first cell has no
    ``par`` are reported with ``error`` on stdout and skipped.

    Returns a dict mapping field name to a list of value strings.
    """
    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')
        if len(cols) < 2:
            # No value cell to read.
            continue

        # Read the name of the data field.
        name_pars = cols[0].getElementsByTagName('par')
        if not name_pars:
            print("error")
            continue
        field = name_pars[0].getAttribute('class')

        # Convert the entries to HTML.
        html = par2html(cols[1].getElementsByTagName('par'), tags=("", ";"))
        addToDict(fields, field, html)
    return fields


def par2html(pars, tags=None):
    """Render the elements in *pars* as one string.

    Each element's text (see :func:`getText`) is wrapped in an
    (opening, closing) pair: *tags* if given, otherwise the ``xml2html``
    entry for the element's ``class`` attribute, or an empty pair for unknown
    classes.

    Returns the concatenation, or ``""`` if *pars* is empty.
    """
    pieces = []
    for par in pars:
        if tags:
            tag = tags
        else:
            tag = xml2html.get(par.getAttribute('class'), ('', ''))
        pieces.append(tag[0])
        pieces.append(getText(par.childNodes))
        pieces.append(tag[1])
    return "".join(pieces)


def getXlink(nodes):
    """Search *nodes* for xlinks and return them rendered as HTML."""
    ret = ""
    for node in nodes:
        attrs = node.attributes
        # Only nodes carrying an xlink:type attribute are xlinks.
        if attrs and 'xlink:type' in attrs.keys():
            ret += xlink2html(node)
    return ret


def xlink2html(xlink):
    """Render an xlink element as HTML.

    ``image`` elements become an ``<img>`` tag, ``link`` elements an ``<a>``
    tag around the element's text; the tag name is compared
    case-insensitively.  Any other element yields ``""``.
    """
    # NOTE(review): the xlink:href value is inserted unquoted (image) and
    # unescaped (both); confirm the XML source is trusted.
    name = xlink.tagName.lower()
    if name == "image":
        return "<img src=%s />" % xlink.getAttribute('xlink:href')
    if name == "link":
        return "<a href='%s' >%s</a>" % (
            xlink.getAttribute('xlink:href'),
            getText(xlink.childNodes),
        )
    return ""


def getText(nodelist):
    """Return the text of *nodelist* with inline elements and xlinks rendered.

    Text and CDATA nodes contribute their data, ``inline`` elements are
    rendered through :func:`par2html`, elements with an ``xlink:type``
    attribute through :func:`xlink2html`.  Everything else (comments,
    processing instructions, other elements) is skipped.
    """
    pieces = []
    for node in nodelist:
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
            pieces.append(node.data)
        elif node.nodeType != node.ELEMENT_NODE:
            # Comments etc. have no tagName; there is nothing to render.
            continue
        elif node.tagName == "inline":
            pieces.append(par2html([node]))
        elif node.attributes and 'xlink:type' in node.attributes.keys():
            pieces.append(xlink2html(node))
    return u''.join(pieces)


if __name__ == "__main__":
    # Manual check: python xmlhelper.py <project.xml>
    with open(argv[1], "rb") as handle:
        print(proj2hash(handle.read()))