![]() ![]() | ![]() |
first import product fuer www des institutes
"""Convert project XML documents into a dict of HTML fragments.

The entry point is :func:`proj2hash`; the remaining functions are the
helpers it uses to walk the DOM and render paragraphs, inline elements
and XLinks as HTML.
"""

from sys import argv

import logging
import string
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements

from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements

#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
#from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri

logger = logging.getLogger(__name__)

# Maps the ``class`` attribute of a paragraph/inline element to the
# (opening, closing) HTML tags that wrap its rendered content.
xml2html = {
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p>', '</p>'),
    'WEB_figuretitle': ('<i>', '</i>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    'WEB_hyperlink': ('', ''),
}

# Tags used when an element's class is not listed in ``xml2html``.
_DEFAULT_TAGS = ('<p>', '</p>')


def addToDict(dict, name, value):
    """Append *value* to the list stored under *name* in *dict*.

    The list is created on first use.  An empty *name* is ignored.

    Returns 1 if the value was stored, 0 if *name* was empty.
    """
    if name == "":
        return 0
    dict.setdefault(name, []).append(value)  # create the list on first use
    return 1


def _storeSections(result, sections):
    """Parse each section and file its content into *result*."""
    for section in sections:
        kind, title, content = parseSection(section)
        if kind == "WEB_project_header":
            # Special case "project": the heading text itself is data.
            addToDict(result, 'WEB_project_header', title)
            addToDict(result, 'WEB_project_description', content)
        else:
            # The heading carries no information beyond its class.
            addToDict(result, kind, content)


def proj2hash(xmlstring):
    """Convert the XML of a project file into a hash (dict).

    The result maps class names to lists of strings, collected in this
    order:

    1. every node matched by the XPath ``par`` on the first ``part``
       element, keyed by its ``class`` attribute;
    2. the fields of the first ``html:table`` (see :func:`parseTable`);
       these are merged with ``dict.update`` and therefore replace any
       entry of the same name gathered in step 1;
    3. the sections matched by ``section`` and then ``section/section``
       on the first ``part`` element (see :func:`parseSection`).

    Raises IndexError if the document has no ``part`` or no
    ``html:table`` element.
    """
    dom = xml.dom.minidom.parseString(xmlstring)
    part = dom.getElementsByTagName('part')[0]
    result = {}

    # Title paragraphs.
    for par in Evaluate('par', part):
        addToDict(result, par.getAttribute('class'), getText(par.childNodes))

    # Data table.
    result.update(parseTable(dom.getElementsByTagName('html:table')[0]))

    # Level-1 sections, then the sections nested one level deeper.
    _storeSections(result, Evaluate('section', part))
    _storeSections(result, Evaluate('section/section', part))

    return result


def parseSection(section):
    """Split a section element into ``(type, header, content)``.

    *type* is the ``class`` attribute of the first ``heading`` element
    found below *section*, *header* is that heading's rendered text and
    *content* is the HTML of the nodes matched by the XPath ``par`` on
    *section*.

    Raises IndexError if *section* contains no ``heading`` element.
    """
    heading = section.getElementsByTagName('heading')[0]
    kind = heading.getAttribute('class')
    header = getText(heading.childNodes)
    content = par2html(Evaluate('par', section))
    return (kind, header, content)


def parseTable(table):
    """Read a two-column ``html:table`` into a dict of field lists.

    For every ``html:tr`` row the field name is the ``class`` attribute
    of the first ``par`` in the first ``html:td`` cell; the value is the
    ``par`` elements of the second cell rendered with each paragraph
    terminated by ``;``.  Rows that have fewer than two cells, no
    ``par`` in the first cell, or an empty field name are skipped.
    """
    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')
        if len(cols) < 2:
            logger.warning("table row skipped: fewer than two html:td cells")
            continue

        # Read the name of the data field.
        namePars = cols[0].getElementsByTagName('par')
        if not namePars:
            logger.warning("table row skipped: no par in the first cell")
            continue
        field = namePars[0].getAttribute('class')

        # Convert the entries to HTML.
        html = par2html(cols[1].getElementsByTagName('par'), tags=("", ";"))

        addToDict(fields, field, html)  # ignores an empty field name
    return fields


def par2html(pars, tags=None):
    """Render the elements in *pars* as one concatenated HTML string.

    Each element's content (see :func:`getText`) is wrapped in a pair of
    tags.  If *tags* is given and non-empty, that ``(open, close)`` pair
    is used for every element; otherwise the pair is looked up in
    ``xml2html`` by the element's ``class`` attribute, falling back to
    ``<p>...</p>`` for unknown classes.

    Returns "" when *pars* is empty.
    """
    pieces = []
    for par in pars:
        if tags:
            opening, closing = tags[0], tags[1]
        else:
            opening, closing = xml2html.get(par.getAttribute('class'),
                                            _DEFAULT_TAGS)
        pieces.append(opening + getText(par.childNodes) + closing)
    return "".join(pieces)


def _isXlink(node):
    """Return True if *node* carries an ``xlink:type`` attribute."""
    # ``attributes`` is None for non-element nodes.
    return bool(node.attributes) and 'xlink:type' in node.attributes.keys()


def getXlink(nodes):
    """Search *nodes* for xlinks and return them rendered as HTML."""
    return "".join([xlink2html(node) for node in nodes if _isXlink(node)])


def xlink2html(xlink):
    """Render an xlink element as HTML.

    ``image`` elements become ``<img>`` and ``link`` elements become
    ``<a>`` (tag names are compared case-insensitively); any other
    element yields "".
    """
    # NOTE(review): the href and the link text are inserted without HTML
    # escaping -- confirm that the input documents are trusted.
    tag = xlink.tagName.lower()
    href = xlink.getAttribute('xlink:href')
    if tag == "image":
        # The attribute value is quoted (like the link below) so that
        # URLs containing spaces stay intact.
        return "<img src='%s' />" % href
    if tag == "link":
        return "<a href='%s' >%s</a>" % (href, getText(xlink.childNodes))
    return ""


def getText(nodelist):
    """Return the content of *nodelist* as a (unicode) HTML string.

    Text and CDATA nodes contribute their data, ``inline`` elements are
    rendered through :func:`par2html`, and elements carrying an
    ``xlink:type`` attribute through :func:`xlink2html`.  All other
    nodes (comments, processing instructions, other elements) are
    ignored.
    """
    parts = []
    for node in nodelist:
        if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
            parts.append(node.data)
        elif node.nodeType != node.ELEMENT_NODE:
            # Comments etc. have no tagName; there is nothing to render.
            continue
        elif node.tagName == "inline":
            parts.append(par2html([node]))
        elif _isXlink(node):
            parts.append(xlink2html(node))
    return u''.join(parts)


# Example command-line use:
#filename=argv[1]
#fileString=file(filename).read()
#print proj2hash(fileString)