ECHO_content/vlp_xmlhelpers.py - annotate

Return to vlp_xmlhelpers.py CVS log
Up to [Repository] / ECHO_content
Annotation of ECHO_content/vlp_xmlhelpers.py, revision 1.1

1.1     ! dwinter     1: from sys import argv
        !             2: 
        !             3: import string
        !             4: import xml.dom.minidom
        !             5: import Ft.Xml.XLink.Processor
        !             6: import Ft.Xml.XLink.XLinkElements
        !             7: 
        !             8: from Ft.Xml import XPath
        !             9: from Ft.Xml.XPath import Evaluate
        !            10: from Ft.Xml.XLink import XLINK_NAMESPACE
        !            11: from Ft.Xml.XLink import XLinkElements
        !            12: 
        !            13: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
        !            14: #from Ft.Xml import EMPTY_NAMESPACE
        !            15: from Ft.Lib import Uri
        !            16: import urllib
        !            17: import re
        !            18: 
        !            19: patternPage=r"<\s*page.*?>(.*?)</page>"
        !            20: regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
        !            21: 
        !            22: xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
        !            23: 
        !            24: def addToDict(dict,name,value):
        !            25:     if name=="":
        !            26:         return 0
        !            27:     else:
        !            28:         
        !            29:         if not dict.has_key(name):
        !            30:             dict[name]=[] # als array anlegen
        !            31: 
        !            32:         dict[name].append(value)
        !            33:         return 1    
        !            34: 
        !            35: def proj2hash(self,xmlstring):
        !            36:     """wandelt xml-files fuer die projekte in ein hash"""
        !            37:     
        !            38:     dom=xml.dom.minidom.parseString(xmlstring)
        !            39:     
        !            40:         
        !            41:     list={}
        !            42: 
        !            43:     #gettitle
        !            44:     pars=Evaluate('par',dom.getElementsByTagName('part')[0])
        !            45:     for par in pars:
        !            46:         className=par.getAttribute('class')
        !            47:         content=getText(self,par.childNodes)
        !            48:         addToDict(list,className,content)
        !            49:              
        !            50: 
        !            51:     sectionXPath="section"
        !            52: 
        !            53:     
        !            54:     sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
        !            55:     
        !            56:     while sections:
        !            57:         
        !            58:         for section in sections:
        !            59:             
        !            60:             sec=parseSection(self,section)
        !            61:             
        !            62:             if sec[0]=="WEB_project_header": # Sonderfall project
        !            63:                 addToDict(list,'WEB_project_header',sec[1]) # store title
        !            64:                 addToDict(list,'WEB_project_description',sec[2]) #store description
        !            65:             else: # no information in heading
        !            66:                 level=sec[3]
        !            67:                 aTag="<h%s>"%level
        !            68:                 eTag="</h%s>"%level
        !            69:                 addToDict(list,"text",aTag+sec[1]+eTag)
        !            70:                 addToDict(list,"text",sec[2])
        !            71:         sectionXPath+="/section"
        !            72:         sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
        !            73:     return list
        !            74: 
        !            75: 
        !            76: def parseSection(self,section):
        !            77:     type=""
        !            78:     header=""
        !            79:     level=section.getAttribute('level')
        !            80:     for heading in section.childNodes:
        !            81:         if getattr(heading,'tagName','')=="heading":
        !            82:             
        !            83:             type=heading.getAttribute('class')
        !            84:             header=getText(self,heading.childNodes)
        !            85: 
        !            86:     if type=="": # falls heading fehlt, pruefe ob erster par richtig
        !            87:         par=section.getElementsByTagName('par')[0]
        !            88:         type=par.getAttribute('class')
        !            89:         header=getText(par.childNodes)
        !            90: 
        !            91:     #print section.childNodes
        !            92:     #pars=Evaluate('par',section)
        !            93:     pars=section.childNodes
        !            94:     content=par2html(self,pars)
        !            95:     #print "CONTENT",repr(content)
        !            96:     return (type,header,content,level)
        !            97: 
        !            98: def parseTable(table):
        !            99:     fields={}
        !           100:     rows=table.getElementsByTagName('html:tr')
        !           101:     for row in rows:
        !           102:         #print "ROW"
        !           103:         cols=row.getElementsByTagName('html:td')
        !           104:         
        !           105:         #Name des Datenfeldes einlesen
        !           106:         try:
        !           107:             field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
        !           108:             #print "field",field
        !           109:         except:
        !           110:             print "error"
        !           111:             field=""
        !           112: 
        !           113:         #Wandeln der Eintrge in HTML
        !           114: 
        !           115:         #pars=cols[1].getElementsByTagName('par')
        !           116:         pars=cols[1].childNodes
        !           117:         
        !           118:         html=par2html(self,pars,tags=("",";"))
        !           119:         
        !           120:         addToDict(fields,field,html)
        !           121:         #print fields
        !           122:     return fields
        !           123: 
        !           124: def par2html(self,pars,tags=None):
        !           125:     html=""
        !           126: 
        !           127:     for par in pars:
        !           128:         tagName=getattr(par,'tagName','')
        !           129:         if tagName in ["par","inline"]:
        !           130:             #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
        !           131:             #print "par",par
        !           132:             if not tags:
        !           133:                 try:
        !           134:                     tag=xml2htmlArray[par.getAttribute('class')]
        !           135:                 except:
        !           136:                     tag=('<p>','</p>')
        !           137:             else:
        !           138:                 tag=tags
        !           139:             #print "TAG",tag
        !           140:             content=getText(self,par.childNodes,par.getAttribute('class'))
        !           141:             
        !           142:             
        !           143: 
        !           144:             #print par.getAttribute('class'),node
        !           145:             try:
        !           146:                 html+=tag[0]+content+tag[1]
        !           147:             except:
        !           148:                 html=+tag[0]+content+tag[1]
        !           149:             
        !           150:         elif tagName=="pb":
        !           151:             html+="<pb/>"
        !           152:     
        !           153:     try:
        !           154: 
        !           155:         return html
        !           156:     except:
        !           157:         return ""
        !           158: 
        !           159: def getXlink(nodes):
        !           160:     """searches xlinks and gives them back as html"""
        !           161:     ret=""
        !           162:     for node in nodes:
        !           163:         if node.attributes:
        !           164:             if 'xlink:type' in node.attributes.keys(): #is a xlink?
        !           165:                 ret +=xlink2html(node)
        !           166:     return ret
        !           167: 
        !           168: def checkRef(self,ref):
        !           169:         dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
        !           170:         res=None
        !           171:         for db in dbs.keys():
        !           172: 
        !           173:             res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
        !           174:         return res
        !           175: 
        !           176: def xml2html(self,str,quote="yes"):
        !           177:         """link2html fuer VLP muss hier noch raus"""
        !           178:    
        !           179:         
        !           180:         if str:
        !           181:             if quote=="yes2":
        !           182:                 str=re.sub("\&","&amp;",str)
        !           183:             
        !           184:             str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
        !           185:             #str=re.sub("ref\=([.[*^[>]]])",'XX',str)
        !           186:             #print "STR::",str
        !           187:             dom=xml.dom.minidom.parseString(str)
        !           188:             links=dom.getElementsByTagName("link")
        !           189:             
        !           190:             for link in links:
        !           191:                 link.tagName="a"
        !           192:                 ref=link.getAttribute("ref")
        !           193:        pn=link.getAttribute("page")
        !           194: 
        !           195:                 if checkRef(self,ref):
        !           196:            if pn:
        !           197:                link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
        !           198:            else:
        !           199:                link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)
        !           200: 
        !           201:             str= dom.toxml()
        !           202:        
        !           203:        #print link.toxml('utf-8')
        !           204:        retStr=regexpPage.search(str)
        !           205: 
        !           206:             try:
        !           207:                 return retStr.group(1)
        !           208:             except:
        !           209:                 exStr="""<?xml version="1.0" ?>"""
        !           210:                 str=re.sub("\n","",str)
        !           211:                 #str=
        !           212:                 #print repr(str)
        !           213:                 return str.replace(exStr,'')
        !           214:         return ""
        !           215:     
        !           216: def xlink2html(self,xlink,parClass=None):
        !           217:     ret=""
        !           218:     attributes=xlink.attributes
        !           219:  
        !           220:     if xlink.tagName.lower()=="image":
        !           221:         ret +="""<img src="%s" />"""%xlink.getAttribute('href')
        !           222:     elif xlink.tagName.lower()=="link":
        !           223:         reference=urllib.unquote(xlink.getAttribute('href'))
        !           224:         label=getText(self,xlink.childNodes)
        !           225: 
        !           226:         # check if href is already a correct url
        !           227:         if reference.split(":")[0] in ['http','file']:
        !           228:             if parClass=="Picture":
        !           229:                 ret +="""<img src="%s" />"""%(reference)
        !           230:             else:
        !           231: 
        !           232:                 ret +="""<a href="%s" >%s</a>"""%(reference,label)
        !           233:         else: # transform
        !           234:             #href=xml2html(self,reference)
        !           235:             #print "refer",reference
        !           236:             reference=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',reference)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
        !           237:             ret +=reference
        !           238:             
        !           239:     return ret
        !           240: 
        !           241: def getText(self,nodelist,parClass=None):
        !           242:     
        !           243:     rc = u''
        !           244:     for node in nodelist:
        !           245:         
        !           246:        if node.nodeType == node.TEXT_NODE:
        !           247: 
        !           248:             try:
        !           249:                 try:
        !           250:                     #rc += node.data.encode('utf-8','ignore')
        !           251:                     rc += node.data
        !           252:                                         
        !           253:                 except:
        !           254:                     #rc= node.data.encode('utf-8','ignore')
        !           255:                     rc=node.data
        !           256:             except:
        !           257:                 rc="ERROR"
        !           258:                 #node.data.decode('utf-8','ignore')
        !           259: 
        !           260:             node.data.encode('utf-8','ignore')
        !           261:             #print "RC",rc
        !           262:         elif node.tagName =="inline":
        !           263: 
        !           264:             rc+=par2html(self,[node])
        !           265: 
        !           266:         elif node.tagName =="pb":
        !           267:             rc+="<pb/>"
        !           268:         elif node.attributes:
        !           269: 
        !           270:             if 'type' in node.attributes.keys(): #is a xlink?
        !           271: 
        !           272:                 try:
        !           273:                     rc +=xlink2html(self,node,parClass).encode('utf-8')
        !           274:                     
        !           275:                 except:
        !           276:                     rc +=xlink2html(self,node,parClass)
        !           277:                     
        !           278:     #print "RWT",rc        
        !           279:     return rc
        !           280: 
        !           281: 
        !           282: #filename=argv[1]
        !           283: #fileString=file(filename).read()
        !           284: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>