Annotation of ECHO_content/vlp_xmlhelpers.py, revision 1.3
1.1 dwinter 1: from sys import argv
2:
3: import string
4: import xml.dom.minidom
5: import Ft.Xml.XLink.Processor
6: import Ft.Xml.XLink.XLinkElements
7:
8: from Ft.Xml import XPath
9: from Ft.Xml.XPath import Evaluate
10: from Ft.Xml.XLink import XLINK_NAMESPACE
11: from Ft.Xml.XLink import XLinkElements
12:
13: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
14: #from Ft.Xml import EMPTY_NAMESPACE
15: from Ft.Lib import Uri
16: import urllib
17: import re
18:
# Regex capturing the body of a <page>...</page> element (case-insensitive,
# '.' matches newlines); used by xml2html to strip the XML wrapper.
patternPage=r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)

# Maps paragraph/inline class names from the source XML to (open, close)
# HTML tag pairs; used by par2html. Unknown classes fall back to <p>...</p>.
xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
23:
def addToDict(dict,name,value):
    """Append *value* to the list stored under *name* in *dict*.

    The list is created on first use.  Returns 1 on success, 0 when
    *name* is empty (in which case nothing is stored).
    """
    if name=="":
        return 0
    # setdefault replaces the deprecated dict.has_key() test (removed in
    # Python 3) and creates the per-key list on first use.
    dict.setdefault(name,[]).append(value)
    return 1
34:
def proj2hash(self,xmlstring):
    """Parse a project XML string into a dict of HTML fragments.

    The classed <par> children of the first <part> are stored under
    their class attribute.  <section> elements are then walked level by
    level (XPath 'section', 'section/section', ...): a section classed
    'WEB_project_header' fills the special header/description keys,
    every other section is appended to the 'text' key as an <h*>
    heading (level derived from the section's level attribute) followed
    by its rendered content.
    """
    dom=xml.dom.minidom.parseString(xmlstring)

    result={}  # renamed from 'list' to avoid shadowing the builtin

    # collect the classed paragraphs of the first <part>
    pars=Evaluate('par',dom.getElementsByTagName('part')[0])
    for par in pars:
        className=par.getAttribute('class')
        content=getText(self,par.childNodes)
        addToDict(result,className,content)

    # walk the section tree one nesting level at a time by growing the
    # XPath expression until no more sections are found
    sectionXPath="section"
    sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
    while sections:
        for section in sections:
            sec=parseSection(self,section)
            if sec[0]=="WEB_project_header": # special case: project header
                addToDict(result,'WEB_project_header',sec[1]) # store title
                addToDict(result,'WEB_project_description',sec[2]) # store description
            else: # ordinary section: heading + body go into 'text'
                level=int(sec[3])+2  # section level 1 -> <h3>, 2 -> <h4>, ...
                aTag="<h%i>"%level
                eTag="</h%i>"%level
                addToDict(result,"text",aTag+sec[1]+eTag)
                addToDict(result,"text",sec[2])
        sectionXPath+="/section"
        sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
    return result
74:
75:
def parseSection(self,section):
    """Extract (class, heading text, HTML content, level) from a <section>.

    The class and heading text come from the section's <heading> child;
    if no heading exists, the first <par> is used instead.  The content
    is the section's child nodes rendered through par2html.
    """
    type=""
    header=""
    level=section.getAttribute('level')
    for heading in section.childNodes:
        if getattr(heading,'tagName','')=="heading":
            type=heading.getAttribute('class')
            header=getText(self,heading.childNodes)

    if type=="": # no heading found: check whether the first par supplies it
        par=section.getElementsByTagName('par')[0]
        type=par.getAttribute('class')
        # bug fix: getText requires self as its first argument (the old
        # call getText(par.childNodes) raised TypeError)
        header=getText(self,par.childNodes)

    pars=section.childNodes
    content=par2html(self,pars)
    return (type,header,content,level)
97:
def parseTable(table,self=None):
    """Read a two-column <html:tr>/<html:td> table into a dict of fields.

    Column 0 supplies the field name (the class attribute of its first
    <par>); column 1 is rendered to HTML (entries joined with ';') and
    appended to that field's list.  *self* is threaded through to
    par2html/getText -- the previous version referenced an undefined
    'self' and raised NameError for every row; pass it explicitly when
    link resolution is needed.
    """
    fields={}
    rows=table.getElementsByTagName('html:tr')
    for row in rows:
        cols=row.getElementsByTagName('html:td')

        # read the field name from the first column
        try:
            field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
        except (IndexError,AttributeError):
            print("error")
            field=""

        # render the entries of the second column to HTML
        pars=cols[1].childNodes
        html=par2html(self,pars,tags=("",";"))

        addToDict(fields,field,html)
    return fields
123:
def par2html(self,pars,tags=None):
    """Render a list of <par>/<inline>/<pb>/<img> nodes to an HTML string.

    Without *tags*, the (open, close) tag pair is looked up per
    paragraph class in xml2htmlArray, defaulting to <p>...</p>; with
    *tags*, that pair is used for every paragraph (parseTable passes
    ("",";")).  <pb> becomes "<pb/>", <img> the placeholder "XXX";
    all other node types are ignored.
    """
    html=""
    for par in pars:
        tagName=getattr(par,'tagName','')
        if tagName in ["par","inline"]:
            if not tags:
                # unknown classes fall back to a plain paragraph
                tag=xml2htmlArray.get(par.getAttribute('class'),('<p>','</p>'))
            else:
                tag=tags
            content=getText(self,par.childNodes,par.getAttribute('class'))
            # bug fix: the old fallback branch read 'html=+...' (unary
            # plus on a string, a typo for '+='); html is always
            # initialized above, so a plain append is correct.
            html+=tag[0]+content+tag[1]
        elif tagName=="pb":
            html+="<pb/>"
        elif tagName=="img":
            html+="XXX"
    return html
160:
def getXlink(nodes,self=None):
    """Search *nodes* for xlink elements and return them rendered as HTML.

    A node counts as an xlink when it carries an 'xlink:type' attribute.
    *self* (new, optional, backward-compatible) is threaded through to
    xlink2html -- the previous version called xlink2html(node) with one
    argument and raised TypeError whenever an xlink was actually found.
    """
    ret=""
    for node in nodes:
        if node.attributes:
            if 'xlink:type' in node.attributes.keys(): #is a xlink?
                ret +=xlink2html(self,node)
    return ret
169:
def checkRef(self,ref):
    """Return a truthy search result if *ref* exists as a reference in
    one of the VLP databases, otherwise None/falsy.

    NOTE(review): the SQL is built by string interpolation from *ref*,
    which is injection-prone if ref can contain quotes -- switch to a
    parameterized query if self.search supports one.
    """
    # per-database extra WHERE clause (vl_literature is additionally
    # restricted to CDs matching '%lise%')
    dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
    res=None
    for db in dbs.keys():
        # 'or' short-circuits: once a database yields a hit, the
        # remaining databases are not queried
        res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
    return res
177:
def xml2html(self,str,quote="yes"):
    """Rewrite <link> elements in *str* into HTML <a> elements and return
    the body of the enclosing <page> element.

    Links whose ref resolves via checkRef get an href pointing at the
    vlp_coll view (with optional page number).  If no <page> wrapper is
    found, the XML declaration and newlines are stripped instead.
    NOTE(review): the VLP-specific link2html logic should eventually be
    factored out of here (per the original author's remark).
    """
    if str:
        if quote=="yes2":
            str=re.sub("\&","&",str)

        # add quotes around the ref attribute value in case they are missing
        str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)
        dom=xml.dom.minidom.parseString(str)
        links=dom.getElementsByTagName("link")

        for link in links:
            link.tagName="a"
            ref=link.getAttribute("ref")
            pn=link.getAttribute("page")

            # only rewrite links whose reference is known to a VLP database
            if checkRef(self,ref):
                if pn:
                    link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
                else:
                    link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)

        str= dom.toxml()

        # extract the content of the <page> element, if any
        retStr=regexpPage.search(str)

        try:
            return retStr.group(1)
        except:
            # no <page> wrapper: strip the XML declaration and newlines
            exStr="""<?xml version="1.0" ?>"""
            str=re.sub("\n","",str)
            return str.replace(exStr,'')
    return ""
217:
def xlink2html(self,xlink,parClass=None):
    """Render a single xlink element (<image> or <link>) to HTML.

    <image> becomes an <img> tag.  <link> becomes an <a> tag -- or an
    <img> tag when the enclosing paragraph class is 'Picture' -- if its
    href is already an http/file URL; otherwise the reference text is
    returned verbatim, with quotes patched around a bare ref= attribute.
    (The unused local 'attributes' was removed.)
    """
    ret=""

    if xlink.tagName.lower()=="image":
        ret +="""<img src="%s" />"""%xlink.getAttribute('href')
    elif xlink.tagName.lower()=="link":
        reference=urllib.unquote(xlink.getAttribute('href'))
        label=getText(self,xlink.childNodes)

        # check if href is already a correct url
        if reference.split(":")[0] in ['http','file']:
            if parClass=="Picture":
                ret +="""<img src="%s" />"""%(reference)
            else:
                ret +="""<a href="%s" >%s</a>"""%(reference,label)
        else: # not a URL: add quotes around the ref attribute if missing
            reference=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',reference)
            ret +=reference

    return ret
242:
def getText(self,nodelist,parClass=None):
    """Concatenate the textual content of *nodelist* into one string.

    Text nodes are appended directly; <inline> nodes are rendered via
    par2html, <pb> becomes '<pb/>', and any other node carrying a
    'type' attribute is treated as an xlink and rendered via
    xlink2html (encoded to utf-8 when possible).
    NOTE(review): nodes without a tagName attribute (comments,
    processing instructions) would raise AttributeError in the elif
    chain -- presumably the input never contains them; confirm.
    """

    rc = u''
    for node in nodelist:

        if node.nodeType == node.TEXT_NODE:

            try:
                try:
                    rc += node.data

                except:
                    # fallback: restart the buffer if appending failed
                    # (Python 2 unicode/str mixing)
                    rc=node.data
            except:
                # NOTE(review): this overwrites everything collected so
                # far, and the encode() result below is discarded --
                # looks like leftover debugging; kept as-is.
                rc="ERROR"
                node.data.encode('utf-8','ignore')
        elif node.tagName =="inline":

            rc+=par2html(self,[node])

        elif node.tagName =="pb":
            rc+="<pb/>"
        elif node.attributes:

            if 'type' in node.attributes.keys(): #is a xlink?

                try:
                    rc +=xlink2html(self,node,parClass).encode('utf-8')

                except:
                    rc +=xlink2html(self,node,parClass)

    return rc
282:
283:
284: #filename=argv[1]
285: #fileString=file(filename).read()
286: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>