File:  [Repository] / ECHO_content / vlp_xmlhelpers.py
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Wed Oct 6 13:02:56 2004 UTC (19 years, 8 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD
teccat eingefuehrt

    1: from sys import argv
    2: 
    3: import string
    4: import xml.dom.minidom
    5: import Ft.Xml.XLink.Processor
    6: import Ft.Xml.XLink.XLinkElements
    7: 
    8: from Ft.Xml import XPath
    9: from Ft.Xml.XPath import Evaluate
   10: from Ft.Xml.XLink import XLINK_NAMESPACE
   11: from Ft.Xml.XLink import XLinkElements
   12: 
   13: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
   14: #from Ft.Xml import EMPTY_NAMESPACE
   15: from Ft.Lib import Uri
   16: import urllib
   17: import re
   18: 
   19: patternPage=r"<\s*page.*?>(.*?)</page>"
   20: regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
   21: 
   22: xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
   23: 
   24: def addToDict(dict,name,value):
   25:     if name=="":
   26:         return 0
   27:     else:
   28:         
   29:         if not dict.has_key(name):
   30:             dict[name]=[] # als array anlegen
   31: 
   32:         dict[name].append(value)
   33:         return 1    
   34: 
   35: def proj2hash(self,xmlstring):
   36:     """wandelt xml-files fuer die projekte in ein hash"""
   37:     
   38:     dom=xml.dom.minidom.parseString(xmlstring)
   39:     
   40:         
   41:     list={}
   42: 
   43:     #gettitle
   44:     pars=Evaluate('par',dom.getElementsByTagName('part')[0])
   45:     for par in pars:
   46:         className=par.getAttribute('class')
   47:         content=getText(self,par.childNodes)
   48:         addToDict(list,className,content)
   49:              
   50: 
   51:     sectionXPath="section"
   52: 
   53:     
   54:     sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
   55:     
   56:     while sections:
   57:         
   58:         for section in sections:
   59:             
   60:             sec=parseSection(self,section)
   61:             
   62:             if sec[0]=="WEB_project_header": # Sonderfall project
   63:                 addToDict(list,'WEB_project_header',sec[1]) # store title
   64:                 addToDict(list,'WEB_project_description',sec[2]) #store description
   65:             else: # no information in heading
   66:                 level=int(sec[3])+2
   67:                 aTag="<h%i>"%level
   68:                 eTag="</h%i>"%level
   69:                 addToDict(list,"text",aTag+sec[1]+eTag)
   70:                 addToDict(list,"text",sec[2])
   71:         sectionXPath+="/section"
   72:         sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
   73:     return list
   74: 
   75: 
   76: def parseSection(self,section):
   77:     type=""
   78:     header=""
   79:     level=section.getAttribute('level')
   80:     for heading in section.childNodes:
   81:         if getattr(heading,'tagName','')=="heading":
   82:             
   83:             type=heading.getAttribute('class')
   84:             header=getText(self,heading.childNodes)
   85: 
   86:     if type=="": # falls heading fehlt, pruefe ob erster par richtig
   87:         par=section.getElementsByTagName('par')[0]
   88:         type=par.getAttribute('class')
   89:         header=getText(par.childNodes)
   90: 
   91:     #print section.childNodes
   92:     #pars=Evaluate('par',section)
   93:     pars=section.childNodes
   94:     content=par2html(self,pars)
   95:     #print "CONTENT",repr(content)
   96:     return (type,header,content,level)
   97: 
   98: def parseTable(table):
   99:     fields={}
  100:     rows=table.getElementsByTagName('html:tr')
  101:     for row in rows:
  102:         #print "ROW"
  103:         cols=row.getElementsByTagName('html:td')
  104:         
  105:         #Name des Datenfeldes einlesen
  106:         try:
  107:             field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
  108:             #print "field",field
  109:         except:
  110:             print "error"
  111:             field=""
  112: 
  113:         #Wandeln der Eintrge in HTML
  114: 
  115:         #pars=cols[1].getElementsByTagName('par')
  116:         pars=cols[1].childNodes
  117:         
  118:         html=par2html(self,pars,tags=("",";"))
  119:         
  120:         addToDict(fields,field,html)
  121:         #print fields
  122:     return fields
  123: 
  124: def par2html(self,pars,tags=None):
  125:     html=""
  126: 
  127:     for par in pars:
  128:         tagName=getattr(par,'tagName','')
  129:         if tagName in ["par","inline"]:
  130:             #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
  131:             #print "par",par
  132:             if not tags:
  133:                 try:
  134:                     tag=xml2htmlArray[par.getAttribute('class')]
  135:                 except:
  136:                     tag=('<p>','</p>')
  137:             else:
  138:                 tag=tags
  139:             #print "TAG",tag
  140:             content=getText(self,par.childNodes,par.getAttribute('class'))
  141:             
  142:             
  143: 
  144:             #print par.getAttribute('class'),node
  145:             try:
  146:                 html+=tag[0]+content+tag[1]
  147:             except:
  148:                 html=+tag[0]+content+tag[1]
  149:             
  150:         elif tagName=="pb":
  151:             html+="<pb/>"
  152:         
  153:     
  154:     try:
  155: 
  156:         return html
  157:     except:
  158:         return ""
  159: 
  160: def getXlink(nodes):
  161:     """searches xlinks and gives them back as html"""
  162:     ret=""
  163:     for node in nodes:
  164:         if node.attributes:
  165:             if 'xlink:type' in node.attributes.keys(): #is a xlink?
  166:                 ret +=xlink2html(node)
  167:     return ret
  168: 
  169: def checkRef(self,ref):
  170:         dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
  171:         res=None
  172:         for db in dbs.keys():
  173: 
  174:             res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
  175:         return res
  176: 
  177: def xml2html(self,str,quote="yes"):
  178:         """link2html fuer VLP muss hier noch raus"""
  179: 	
  180:         
  181:         if str:
  182:             if quote=="yes2":
  183:                 str=re.sub("\&","&amp;",str)
  184:             
  185:             str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
  186:             #str=re.sub("ref\=([.[*^[>]]])",'XX',str)
  187:             #print "STR::",str
  188:             dom=xml.dom.minidom.parseString(str)
  189:             links=dom.getElementsByTagName("link")
  190:             
  191:             for link in links:
  192:                 link.tagName="a"
  193:                 ref=link.getAttribute("ref")
  194: 		pn=link.getAttribute("page")
  195: 
  196:                 if checkRef(self,ref):
  197: 			if pn:
  198: 				link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
  199: 			else:
  200: 				link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)
  201: 
  202:             str= dom.toxml()
  203: 	    
  204: 	    #print link.toxml('utf-8')
  205: 	    retStr=regexpPage.search(str)
  206: 
  207:             try:
  208:                 return retStr.group(1)
  209:             except:
  210:                 exStr="""<?xml version="1.0" ?>"""
  211:                 str=re.sub("\n","",str)
  212:                 #str=
  213:                 #print repr(str)
  214:                 return str.replace(exStr,'')
  215:         return ""
  216:     
  217: def xlink2html(self,xlink,parClass=None):
  218:     ret=""
  219:     attributes=xlink.attributes
  220:  
  221:     if xlink.tagName.lower()=="image":
  222:         ret +="""<img src="%s" />"""%xlink.getAttribute('href')
  223:     elif xlink.tagName.lower()=="link":
  224:         reference=urllib.unquote(xlink.getAttribute('href'))
  225:         label=getText(self,xlink.childNodes)
  226: 
  227:         # check if href is already a correct url
  228:         if reference.split(":")[0] in ['http','file']:
  229:             if parClass=="Picture":
  230:                 ret +="""<img src="%s" />"""%(reference)
  231:             else:
  232: 
  233:                 ret +="""<a href="%s" >%s</a>"""%(reference,label)
  234:         else: # transform
  235:             #href=xml2html(self,reference)
  236:             #print "refer",reference
  237:             reference=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',reference)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
  238:             ret +=reference
  239:             
  240:     return ret
  241: 
  242: def getText(self,nodelist,parClass=None):
  243:     
  244:     rc = u''
  245:     for node in nodelist:
  246:         
  247:     	if node.nodeType == node.TEXT_NODE:
  248: 
  249:             try:
  250:                 try:
  251:                     #rc += node.data.encode('utf-8','ignore')
  252:                     rc += node.data
  253:                                         
  254:                 except:
  255:                     #rc= node.data.encode('utf-8','ignore')
  256:                     rc=node.data
  257:             except:
  258:                 rc="ERROR"
  259:                 #node.data.decode('utf-8','ignore')
  260: 
  261:             node.data.encode('utf-8','ignore')
  262:             #print "RC",rc
  263:         elif node.tagName =="inline":
  264: 
  265:             rc+=par2html(self,[node])
  266: 
  267:         elif node.tagName =="pb":
  268:             rc+="<pb/>"
  269:         elif node.attributes:
  270: 
  271:             if 'type' in node.attributes.keys(): #is a xlink?
  272: 
  273:                 try:
  274:                     rc +=xlink2html(self,node,parClass).encode('utf-8')
  275:                     
  276:                 except:
  277:                     rc +=xlink2html(self,node,parClass)
  278:                     
  279:     #print "RWT",rc        
  280:     return rc
  281: 
  282: 
  283: #filename=argv[1]
  284: #fileString=file(filename).read()
  285: #print proj2hash(fileString)
  286: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>