File:  [Repository] / ECHO_content / vlp_xmlhelpers.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Oct 5 07:34:58 2004 UTC (19 years, 8 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD
missing vlp_xmlhelpers and zpt for encyclopedia added

    1: from sys import argv
    2: 
    3: import string
    4: import xml.dom.minidom
    5: import Ft.Xml.XLink.Processor
    6: import Ft.Xml.XLink.XLinkElements
    7: 
    8: from Ft.Xml import XPath
    9: from Ft.Xml.XPath import Evaluate
   10: from Ft.Xml.XLink import XLINK_NAMESPACE
   11: from Ft.Xml.XLink import XLinkElements
   12: 
   13: #from Ft.Xml.Domlette import NonvalidatingReader,InputSource
   14: #from Ft.Xml import EMPTY_NAMESPACE
   15: from Ft.Lib import Uri
   16: import urllib
   17: import re
   18: 
   19: patternPage=r"<\s*page.*?>(.*?)</page>"
   20: regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
   21: 
   22: xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
   23: 
   24: def addToDict(dict,name,value):
   25:     if name=="":
   26:         return 0
   27:     else:
   28:         
   29:         if not dict.has_key(name):
   30:             dict[name]=[] # als array anlegen
   31: 
   32:         dict[name].append(value)
   33:         return 1    
   34: 
   35: def proj2hash(self,xmlstring):
   36:     """wandelt xml-files fuer die projekte in ein hash"""
   37:     
   38:     dom=xml.dom.minidom.parseString(xmlstring)
   39:     
   40:         
   41:     list={}
   42: 
   43:     #gettitle
   44:     pars=Evaluate('par',dom.getElementsByTagName('part')[0])
   45:     for par in pars:
   46:         className=par.getAttribute('class')
   47:         content=getText(self,par.childNodes)
   48:         addToDict(list,className,content)
   49:              
   50: 
   51:     sectionXPath="section"
   52: 
   53:     
   54:     sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
   55:     
   56:     while sections:
   57:         
   58:         for section in sections:
   59:             
   60:             sec=parseSection(self,section)
   61:             
   62:             if sec[0]=="WEB_project_header": # Sonderfall project
   63:                 addToDict(list,'WEB_project_header',sec[1]) # store title
   64:                 addToDict(list,'WEB_project_description',sec[2]) #store description
   65:             else: # no information in heading
   66:                 level=sec[3]
   67:                 aTag="<h%s>"%level
   68:                 eTag="</h%s>"%level
   69:                 addToDict(list,"text",aTag+sec[1]+eTag)
   70:                 addToDict(list,"text",sec[2])
   71:         sectionXPath+="/section"
   72:         sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
   73:     return list
   74: 
   75: 
   76: def parseSection(self,section):
   77:     type=""
   78:     header=""
   79:     level=section.getAttribute('level')
   80:     for heading in section.childNodes:
   81:         if getattr(heading,'tagName','')=="heading":
   82:             
   83:             type=heading.getAttribute('class')
   84:             header=getText(self,heading.childNodes)
   85: 
   86:     if type=="": # falls heading fehlt, pruefe ob erster par richtig
   87:         par=section.getElementsByTagName('par')[0]
   88:         type=par.getAttribute('class')
   89:         header=getText(par.childNodes)
   90: 
   91:     #print section.childNodes
   92:     #pars=Evaluate('par',section)
   93:     pars=section.childNodes
   94:     content=par2html(self,pars)
   95:     #print "CONTENT",repr(content)
   96:     return (type,header,content,level)
   97: 
   98: def parseTable(table):
   99:     fields={}
  100:     rows=table.getElementsByTagName('html:tr')
  101:     for row in rows:
  102:         #print "ROW"
  103:         cols=row.getElementsByTagName('html:td')
  104:         
  105:         #Name des Datenfeldes einlesen
  106:         try:
  107:             field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
  108:             #print "field",field
  109:         except:
  110:             print "error"
  111:             field=""
  112: 
  113:         #Wandeln der Eintrge in HTML
  114: 
  115:         #pars=cols[1].getElementsByTagName('par')
  116:         pars=cols[1].childNodes
  117:         
  118:         html=par2html(self,pars,tags=("",";"))
  119:         
  120:         addToDict(fields,field,html)
  121:         #print fields
  122:     return fields
  123: 
  124: def par2html(self,pars,tags=None):
  125:     html=""
  126: 
  127:     for par in pars:
  128:         tagName=getattr(par,'tagName','')
  129:         if tagName in ["par","inline"]:
  130:             #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
  131:             #print "par",par
  132:             if not tags:
  133:                 try:
  134:                     tag=xml2htmlArray[par.getAttribute('class')]
  135:                 except:
  136:                     tag=('<p>','</p>')
  137:             else:
  138:                 tag=tags
  139:             #print "TAG",tag
  140:             content=getText(self,par.childNodes,par.getAttribute('class'))
  141:             
  142:             
  143: 
  144:             #print par.getAttribute('class'),node
  145:             try:
  146:                 html+=tag[0]+content+tag[1]
  147:             except:
  148:                 html=+tag[0]+content+tag[1]
  149:             
  150:         elif tagName=="pb":
  151:             html+="<pb/>"
  152:     
  153:     try:
  154: 
  155:         return html
  156:     except:
  157:         return ""
  158: 
  159: def getXlink(nodes):
  160:     """searches xlinks and gives them back as html"""
  161:     ret=""
  162:     for node in nodes:
  163:         if node.attributes:
  164:             if 'xlink:type' in node.attributes.keys(): #is a xlink?
  165:                 ret +=xlink2html(node)
  166:     return ret
  167: 
  168: def checkRef(self,ref):
  169:         dbs={'vl_literature':'AND CD LIKE \'%lise%\'','vl_technology':'','vl_people':'','vl_sites':''}
  170:         res=None
  171:         for db in dbs.keys():
  172: 
  173:             res=res or self.search(var=str("select reference from %s where reference =\'%s\' %s"%(db,ref,dbs[db])))
  174:         return res
  175: 
  176: def xml2html(self,str,quote="yes"):
  177:         """link2html fuer VLP muss hier noch raus"""
  178: 	
  179:         
  180:         if str:
  181:             if quote=="yes2":
  182:                 str=re.sub("\&","&amp;",str)
  183:             
  184:             str=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',str)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
  185:             #str=re.sub("ref\=([.[*^[>]]])",'XX',str)
  186:             #print "STR::",str
  187:             dom=xml.dom.minidom.parseString(str)
  188:             links=dom.getElementsByTagName("link")
  189:             
  190:             for link in links:
  191:                 link.tagName="a"
  192:                 ref=link.getAttribute("ref")
  193: 		pn=link.getAttribute("page")
  194: 
  195:                 if checkRef(self,ref):
  196: 			if pn:
  197: 				link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref+"&p="+pn)
  198: 			else:
  199: 				link.setAttribute("href",self.aq_parent.absolute_url()+"/vlp_coll?id="+ref)
  200: 
  201:             str= dom.toxml()
  202: 	    
  203: 	    #print link.toxml('utf-8')
  204: 	    retStr=regexpPage.search(str)
  205: 
  206:             try:
  207:                 return retStr.group(1)
  208:             except:
  209:                 exStr="""<?xml version="1.0" ?>"""
  210:                 str=re.sub("\n","",str)
  211:                 #str=
  212:                 #print repr(str)
  213:                 return str.replace(exStr,'')
  214:         return ""
  215:     
  216: def xlink2html(self,xlink,parClass=None):
  217:     ret=""
  218:     attributes=xlink.attributes
  219:  
  220:     if xlink.tagName.lower()=="image":
  221:         ret +="""<img src="%s" />"""%xlink.getAttribute('href')
  222:     elif xlink.tagName.lower()=="link":
  223:         reference=urllib.unquote(xlink.getAttribute('href'))
  224:         label=getText(self,xlink.childNodes)
  225: 
  226:         # check if href is already a correct url
  227:         if reference.split(":")[0] in ['http','file']:
  228:             if parClass=="Picture":
  229:                 ret +="""<img src="%s" />"""%(reference)
  230:             else:
  231: 
  232:                 ret +="""<a href="%s" >%s</a>"""%(reference,label)
  233:         else: # transform
  234:             #href=xml2html(self,reference)
  235:             #print "refer",reference
  236:             reference=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',reference)# einfuegen anfuehrungszeichen um ref attribut, falls fehlt.
  237:             ret +=reference
  238:             
  239:     return ret
  240: 
  241: def getText(self,nodelist,parClass=None):
  242:     
  243:     rc = u''
  244:     for node in nodelist:
  245:         
  246:     	if node.nodeType == node.TEXT_NODE:
  247: 
  248:             try:
  249:                 try:
  250:                     #rc += node.data.encode('utf-8','ignore')
  251:                     rc += node.data
  252:                                         
  253:                 except:
  254:                     #rc= node.data.encode('utf-8','ignore')
  255:                     rc=node.data
  256:             except:
  257:                 rc="ERROR"
  258:                 #node.data.decode('utf-8','ignore')
  259: 
  260:             node.data.encode('utf-8','ignore')
  261:             #print "RC",rc
  262:         elif node.tagName =="inline":
  263: 
  264:             rc+=par2html(self,[node])
  265: 
  266:         elif node.tagName =="pb":
  267:             rc+="<pb/>"
  268:         elif node.attributes:
  269: 
  270:             if 'type' in node.attributes.keys(): #is a xlink?
  271: 
  272:                 try:
  273:                     rc +=xlink2html(self,node,parClass).encode('utf-8')
  274:                     
  275:                 except:
  276:                     rc +=xlink2html(self,node,parClass)
  277:                     
  278:     #print "RWT",rc        
  279:     return rc
  280: 
  281: 
  282: #filename=argv[1]
  283: #fileString=file(filename).read()
  284: #print proj2hash(fileString)
  285: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>