Annotation of ECHO_content/vlp_xmlhelpers.py, revision 1.16
1.1 dwinter 1: from sys import argv
2:
3: import string
1.11 casties 4: import logging
1.1 dwinter 5: import xml.dom.minidom
6: import Ft.Xml.XLink.Processor
7: import Ft.Xml.XLink.XLinkElements
8:
9: from Ft.Xml import XPath
10: from Ft.Xml.XPath import Evaluate
11: from Ft.Xml.XLink import XLINK_NAMESPACE
12: from Ft.Xml.XLink import XLinkElements
1.5 dwinter 13: import cStringIO
14: from Ft.Xml.Domlette import NonvalidatingReader, PrettyPrint,Print
15: from Ft.Xml import EMPTY_NAMESPACE
1.1 dwinter 16: from Ft.Lib import Uri
17: import urllib
18: import re
1.11 casties 19: from ECHO_collection import unicodify,utf8ify
1.1 dwinter 20:
1.5 dwinter 21: patternTXT=r"<\s*txt.*?>(.*?)</txt>"
22: regexpTXT = re.compile(patternTXT, re.IGNORECASE + re.DOTALL)
1.1 dwinter 23: patternPage=r"<\s*page.*?>(.*?)</page>"
24: regexpPage = re.compile(patternPage, re.IGNORECASE + re.DOTALL)
25:
1.14 casties 26: #xml2htmlArray={'WEB_normal':('<p>','</p>'),'Normal':('<p>','</p>'),'WEB_picture':('<p class=\"picture\">','</p>'),'WEB_figuretitle':('<p class=\"picturetitle\">','</p>'),'WEB_bibliography':('<p><i>','</i></p>'),'Web_kursiv':('<i>','</i>'),'WEB_kursiv':('<i>','</i>'),'WEB_hyperlink':('',''),'Hyperlink':('',''),'Picture':('<p class=\"picture\">','</p>'),'FigureTitle':('<p class=\"picturetitle\">','</p>')}
27: #
28: #def addToDict(dict,name,value):
29: # if name=="":
30: # return 0
31: # else:
32: #
33: # if not dict.has_key(name):
34: # dict[name]=[] # als array anlegen
35: #
36: # dict[name].append(value)
37: # return 1
38: #
39: #def proj2hash(self,xmlstring):
40: # """wandelt xml-files fuer die projekte in ein hash"""
41: #
42: # dom=xml.dom.minidom.parseString(xmlstring)
43: #
44: #
45: # list={}
46: #
47: # #gettitle
48: # pars=Evaluate('par',dom.getElementsByTagName('part')[0])
49: # for par in pars:
50: # className=par.getAttribute('class')
51: # content=getText(self,par.childNodes)
52: # addToDict(list,className,content)
53: #
54: #
55: # sectionXPath="section"
56: #
57: #
58: # sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
59: #
60: # while sections:
61: #
62: # for section in sections:
63: #
64: # sec=parseSection(self,section)
65: #
66: # if sec[0]=="WEB_project_header": # Sonderfall project
67: # addToDict(list,'WEB_project_header',sec[1]) # store title
68: # addToDict(list,'WEB_project_description',sec[2]) #store description
69: # else: # no information in heading
70: # level=int(sec[3])+2
71: # aTag="<h%i>"%level
72: # eTag="</h%i>"%level
73: # addToDict(list,"text",aTag+sec[1]+eTag)
74: # addToDict(list,"text",sec[2])
75: # sectionXPath+="/section"
76: # sections=Evaluate(sectionXPath,dom.getElementsByTagName('part')[0])
77: # return list
78: #
79: #
80: #def parseSection(self,section):
81: # type=""
82: # header=""
83: # level=section.getAttribute('level')
84: # for heading in section.childNodes:
85: # if getattr(heading,'tagName','')=="heading":
86: #
87: # type=heading.getAttribute('class')
88: # header=getText(self,heading.childNodes)
89: #
90: # if type=="": # falls heading fehlt, pruefe ob erster par richtig
91: # par=section.getElementsByTagName('par')[0]
92: # type=par.getAttribute('class')
93: # header=getText(par.childNodes)
94: #
95: # #print section.childNodes
96: # #pars=Evaluate('par',section)
97: # pars=section.childNodes
98: # content=par2html(self,pars)
99: # #print "CONTENT",repr(content)
100: # return (type,header,content,level)
101: #
102: #def parseTable(table):
103: # fields={}
104: # rows=table.getElementsByTagName('html:tr')
105: # for row in rows:
106: # #print "ROW"
107: # cols=row.getElementsByTagName('html:td')
108: #
109: # #Name des Datenfeldes einlesen
110: # try:
111: # field=cols[0].getElementsByTagName('par')[0].getAttribute('class')
112: # #print "field",field
113: # except:
114: # print "error"
115: # field=""
116: #
117: #	#Wandeln der Einträge in HTML
118: #
119: # #pars=cols[1].getElementsByTagName('par')
120: # pars=cols[1].childNodes
121: #
122: # html=par2html(self,pars,tags=("",";"))
123: #
124: # addToDict(fields,field,html)
125: # #print fields
126: # return fields
127: #
128: #def par2html(self,pars,tags=None):
129: # html=""
130: #
131: # for par in pars:
132: # tagName=getattr(par,'tagName','')
133: # if tagName in ["par","inline"]:
134: # #print repr(par.getAttribute('class')),xml2htmlArray.get(par.getAttribute('class'),'NOT FOUND')
135: # #print "par",par
136: # if not tags:
137: # try:
138: # tag=xml2htmlArray[par.getAttribute('class')]
139: # except:
140: # tag=('<p>','</p>')
141: # else:
142: # tag=tags
143: # #print "TAG",tag
144: # content=getText(self,par.childNodes,par.getAttribute('class'))
145: #
146: #
147: #
148: # #print par.getAttribute('class'),node
149: # try:
150: # html+=tag[0]+content+tag[1]
151: # except:
152: # html=+tag[0]+content+tag[1]
153: #
154: # elif tagName=="pb":
155: # html+="<pb/>"
156: #
157: #
158: # try:
159: #
160: # return html
161: # except:
162: # return ""
1.1 dwinter 163:
def getXlink(nodes):
    """Search *nodes* for xlink elements and return them rendered as HTML.

    nodes -- iterable of DOM nodes; only element nodes with an
             'xlink:type' attribute are rendered.
    Returns the concatenated HTML string ("" if no xlinks found).
    """
    ret=""
    for node in nodes:
        if node.attributes:
            if 'xlink:type' in node.attributes.keys(): #is a xlink?
                # BUGFIX: xlink2html has signature (self, xlink, parClass=None);
                # the old single-argument call always raised TypeError.
                # self is never dereferenced in xlink2html, so pass None.
                ret += xlink2html(None, node)
    return ret
172:
def checkRef(self,ref):
    """Check whether a reference should be displayed.

    Looks *ref* up in every VLP database table; each table carries an
    extra WHERE clause restricting results to published/complete entries.
    Returns the first truthy search result, otherwise None.
    """
    # per-table visibility condition
    conditions = {
        'vl_literature': "AND online = '1'",
        'vl_technology': "AND complete ='yes'",
        'vl_people':     "AND complete ='yes'",
        'vl_sites':      "AND complete ='yes'",
        'vl_transcript': "AND complete ='yes'",
        'vl_essays':     "AND online ='yes'",
        'vl_categories': '',
        }
    found = None
    for table in conditions.keys():
        query = str("select reference from %s where reference =\'%s\' %s" % (table, ref, conditions[table]))
        # keep the first hit, but still query every table (as before)
        found = found or self.search(var=query)
    return found
188:
def link2html(self,str):
    """Convert <link> elements in an XML fragment into HTML <a> anchors.

    Links whose ref passes self.checkRef get an href pointing at the
    /references view (with optional page/mk query args); links that carry
    an explicit href are marked class="external".
    Returns the converted fragment as unicode; empty input yields u"".
    """
    if not str:
        return u""

    # NOTE: the pattern "\&" is just a literal '&', so this sub is a no-op;
    # kept for byte-compatibility with the original behaviour.
    text = re.sub("\&", "&", str)
    document = xml.dom.minidom.parseString(
        "<?xml version='1.0' encoding='utf-8'?><txt>" + utf8ify(text) + "</txt>")

    for anchor in document.getElementsByTagName("link"):
        anchor.tagName = "a"
        target = anchor.getAttribute("ref")
        page = anchor.getAttribute("page")
        marker = anchor.getAttribute("mk")

        if anchor.getAttribute("href"):
            # explicit href -> treat as external link
            anchor.setAttribute("class", "external")

        if self.checkRef(target):
            extra = ""
            if page:
                extra += "&page=%s" % page
            if marker:
                extra += "&mk=%s" % marker
            anchor.setAttribute("href",
                self.REQUEST['SERVER_URL'] + "/references?id=" + target + extra)

    serialized = document.toxml('utf-8')
    fragment = regexpTXT.search(serialized)
    return fragment.group(1).decode('utf-8')  # we return unicode
1.7 casties 230:
def related2html(self,str):
    """related library items: xlinks in html wandeln / mb 22.11.2006

    Converts <link ref="..."> elements into HTML anchors for related
    database objects. The three-letter prefix of the ref selects the
    database table (lit/sit/per/tec/exp). Returns unicode; empty input
    yields u"".
    """
    if str:

        str=re.sub("\&","&",str)
        dom=xml.dom.minidom.parseString("<?xml version='1.0' encoding='utf-8'?><txt>"+utf8ify(str)+"</txt>")
        links=dom.getElementsByTagName("link")

        for link in links:
            link.tagName = "a"
            ref = link.getAttribute("ref")
            pn = link.getAttribute("page")
            obj = ref[0:3]   # three-letter prefix selects the database table

            # extension of related items from literature to further database
            # objects, mb 05.06.2009
            # NOTE(review): ref is interpolated into SQL directly -- potential
            # SQL injection if refs can ever come from untrusted input.
            # BUGFIX: searchStr used to stay unbound (NameError) on the first
            # unknown prefix and stale (wrong query reused) on later ones;
            # reset it per link and skip the lookup when there is no query.
            searchStr = None
            if obj == 'lit':
                searchStr="select fullreference, online from vl_literature where reference =\'%s\' and authorized = 1"%(ref)
            elif obj == 'sit':
                searchStr="select reference from vl_sites where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'per':
                searchStr="select reference from vl_people where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'tec':
                searchStr="select reference from vl_technology where reference =\'%s\' and complete = 'yes'"%(ref)
            elif obj == 'exp':
                searchStr="select reference from vl_experiments where reference =\'%s\' and complete = 'yes'"%(ref)

            if searchStr:
                res = self.search(var=searchStr)
            else:
                # unknown prefix (e.g. plain web link with href): no lookup
                res = None

            if res:
                if obj == 'lit':
                    if res[0]['online'] == 1:
                        # literature item available online
                        if pn:
                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
                        else:
                            link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)

                        link.setAttribute("title", "click to view!")
                        link.removeAttribute("ref")

                        # prefix preceding the link
                        prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                        dom.documentElement.insertBefore(prefix, link)

                    else:
                        # literature item only available as bibliographic entry
                        link.setAttribute("alt", unicodify(res[0]['fullreference']))
                        link.setAttribute("title", "click to expand")
                        link.setAttribute("onclick", "return toggle(this);")
                        link.setAttribute("class", "x_offline")

                        # prefix inside link text
                        link.firstChild.data = '+ ' + link.firstChild.data
                else:
                    # links to the other database objects
                    link.setAttribute("href",self.REQUEST['SERVER_URL']+"/references?id="+ref)
                    link.setAttribute("title", "click to view")
                    link.removeAttribute("ref")

                    # prefix preceding the link
                    prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                    dom.documentElement.insertBefore(prefix, link)

            else:
                # object not available/released, or (web)link with href instead of ref

                #if ref != '':
                #    link.removeAttribute("ref")
                #    link.setAttribute("title", ref)

                # prefix preceding the link
                prefix = dom.createTextNode(U"\u2013\u0020") # = ndash + space
                dom.documentElement.insertBefore(prefix, link)


        newxml=dom.toxml('utf-8')

        retStr=regexpTXT.search(newxml)
        retStr = retStr.group(1)
        #logging.debug("related2html out=%s"%repr(retStr))
        return retStr.decode('utf-8') # we return unicode

    return u""
1.7 casties 315:
1.5 dwinter 316:
1.1 dwinter 317:
318:
def xml2html(self,str,quote="yes"):
    """Convert <link> elements of an XML page to HTML <a> anchors (VLP variant).

    link2html duplication for VLP -- should be factored out of here
    (translated from the original German docstring).

    self  -- object providing checkRef() and REQUEST['SERVER_URL']
    str   -- XML source text (expected to contain a <page> element)
    quote -- only the value "yes2" triggers the &-substitution below;
             NOTE(review): the default "yes" never matches -- confirm intent.

    Returns the content of the <page> element as unicode, or the whole
    pretty-printed document if no <page> match is found, or "" for empty input.
    """
    if str:
        if quote=="yes2":
            # NOTE(review): pattern "\&" is a literal '&', so this replaces
            # '&' with '&' -- effectively a no-op.
            str=re.sub("\&","&",str)
        #dom=xml.dom.minidom.parseString(str)
        # parse with 4Suite (Ft.Xml) Domlette; base URI is only used for
        # relative-reference resolution
        dom = NonvalidatingReader.parseString(str,"http://www.mpiwg-berlin.mpg.de/")
        #links=dom.getElementsByTagName("link")
        links=Ft.Xml.XPath.Evaluate(".//link", contextNode=dom)
        for link in links:
            #link.tagName="a"

            ref=link.getAttributeNS(EMPTY_NAMESPACE,"ref")
            pn=link.getAttributeNS(EMPTY_NAMESPACE,"page")

            # copy the child list before moving the children into the new node
            cns=link.childNodes[0:]

            # replace <link> by <a>, carrying the children over
            newLink=dom.createElementNS(EMPTY_NAMESPACE,"a")
            for x in cns:
                newLink.appendChild(x)



            link.parentNode.replaceChild(newLink,link)

            # only make it a working hyperlink if the reference is public
            if self.checkRef(ref):
                if pn:
                    newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref+"&page="+pn)
                else:
                    newLink.setAttributeNS(EMPTY_NAMESPACE,"href",self.REQUEST['SERVER_URL']+"/references?id="+ref)

        #str= dom.toxml('utf-8')
        # serialize the modified DOM back to a UTF-8 byte string
        buf = cStringIO.StringIO()
        PrettyPrint(dom, stream=buf)
        str = buf.getvalue()
        buf.close()
        #str=PrettyPrint(dom.documentElement,encoding='UTF-8')
        #print link.toxml('utf-8')
        #print type(str)
        # extract just the <page> content from the serialized document
        retStr=regexpPage.search(str)

        try: # hack: why is <page> sometimes missing?? (original comment translated)
            return retStr.group(1).decode('utf-8')
        except:
            return str
    return ""
1.5 dwinter 365:
1.1 dwinter 366:
def xlink2html(self,xlink,parClass=None):
    """Render a single xlink element (<image> or <link>) as HTML.

    self     -- unused, kept for call-compatibility with the other helpers
    xlink    -- DOM element to convert
    parClass -- class of the surrounding paragraph; "Picture" renders
                http/file links as inline images instead of anchors

    Returns an HTML string ("" for unknown tag names).
    """
    # (removed unused local: attributes = xlink.attributes)
    ret=""

    if xlink.tagName.lower()=="image":
        ret +="""<img src="%s" />"""%xlink.getAttribute('href')
    elif xlink.tagName.lower()=="link":
        reference=urllib.unquote(xlink.getAttribute('href'))
        label=getText(self,xlink.childNodes)

        # check if href is already a correct url
        if reference.split(":")[0] in ['http','file']:
            if parClass=="Picture":
                ret +="""<img src="%s" />"""%(reference)
            else:

                ret +="""<a href="%s" >%s</a>"""%(reference,label)
        else: # transform
            #href=xml2html(self,reference)
            #print "refer",reference
            # add quotes around a bare ref attribute if they are missing
            # (translated from the original German comment)
            reference=re.sub("ref\=([^>]*)\>",'ref=\"\g<1>\">',reference)
            ret +=reference

    return ret
391:
def getText(self,nodelist,parClass=None):
    """Collect the text content of *nodelist* as (mostly) unicode.

    self     -- passed through to xlink2html / par2html
    nodelist -- DOM nodes to walk
    parClass -- paragraph class, forwarded to xlink2html

    Text nodes are appended verbatim, <inline> is delegated to par2html,
    <pb> becomes "<pb/>", and elements carrying a 'type' attribute are
    rendered via xlink2html. Other elements are ignored.
    """
    rc = u''
    for node in nodelist:

        if node.nodeType == node.TEXT_NODE:
            try:
                rc += node.data
            except Exception:
                # BUGFIX: the old fallback replaced rc wholesale
                # (rc = node.data / rc = "ERROR"), silently discarding all
                # text collected so far, and threw away the encode() result.
                # Append a best-effort value instead.
                try:
                    rc += node.data.encode('utf-8','ignore')
                except Exception:
                    rc += "ERROR"

        elif node.tagName =="inline":
            # NOTE(review): par2html is commented out in this module -- this
            # branch raises NameError unless par2html is provided elsewhere;
            # confirm against callers before relying on <inline> handling.
            rc+=par2html(self,[node])

        elif node.tagName =="pb":
            rc+="<pb/>"
        elif node.attributes:

            if 'type' in node.attributes.keys(): #is a xlink?

                try:
                    rc +=xlink2html(self,node,parClass).encode('utf-8')

                except Exception:
                    rc +=xlink2html(self,node,parClass)

    return rc
431:
432:
433: #filename=argv[1]
434: #fileString=file(filename).read()
435: #print proj2hash(fileString)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>