from sys import argv

import string
import xml.dom.minidom
import Ft.Xml.XLink.Processor
import Ft.Xml.XLink.XLinkElements

from Ft.Xml import XPath
from Ft.Xml.XPath import Evaluate
from Ft.Xml.XLink import XLINK_NAMESPACE
from Ft.Xml.XLink import XLinkElements

#from Ft.Xml.Domlette import NonvalidatingReader,InputSource
#from Ft.Xml import EMPTY_NAMESPACE
from Ft.Lib import Uri
import urllib
import re

# Captures whatever sits between a <page ...> start tag and its </page> end
# tag; matching ignores case and lets "." span newlines.
patternPage = r"<\s*page.*?>(.*?)</page>"
regexpPage = re.compile(patternPage, re.IGNORECASE | re.DOTALL)

# Paragraph/inline class name -> (opening HTML, closing HTML) wrapped around
# the text of an element carrying that class.
xml2htmlArray = {
    'WEB_normal': ('<p>', '</p>'),
    'Normal': ('<p>', '</p>'),
    'WEB_picture': ('<p class="picture">', '</p>'),
    'WEB_figuretitle': ('<p class="picturetitle">', '</p>'),
    'WEB_bibliography': ('<p><i>', '</i></p>'),
    'Web_kursiv': ('<i>', '</i>'),
    'WEB_kursiv': ('<i>', '</i>'),
    'WEB_hyperlink': ('', ''),
    'Hyperlink': ('', ''),
    'Picture': ('<p class="picture">', '</p>'),
    'FigureTitle': ('<p class="picturetitle">', '</p>'),
}

def addToDict(dict,name,value):
    """Append *value* to the list stored under *name* in *dict*.

    The list is created on first use, so every key maps to a list of all
    values added for it, in insertion order.

    Returns 0 (and leaves *dict* untouched) when *name* is the empty string,
    otherwise 1.
    """
    if name == "":
        return 0

    # setdefault replaces the deprecated dict.has_key() check-then-create.
    dict.setdefault(name, []).append(value)
    return 1

def proj2hash(self,xmlstring):
    """Convert a project XML document into a dict of lists.

    Only the first <part> element is read.  Its direct <par> children are
    stored under their ``class`` attribute.  Sections are then visited level
    by level ("section", "section/section", ...) until a level yields no
    nodes.  A section whose type is "WEB_project_header" contributes its
    title and description under dedicated keys; every other section is
    appended to the "text" entry as an <hN> heading followed by its body.
    """
    document = xml.dom.minidom.parseString(xmlstring)
    root = document.getElementsByTagName('part')[0]

    collected = {}

    # Paragraphs sitting directly below the part element.
    for paragraph in Evaluate('par', root):
        addToDict(collected,
                  paragraph.getAttribute('class'),
                  getText(self, paragraph.childNodes))

    # Walk the section tree one nesting level at a time.
    path = "section"
    while True:
        matches = Evaluate(path, root)
        if not matches:
            break

        for node in matches:
            kind, title, body, level = parseSection(self, node)

            if kind == "WEB_project_header":  # special case: project header
                addToDict(collected, 'WEB_project_header', title)
                addToDict(collected, 'WEB_project_description', body)
                continue

            # Ordinary section: heading carries no extra information.
            opening = "<h%s>" % level
            closing = "</h%s>" % level
            addToDict(collected, "text", opening + title + closing)
            addToDict(collected, "text", body)

        path += "/section"

    return collected


def parseSection(self,section):
    """Split a <section> element into (type, header, content, level).

    * type    -- ``class`` attribute of the section's <heading> child (the
                 last one wins if there are several).
    * header  -- text of that heading.
    * content -- HTML for all child nodes of the section, via par2html.
    * level   -- the section's ``level`` attribute.

    If no heading supplies a non-empty class, the first <par> descendant of
    the section is used for type and header instead.  When there is no such
    <par> either, type and header stay empty strings.
    """
    sectionType = ""
    header = ""
    level = section.getAttribute('level')

    for child in section.childNodes:
        # Text/comment nodes have no tagName; getattr keeps them harmless.
        if getattr(child, 'tagName', '') == "heading":
            sectionType = child.getAttribute('class')
            header = getText(self, child.childNodes)

    if sectionType == "":
        # Heading missing: check whether the first par carries the class.
        pars = section.getElementsByTagName('par')
        if pars:
            firstPar = pars[0]
            sectionType = firstPar.getAttribute('class')
            # The original call omitted `self` and raised TypeError.
            header = getText(self, firstPar.childNodes)

    content = par2html(self, section.childNodes)
    return (sectionType, header, content, level)

def parseTable(table, self=None):
    """Read a two-column table into a dict of field name -> list of HTML.

    Each <html:tr> row is expected to hold at least two <html:td> cells.
    The ``class`` attribute of the first <par> inside the first cell names
    the field; the child nodes of the second cell are rendered with par2html
    using ("", ";") as the wrapping tags.

    Rows that lack the second cell or the naming <par> print "error" and are
    skipped.  Rows whose field name is empty are dropped by addToDict.

    *self* is only handed through to par2html.  It is optional so existing
    one-argument callers keep working; the original body referenced an
    undefined ``self`` and raised NameError.
    """
    fields = {}
    for row in table.getElementsByTagName('html:tr'):
        cols = row.getElementsByTagName('html:td')

        try:
            # Name of the data field.
            field = cols[0].getElementsByTagName('par')[0].getAttribute('class')
            valueNodes = cols[1].childNodes
        except IndexError:
            print("error")
            continue

        # Convert the entries to HTML.
        html = par2html(self, valueNodes, tags=("", ";"))
        addToDict(fields, field, html)
    return fields

def par2html(self,pars,tags=None):
    """Render a sequence of DOM nodes as an HTML string.

    * <par> and <inline> elements become their text (via getText) wrapped in
      a pair of tags: *tags* if given, otherwise the xml2htmlArray entry for
      the element's ``class`` attribute, otherwise <p>...</p>.
    * <pb> elements become "<pb/>".
    * <img> elements become the placeholder "XXX".
    * Every other node (including text nodes) is ignored.

    *self* is only passed through to getText.  *tags* is a
    (opening, closing) pair.  Returns "" for an empty sequence.

    The original wrapped the concatenation in a try/except whose fallback
    (``html=+...``) applied unary plus to a string and so always raised
    TypeError; errors now propagate unchanged instead.
    """
    parts = []

    for par in pars:
        tagName = getattr(par, 'tagName', '')
        if tagName in ("par", "inline"):
            parClass = par.getAttribute('class')
            if tags:
                tag = tags
            else:
                tag = xml2htmlArray.get(parClass, ('<p>', '</p>'))
            content = getText(self, par.childNodes, parClass)
            parts.append(tag[0] + content + tag[1])
        elif tagName == "pb":
            parts.append("<pb/>")
        elif tagName == "img":
            parts.append("XXX")

    return "".join(parts)

def getXlink(nodes, self=None):
    """Search *nodes* for xlinks and return them rendered as HTML.

    A node counts as an xlink when it has an ``xlink:type`` attribute; each
    such node is rendered with xlink2html and the results are concatenated.
    Nodes without attributes are skipped.

    *self* is only handed through to xlink2html.  It is optional so existing
    one-argument callers keep working; the original called xlink2html with
    the node as its sole argument, which raised TypeError.
    """
    ret = ""
    for node in nodes:
        attributes = node.attributes
        if attributes and 'xlink:type' in attributes.keys():  # is an xlink?
            ret += xlink2html(self, node)
    return ret

def checkRef(self,ref):
    """Look *ref* up in the reference tables and return the first hit.

    For each table a ``select reference ...`` statement is passed to
    ``self.search(var=...)``.  The first truthy result is returned and no
    further tables are queried.  If nothing matches, the result of the last
    query is returned.
    """
    # table name -> extra condition appended to the WHERE clause
    # NOTE(review): ref is interpolated into the statement unescaped --
    # confirm that callers only pass trusted values.
    conditions = {'vl_literature': "AND CD LIKE '%lise%'",
                  'vl_technology': '',
                  'vl_people': '',
                  'vl_sites': ''}
    found = None
    for table, extra in conditions.items():
        if found:
            break
        statement = "select reference from %s where reference ='%s' %s" % (table, ref, extra)
        found = self.search(var=str(statement))
    return found

def xml2html(self,str,quote="yes"):
    """Turn <link> elements in an XML fragment into HTML anchors.

    TODO: the VLP-specific link handling still has to be moved out of here.

    Steps:
      1. If *quote* is "yes2", every "&" is replaced by "&amp;".
         NOTE(review): the default "yes" does not trigger this -- confirm
         that is intended.
      2. Unquoted ``ref=value>`` attributes get double quotes so the text
         parses as XML.  Values that already start with a quote are left
         alone; the original pattern re-quoted them (and any ``href="..."``)
         and produced malformed XML.
      3. Each <link> is renamed to <a>.  If checkRef knows its ``ref``, an
         ``href`` pointing at ``<parent url>/vlp_coll?id=<ref>`` is set, with
         ``&p=<page>`` appended when a ``page`` attribute is present.
      4. If the serialised result contains a <page> element, only its
         content is returned; otherwise the whole document is returned
         without newlines and without the XML declaration.

    Returns "" when *str* is empty or None.
    """
    if not str:
        return ""

    text = str  # avoid working on a name that shadows the builtin
    if quote == "yes2":
        text = re.sub(r"\&", "&amp;", text)

    # Add the quotation marks around the ref attribute if they are missing.
    text = re.sub(r"""ref\=(?!["'])([^>]*)\>""", r'ref="\g<1>">', text)

    dom = xml.dom.minidom.parseString(text)
    for link in dom.getElementsByTagName("link"):
        link.tagName = "a"
        ref = link.getAttribute("ref")
        pn = link.getAttribute("page")

        if checkRef(self, ref):
            href = self.aq_parent.absolute_url() + "/vlp_coll?id=" + ref
            if pn:
                href += "&p=" + pn
            link.setAttribute("href", href)

    text = dom.toxml()

    match = regexpPage.search(text)
    if match is not None:
        return match.group(1)

    # No <page> wrapper: return the flattened document minus its declaration.
    return text.replace("\n", "").replace("""<?xml version="1.0" ?>""", '')
    
def xlink2html(self,xlink,parClass=None):
    """Render one xlink element as an HTML snippet.

    * <image> elements become ``<img src="..."/>`` using their ``href``.
    * <link> elements whose URL-unquoted ``href`` starts with the scheme
      http, https or file become an <img> when *parClass* is "Picture",
      otherwise an <a> whose label is the element's text.
    * Any other <link> ``href`` is treated as ready-made markup and returned
      after quoting unquoted ``ref=value>`` attributes.  Values that already
      start with a quote are left untouched, so they are not quoted twice.
    * Every other element yields "".

    Tag names are compared case-insensitively.  *self* is only passed through
    to getText.
    """
    tag = xlink.tagName.lower()

    if tag == "image":
        return """<img src="%s" />""" % xlink.getAttribute('href')

    if tag != "link":
        return ""

    reference = urllib.unquote(xlink.getAttribute('href'))

    # Check whether href is already a usable URL.
    if reference.split(":")[0] in ('http', 'https', 'file'):
        if parClass == "Picture":
            return """<img src="%s" />""" % (reference)
        label = getText(self, xlink.childNodes)
        return """<a href="%s" >%s</a>""" % (reference, label)

    # Otherwise transform: add the quotation marks around the ref attribute
    # if they are missing.
    return re.sub(r"""ref\=(?!["'])([^>]*)\>""", r'ref="\g<1>">', reference)

def getText(self,nodelist,parClass=None):
    """Concatenate the textual/HTML content of *nodelist* into one string.

    * Text nodes contribute their data.
    * <inline> elements are rendered through par2html.
    * <pb> elements become "<pb/>".
    * Other elements that carry a ``type`` attribute are treated as xlinks
      and rendered through xlink2html (which receives *parClass*).
    * Everything else -- comments, processing instructions, elements
      without a ``type`` attribute -- is skipped; children of skipped
      elements are not visited.

    Returns a unicode string, empty for an empty node list.

    The original nested try/except around the text append was removed: any
    input that triggered it failed again on the following, discarded
    ``encode`` call, so it never recovered anything.  Errors now surface
    directly.
    """
    rc = u''
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
            rc += node.data
            continue

        # Comment/CDATA/PI nodes have no tagName; do not crash on them.
        tagName = getattr(node, 'tagName', '')
        if tagName == "inline":
            rc += par2html(self, [node])
        elif tagName == "pb":
            rc += "<pb/>"
        else:
            attributes = getattr(node, 'attributes', None)
            if attributes and 'type' in attributes.keys():  # is an xlink?
                rc += xlink2html(self, node, parClass)

    return rc


#filename=argv[1]
#fileString=file(filename).read()
#print proj2hash(fileString)