view RestDbInterface.py @ 20:67ca17753cd5

NEW - # 12: create table and upload data https://it-dev.mpiwg-berlin.mpg.de/tracs/GIS/ticket/12
author casties
date Wed, 18 Aug 2010 16:20:25 +0200
parents 060797795a4d
children a67b7c1f7ec5
line wrap: on
line source

'''
Created on 19.5.2010

@author: casties
'''

from OFS.Folder import Folder
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.ZSQLExtend import ZSQLExtend
import logging
import re
import psycopg2
import json

from zope.interface import implements
from zope.publisher.interfaces import IPublishTraverse
from ZPublisher.BaseRequest import DefaultPublishTraverse
#from zope.publisher.interfaces import NotFound 
#from zope.app import zapi 
#from zope.component import queryMultiAdapter
import Shared.DC.ZRDB.DA
from Products.ZSQLMethods.SQL import SQLConnectionIDs


def getTextFromNode(node):
    """get the cdata content of a XML node"""
    if node is None:
        return ""
    
    if isinstance(node, list):
        nodelist = node
    else:
        nodelist=node.childNodes

    rc = ""
    for node in nodelist:
        if node.nodeType == node.TEXT_NODE:
           rc = rc + node.data
    return rc

def sqlName(s,lc=True):
    """returns restricted ASCII-only version of string"""
    if s is None:
        return ""
    
    # all else -> "_"
    s = re.sub(r'[^A-Za-z0-9_]','_',s)
    if lc:
        return s.lower()
    
    return s

class RestDbInterface(Folder):
    """Object for RESTful database queries
    path schema: /db/{schema}/{table}/
    omitting table gives a list of schemas
    omitting table and schema gives a list of schemas 
    """
    implements(IPublishTraverse)
    
    meta_type="RESTdb"
    manage_options=Folder.manage_options+(
        {'label':'Config','action':'manage_editRestDbInterfaceForm'},
        )

    # management templates
    manage_editRestDbInterfaceForm=PageTemplateFile('zpt/editRestDbInterface',globals())

    # data templates
    XML_index = PageTemplateFile('zpt/XML_index', globals())
    XML_schema = PageTemplateFile('zpt/XML_schema', globals())
    XML_schema_table = PageTemplateFile('zpt/XML_schema_table', globals())
    HTML_index = PageTemplateFile('zpt/HTML_index', globals())
    HTML_schema = PageTemplateFile('zpt/HTML_schema', globals())
    HTML_schema_table = PageTemplateFile('zpt/HTML_schema_table', globals())

    
    
    def __init__(self, id, title, connection_id=None):
        """init"""
        self.id = id
        self.title = title
        # database connection id
        self.connection_id = connection_id
        # create template folder
        self.manage_addFolder('template')


    def getCursor(self,autocommit=True):
        """returns fresh DB cursor"""
        conn = getattr(self,"_v_database_connection",None)
        if conn is None:
            # create a new connection object
            try:
                if self.connection_id is None:
                    # try to take the first existing ID
                    connids = SQLConnectionIDs(self)
                    if len(connids) > 0:
                        connection_id = connids[0][0]
                        self.connection_id = connection_id
                        logging.debug("connection_id: %s"%repr(connection_id))

                da = getattr(self, self.connection_id)
                da.connect('')
                # we copy the DAs database connection
                conn = da._v_database_connection
                #conn._register() # register with the Zope transaction system
                self._v_database_connection = conn
            except Exception, e:
                raise IOError("No database connection! (%s)"%str(e))
        
        cursor = conn.getcursor()
        if autocommit:
            # is there a better version to get to the connection?
            cursor.connection.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
            
        return cursor
    
    def executeSQL(self, query, args=None, hasResult=True, autocommit=True):
        """execute query with args on database and return all results.
        result format: {"fields":fields, "rows":data}"""
        logging.debug("executeSQL query=%s args=%s"%(query,args))
        cur = self.getCursor(autocommit=autocommit)
        cur.execute(query, args)
        # description of returned fields 
        fields = cur.description
        if hasResult:
            # get all data in an array
            data = cur.fetchall()
            cur.close()
            return {"fields":fields, "rows":data}
        else:
            cur.close()
            return None

    
    def publishTraverse(self,request,name):
        """change the traversal"""
        # get stored path
        path = request.get('restdb_path', [])
        logging.debug("publishtraverse: name=%s restdb_path=%s"%(name,path))
        
        if name in ("index_html", "PUT"):
            # end of traversal
            if request.get("method") == "POST" and request.get("action",None) == "PUT":
                # fake PUT by POST with action=PUT
                name = "PUT"
                
            return getattr(self, name)
            #TODO: should we check more?
        else:
            # traverse
            if len(path) == 0:
                # first segment
                if name == 'db':
                    # virtual path -- continue traversing
                    path = [name]
                    request['restdb_path'] = path
                else:
                    # try real path
                    tr = DefaultPublishTraverse(self, request)
                    ob = tr.publishTraverse(request, name)
                    return ob
            else:
                path.append(name)

        # continue traversing
        return self
 
    def index_html(self,REQUEST,RESPONSE):
        """index method"""
        # ReST path was stored in request
        path = REQUEST.get('restdb_path',[])
        # type and format are real parameter
        format = REQUEST.get('format','HTML').upper()
        type = REQUEST.get('type',None)
        logging.debug("index_html path=%s format=%s type=%s"%(path,format,type))

        if type is not None:
            # non-empty type -- look for template
            pt = getattr(self.template, "%s_%s"%(format,type), None)
            if pt is not None:
                return pt(format=format,type=type,path=path)
            
        if len(path) == 1:
            # list of schemas
            return self.showListOfSchemas(format=format)
        elif len(path) == 2:
            # list of tables
            return self.showListOfTables(format=format,schema=path[1])
        elif len(path) == 3:
            # table
            return self.showTable(format=format,schema=path[1],table=path[2])
        
        # don't know what to do
        return str(REQUEST)

    def PUT(self, REQUEST, RESPONSE):
        """
        Implement WebDAV/HTTP PUT/FTP put method for this object.
        """
        logging.debug("RestDbInterface PUT")
        #logging.debug("req=%s"%REQUEST)
        #self.dav__init(REQUEST, RESPONSE)
        #self.dav__simpleifhandler(REQUEST, RESPONSE)
        # ReST path was stored in request
        path = REQUEST.get('restdb_path',[])
        if len(path) == 3:
            schema = path[1]
            tablename = path[2]
            file = REQUEST.get("create_table_file",None)
            if file is None:
                RESPONSE.setStatus(400)
                return

            logging.debug("put with schema=%s table=%s file=%s"%(schema,tablename,file))
            ret = self.createTableFromXML(schema, tablename, file)
            resultType = REQUEST.get("result_type","application/json")
            if resultType == "application/json":
                RESPONSE.setHeader("Content-Type", "application/json")
                json.dump(ret, RESPONSE)
            elif resultType == "html/json":
                RESPONSE.setHeader("Content-Type", "text/html")
                RESPONSE.write("<html>\n<body>\n<pre>")
                json.dump(ret, RESPONSE)
                RESPONSE.write("</pre>\n</body>\n</html>")
                
            
        else:
            # 400 Bad Request
            RESPONSE.setStatus(400)
            return
        

    def showTable(self,format='XML',schema='public',table=None):
        """returns PageTemplate with tables"""
        logging.debug("showtable")
        pt = getattr(self.template, '%s_schema_table'%format, None)
        if pt is None:
            return "ERROR!! template %s_schema_table not found"%format
        
        data = self.getTable(schema,table)
        return pt(data=data,tablename=table)
 
 
    def getTable(self,schema='public',table=None,username='guest'):
        """return table data"""
        logging.debug("gettable")
        data = self.executeSQL('select * from "%s"."%s"'%(schema,table))
        return data

    def showListOfTables(self,format='XML',schema='public'):
        """returns PageTemplate with list of tables"""
        logging.debug("showlistoftables")
        pt = getattr(self.template, '%s_schema'%format, None)
        if pt is None:
            return "ERROR!! template %s_schema not found"%format
        
        data = self.getListOfTables(schema)
        return pt(data=data,schema=schema)
 
    def getListOfTables(self,schema='public',username='guest'):
        """return list of tables"""
        logging.debug("getlistoftables")
        # get list of fields and types of db table
        qstr="""select c.relname FROM pg_catalog.pg_class c
            LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
            WHERE c.relkind IN ('r','') AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
            AND pg_catalog.pg_table_is_visible(c.oid)"""
        #qstr="select attname, format_type(pg_attribute.atttypid, pg_attribute.atttypmod) from pg_attribute, pg_class where attrelid = pg_class.oid and pg_attribute.attnum > 0"
        data=self.executeSQL(qstr)
        return data

    def showListOfSchemas(self,format='XML'):
        """returns PageTemplate with list of schemas"""
        logging.debug("showlistofschemas")
        pt = getattr(self.template, '%s_index'%format, None)
        if pt is None:
            return "ERROR!! template %s_index not found"%format
        
        data = self.getListOfSchemas()
        return pt(data=data)
 
    def getListOfSchemas(self,username='guest'):
        """return list of schemas"""
        logging.debug("getlistofschemas")
        # TODO: really look up schemas
        data={'fields': (('schemas',),), 'rows': [('public',),]}
        return data
    
    def createEmptyTable(self,schema,table,fields):
        """create a table with the given fields
           returns list of created fields"""
        logging.debug("createEmptyTable")
        sqlFields = []
        for f in fields:
            if isinstance(f,dict):
                # {name: XX, type: YY}
                name = sqlName(f['name'])
                type = f['type']
            
            else:
                # name only
                name = sqlName(f)
                type = 'text'
                
            sqlFields.append({'name':name, 'type':type})
            
        # drop table if it exists
        try:
            res = self.executeSQL('select * from "%s"."%s" where 1=0'%(schema,table))
            logging.debug("createemptytable: table %s.%s exists"%(schema,table))
            self.executeSQL('drop table "%s"."%s"'%(schema,table),hasResult=False)
        except:
            pass
        
        fieldString = ", ".join(['"%s" %s'%(f['name'],f['type']) for f in sqlFields])
        sqlString = 'create table "%s"."%s" (%s)'%(schema,table,fieldString)
        logging.debug("createemptytable: SQL=%s"%sqlString)
        ret = self.executeSQL(sqlString,hasResult=False)
        
        return sqlFields

    def createTableFromXML(self,schema,table,data):
        """create or replace a table with the given XML data"""
        logging.debug("createTableFromXML schema=%s table=%s data=%s"%(schema,table,data))
        tablename = sqlName(table)
        self.importExcelXML(schema,tablename, data)
        return {"tablename": tablename}
        
    def importExcelXML(self,schema,table,xmldata,fieldNamesOnly=False):
        '''
        Import XML file in Excel format into the table
        @param table: name of the table the xml shall be imported into
        @param containerTagName: XML-Tag which describes a dataset
        @param data: data to be imported
        @param identify: (optional) field res. tag which identifies a entry uniquely for updating purposes.
        @param RESPONSE: (optional)
        '''
        from xml.dom.pulldom import parseString,parse
        
        namespace = "urn:schemas-microsoft-com:office:spreadsheet"
        containerTagName = "Table"
        rowTagName = "Row"
        colTagName = "Cell"
        dataTagName = "Data"
        fieldNames = []
        sqlFields = []
        numFields = 0
        sqlInsert = None
        
        logging.debug("import excel xml")
        
        ret=""
        if isinstance(xmldata, str):
            logging.debug("importXML reading string data")
            doc=parseString(xmldata)
        else:
            logging.debug("importXML reading file data")
            doc=parse(xmldata)
            
        cnt = 0
        while True:
            node=doc.getEvent()

            if node is None:
                break
            
            else:
                #logging.debug("tag=%s"%node[1].localName)
                if node[1].localName is not None:
                    tagName = node[1].localName.lower()
                else:
                    # ignore non-tag nodes
                    continue
                                
                if tagName == rowTagName.lower():
                    # start of row
                    doc.expandNode(node[1])
                    cnt += 1
                    if cnt == 1:
                        # first row -- field names
                        names=node[1].getElementsByTagNameNS(namespace, dataTagName)
                        for name in names:
                            fn = getTextFromNode(name)
                            fieldNames.append(fn)
                            
                        if fieldNamesOnly:
                            # return just field names
                            return fieldNames
                        
                        # create table
                        sqlFields = self.createEmptyTable(schema, table, fieldNames)
                        numFields = len(sqlFields)
                        fieldString = ", ".join(['"%s"'%f['name'] for f in sqlFields])
                        valString = ", ".join(["%s" for f in sqlFields])
                        sqlInsert = 'insert into "%s"."%s" (%s) values (%s)'%(schema,table,fieldString,valString)
                        logging.debug("importexcelsql: sqlInsert=%s"%sqlInsert)
                        
                    else:
                        # following rows are data
                        colNodes=node[1].getElementsByTagNameNS(namespace, colTagName)
                        data = []
                        hasData = False
                        for colNode in colNodes:
                            dataNodes=colNode.getElementsByTagNameNS(namespace, dataTagName)
                            if len(dataNodes) > 0:
                                val = getTextFromNode(dataNodes[0])
                                hasData = True
                            else:
                                val = None

                            data.append(val)
                            
                        if not hasData:
                            # ignore empty rows
                            continue
                            
                        # fix number of data fields
                        if len(data) > numFields:
                            del data[numFields:]
                        elif len(data) < numFields:
                            missFields = numFields - len(data) 
                            data.extend(missFields * [None,])
                            
                        logging.debug("importexcel sqlinsert=%s data=%s"%(sqlInsert,data))
                        self.executeSQL(sqlInsert, data, hasResult=False)
                      
        return cnt
         
        
    def manage_editRestDbInterface(self, title=None, connection_id=None,
                     REQUEST=None):
        """Change the object"""
        if title is not None:
            self.title = title
            
        if connection_id is not None:
            self.connection_id = connection_id
                
        #checkPermission=getSecurityManager().checkPermission
        REQUEST.RESPONSE.redirect('manage_main')

        
manage_addRestDbInterfaceForm=PageTemplateFile('zpt/addRestDbInterface',globals())

def manage_addRestDbInterface(self, id, title='', label='', description='',
                     createPublic=0,
                     createUserF=0,
                     REQUEST=None):
        """Add a new object with id *id*."""
    
        ob=RestDbInterface(str(id),title)
        self._setObject(id, ob)
        
        #checkPermission=getSecurityManager().checkPermission
        REQUEST.RESPONSE.redirect('manage_main')