File:  [Repository] / cdli / tools / uploadATF.py
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Wed Jul 20 09:33:34 2011 UTC (12 years, 10 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD
cli tools in tools verschoben,

neue develop-versionen von cdli_basket und files im devel ordner

import sys
from ZODB import DB
from ZEO import ClientStorage
from ZPublisher.HTTPResponse import HTTPResponse
from ZPublisher.HTTPRequest import HTTPRequest
from ZPublisher.BaseRequest import RequestContainer

import logging
import os
import os.path
import tempfile
import codecs
from Products.cdli.cdli_files import CDLIRoot

import pickle

class StoreObject:
    """Holder for a result dictionary that is pickled to /tmp/<storeId>.result."""

    # Class-level default, kept so that code reading StoreObject.returnValue
    # keeps working; every instance gets its own dict in __init__.
    returnValue={}

    def __init__(self,storeId):
        """Remember the id that names the result file.

        storeId -- used verbatim as the file name stem below /tmp
        """
        self.storeId= storeId
        # per-instance dict: mutating it in place must not leak into other
        # instances through the shared class attribute
        self.returnValue={}

    def save(self):
        """Pickle self.returnValue to /tmp/<storeId>.result."""
        # binary mode: pickle output is byte data
        pf = open("/tmp/"+str(self.storeId)+".result","wb")
        try:
            pickle.dump(self.returnValue,pf)
        finally:
            # close the file even if pickling fails
            pf.close()
        
    
class uploadATFThread:
    """Check an uploaded ATF file before it is imported.

    The upload is read from /tmp/<uploadId>, split into one file per text in
    a temporary directory, and every text is validated and compared with the
    objects found through the CDLI catalog. Progress is written as HTML to
    /tmp/<uploadId>.out; the final summary is pickled through StoreObject.
    """

    # While this is True, splitatf stops once more than ten texts have been
    # written (development safeguard).
    DEBUG=True

    def checkFile(self,filename,data,folder):
        """Validate the name and the encoding of one split file.

        filename -- file name; only the part before the first dot is checked
        data     -- file content, handed to checkUTF8
        folder   -- unused

        Returns a tuple (ok, message); message is "" when ok is True.
        """
        # part in front of the first dot, i.e. the name without extension
        stem=filename.split(".")[0]

        # slicing instead of indexing: an empty stem is reported as an
        # error instead of raising IndexError
        if stem[:1] not in ("P","S"):
            return False,"P/S missing in the filename"
        if len(stem)!=7:
            return False,"P number has not the right length 6"
        if not checkUTF8(data):
            return False,"not utf-8"
        return True,""

    def _findFiles(self,dir):
        """Return the paths `find` reports below dir, without dir itself.

        `find` is used instead of os.listdir as a workaround for a memory
        error in listdir, see
        http://stackoverflow.com/questions/4098831/workaround-oserror-with-os-listdir
        """
        # local import: subprocess is not imported at the top of the module
        import subprocess
        # argument list instead of a shell string, so the path is never
        # interpreted by a shell
        proc=subprocess.Popen(["find",dir],stdout=subprocess.PIPE,
                              universal_newlines=True)
        output=proc.communicate()[0]
        return [path for path in output.rstrip().split("\n")
                if path and path!=dir]

    def splitatf(self,fh,dir=None,ext=None):
        """Split a combined ATF stream into one file per text.

        A line whose first non-blank character is "&" starts a new text; the
        file name is the part between that first character and the first "="
        plus ".atf". Lines in front of the first "&" line are dropped.

        fh  -- string with the whole content, or an object with readlines();
               a non-string fh is closed before returning
        dir -- directory the files are written to (current directory if None)
        ext -- unused

        Returns (basketName, numberOfFiles): basketName is taken from a
        "#atf basket" / "#basket:" line (None if there is none);
        numberOfFiles is the number of entries found below dir or, without
        dir, the number of distinct files written.
        """
        basketName=None
        out=None          # output file of the text currently being written
        written=set()     # names of all files opened for writing
        lineNo=0
        fileCount=0
        limited=(self.DEBUG==True)

        #ROC: why split \n first and then \r???
        if isinstance(fh, basestring):
            rawLines=fh.split("\n")
        else:
            rawLines=fh.readlines()

        try:
            for rawLine in rawLines:
                if limited and (fileCount>10):
                    break
                # make sure that all BOM are removed
                rawLine=rawLine.replace(codecs.BOM_UTF8,'')
                for line in rawLine.split("\r"):
                    lineNo+=1
                    # progress report every 100 lines
                    if (lineNo%100)==0:
                        self.result.write(str(lineNo)+"\n")
                        self.result.flush()

                    if line.find("#atf basket")>=0: # old convention
                        basketName=line.replace('#atf basket ','').split('_')[0]
                    elif line.find("#basket:")>=0: # new convention
                        basketName=line.replace('#basket: ','').split('_')[0]
                    else:
                        if line.lstrip()[:1]=="&": # new file
                            if out:
                                fileCount+=1
                                logging.debug("files written: %s",fileCount)
                                out.close()
                                if limited and (fileCount>10):
                                    # leaves only the inner loop; the check
                                    # at the top of the outer loop ends the
                                    # split
                                    break

                            filename=line[1:].split("=")[0].rstrip()+".atf"
                            if dir:
                                filename=os.path.join(dir,filename)
                            out=open(filename,"w")
                            written.add(filename)
                            logging.debug("open %s",filename)
                        if out:
                            out.write(line.replace("\n","")+"\n")
        finally:
            # release both handles even when the split fails half way
            if out is not None:
                out.close()
            if not isinstance(fh, basestring):
                fh.close()

        if dir is None:
            # nothing to list: fall back to what was written
            return basketName,len(written)
        return basketName,len(self._findFiles(dir))

    def __init__(self):
        """Set the root logger to DEBUG."""
        logging.getLogger().setLevel(logging.DEBUG)

    def set(self,uploadId,basketId,username,serverport="29080"):
        """Set the start values and open the progress file.

        uploadId   -- names the progress file /tmp/<uploadId>.out
        basketId   -- basket id handed to uploadATFThread by run()
        username   -- compared with the lock owner of existing objects
        serverport -- SERVER_PORT of the request built in getContext
        """
        previous=getattr(self,"result",None)
        if previous is not None:
            # do not leak the handle of an earlier set() call
            previous.close()
        self.result=open("/tmp/"+str(uploadId)+".out","w")
        self.uploadId=uploadId
        self.basketId=basketId
        self.username=username
        self.serverport=serverport

    def __call__(self):
        """Run the check; always returns True."""
        self.run()
        return True

    def getContext(self, app,serverport="8080"):
        """Wrap app in a request container.

        Builds an HTTPRequest for a GET on localhost:<serverport> and returns
        app acquired through a RequestContainer holding that request.
        """
        resp = HTTPResponse(stdout=None)
        env = {
            'SERVER_NAME':'localhost',
            'SERVER_PORT':serverport,
            'REQUEST_METHOD':'GET'
            }
        req = HTTPRequest(None, env, resp)
        return app.__of__(RequestContainer(REQUEST = req))

    def run(self):
        """Connect to the ZEO server on localhost:8100 and run the check.

        Requires set() to have been called.
        NOTE(review): storage, database and connection are never closed here.
        """
        # find context within ZODB
        storage=ClientStorage.ClientStorage(("localhost",8100))
        db = DB(storage)
        conn = db.open()
        root = conn.root()
        app  = root['Application']
        ctx = self.getContext(app,serverport=self.serverport)
        logging.info("run intern")

        logging.info("call thread intern")
        self.uploadATFThread(ctx,self.uploadId,self.basketId)

    def getResult(self):
        """Return everything written to the progress file so far."""
        # self.result is opened write-only, so the content is read back
        # through a second handle on the same file
        if not self.result.closed:
            self.result.flush()
        reader=open(self.result.name,"r")
        try:
            return reader.read()
        finally:
            reader.close()

    def uploadATFThread(self,ctx,uploadId,basketId=0):
        """Split the upload and compare every text with the stored objects.

        ctx      -- context as returned by getContext; must provide cdliRoot
        uploadId -- the upload is read from /tmp/<uploadId>; the summary is
                    saved through StoreObject(uploadId)
        basketId -- basket id; 0 means "use the active basket, if any"

        The pickled summary has the keys errors, newPs, tmpdir, dir,
        basketLen, numberOfFiles, basketNameFromId, basketNameFromFile,
        basketId, changed and lockerrors.
        """
        #TODO: finish uploadATF

        logging.info("start, upload thread")
        self.result.write("<html><body><h2>I got your file, start now to split it into single atf-files!</h2><p>\n")

        # make sure that the ids are strings and not integers
        uploadId=str(uploadId)
        basketId=str(basketId)
        logging.info("basketID:"+basketId)
        #TODO: make this configurable, at the moment, rootFolder for cdli has to be cdliRoot
        ctx2=ctx.cdliRoot

        # temporary directory for staging the split files; it is not removed
        # here because its path is part of the summary
        tmpDir=tempfile.mkdtemp()
        logging.info("tmpfFile:"+str(tmpDir))

        changed=[]        # (object, diff) of texts that differ
        errors=[]         # (file name, message) of rejected files
        lockerrors=[]     # (file name, lock owner) of texts locked by others
        newPs=[]          # file names without a catalog hit
        psNotInCatalog=[] # collected but not part of the summary yet

        # split the uploaded atf file (splitatf closes the handle)
        logging.info("start splitting")
        basketNameFromFile,numberOfFiles=self.splitatf(open("/tmp/"+uploadId,'r'),tmpDir)

        # no basket id given: try the active basket
        if basketId == '0':
            logging.debug("%s",ctx2)
            basketObj=ctx2.basketContainer.getActiveBasket()
            if basketObj:
                basketId=basketObj.getId()

        # neither a given nor an active basket: name is empty, length is 0
        if basketId == '0':
            basketNameFromId=""
            basketLen=0
        else:
            basket=getattr(ctx2.basketContainer,basketId)
            basketNameFromId=basket.title
            basketLen=basket.getLastVersion().numberOfItems()

        logging.info("got the file, upload thread")
        # the <p> tag used to be split across a line break inside the literal
        self.result.write("<html><body><h2>I got the files</h2><p>I am computing the differences to the exisiting files</p>\n")

        # strip "<tmpDir>/" from every path; this has to happen whether or
        # not tmpDir already ends with a separator
        prefixLen=len(tmpDir)
        if tmpDir[-1] != os.path.sep:
            prefixLen+=1
        files=[path[prefixLen:] for path in self._findFiles(tmpDir)]

        for fn in files:
            self.result.write("<p>process:%s</p>\n"%fn)
            logging.debug(fn)

            #TODO: checkCatalog is not implemented yet
            if ctx2.cdli_main.checkCatalog(fn):
                psNotInCatalog.append(fn)

            # objects already on the server under this title
            founds=ctx2.CDLICatalog.search({'title':fn})

            dataFile=open(os.path.join(tmpDir,fn))
            try:
                data=dataFile.read()
            finally:
                dataFile.close()

            status,msg=self.checkFile(fn,data,tmpDir)
            if not status: # error
                errors.append((fn,msg))
                continue

            if len(founds)==0:
                newPs.append(fn)

            for found in founds:
                obj=found.getObject()
                lockedBy=str(obj.lockedBy)
                if lockedBy!='' and lockedBy!=str(self.username):
                    # locked by somebody else
                    lockerrors.append((fn,lockedBy))
                else:
                    # analyse the differences to the actual file
                    diffs=obj.diff(data)
                    if diffs[0]>0:
                        changed.append((obj,diffs))

        # ready, set the returnValues
        self.result.write("<h3>Done</h3></body></html>\n")
        self.result.flush()

        stObj = StoreObject(uploadId)
        stObj.returnValue={
            'errors':errors,
            'newPs':newPs,
            'tmpdir':tmpDir,
            'basketLen':basketLen,
            'numberOfFiles':numberOfFiles,
            'basketNameFromId':basketNameFromId,
            'basketNameFromFile':basketNameFromFile,
            'basketId':basketId,
            'dir':tmpDir,
            # only ids and the first diff element are stored, not the objects
            'changed':[(x[0].getId(),x[1][0]) for x in changed],
            'lockerrors':list(lockerrors),
            }
        self.returnValue=True

        stObj.save()
 
def checkUTF8(data):
    """Return True if data can be decoded as UTF-8, else False.

    data -- expected to be a str (byte string); anything else is logged as an
            error and then checked the same way. Objects without a decode
            method yield False.
    """
    if not isinstance(data, str):
        logging.error("checkUTF8 data is not string! (%r)", data)

    try:
        data.decode('utf-8')
    except (UnicodeError, AttributeError):
        # UnicodeError: not valid UTF-8; AttributeError: nothing to decode.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate.
        logging.debug("checkUTF8: false!")
        return False

    logging.debug("checkUTF8: ok!")
    return True
           
if __name__ == "__main__":
    # Command line entry point: the four arguments are handed to
    # uploadATFThread.set() in this order.
    if len(sys.argv)<5:
        sys.stderr.write("""Usage: uploadATF.py uploadId basketId username port
    uploadId: ticket id of uploadATF (the upload is read from /tmp/<uploadId>)
    basketId: id of the basket, 0 to use the active basket
    username: username
    port: port of a running zope (not the zeo)
""")
        # stop here instead of running into an IndexError below
        sys.exit(1)

    upload = uploadATFThread()
    sys.stdout.write("%s\n" % (sys.argv,))
    upload.set(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])
    upload.run()

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>