cdli/tools/uploadATF.py - annotate

Return to uploadATF.py CVS log
Up to [Repository] / cdli / tools
Annotation of cdli/tools/uploadATF.py, revision 1.2

1.1       dwinter     1: import sys
                      2: from ZODB import DB
                      3: from ZEO import ClientStorage
                      4: from ZPublisher.HTTPResponse import HTTPResponse
                      5: from ZPublisher.HTTPRequest import HTTPRequest
                      6: from ZPublisher.BaseRequest import RequestContainer
                      7: 
                      8: import logging
                      9: import os
                     10: import os.path
                     11: import tempfile
                     12: import codecs
                     13: from Products.cdli.cdli_files import CDLIRoot
                     14: 
                     15: import pickle
                     16: 
                     17: class StoreObject:
1.2     ! dwinter    18:     
        !            19:     
1.1       dwinter    20:     returnValue={}
                     21:     def __init__(self,storeId):
                     22:         self.storeId= storeId
                     23:     
                     24:     
                     25:     def save(self):
                     26:         pf = file("/tmp/"+self.storeId+".result","w")
                     27:         pickle.dump(self.returnValue,pf)
                     28:         pf.close()
                     29:         
                     30:     
                     31: class uploadATFThread:
                     32:     """class for checking the files befor uploading"""
                     33:     
1.2     ! dwinter    34:     DEBUG=True
        !            35:     
1.1       dwinter    36:     def checkFile(self,filename,data,folder):
                     37:         """check the files"""
                     38:         # first check the file name
                     39:         fn=filename.split(".") # no extension
                     40:     
                     41:         if not (fn[0][0]=="P" or fn[0][0]=="S"):
                     42:             return False,"P/S missing in the filename"
                     43:         elif len(fn[0])!=7:
                     44:             return False,"P number has not the right length 6"
                     45:         elif not checkUTF8(data):
                     46:             return False,"not utf-8"
                     47:         else:
                     48:             return True,""
                     49:            
                     50:     def splitatf(self,fh,dir=None,ext=None):
                     51:         """split it"""
                     52:         ret=None
                     53:         nf=None
                     54:         i=0
                     55:         fileCount=0
                     56:         #ROC: why split \n first and then \r???
                     57:         if isinstance(fh, basestring):
                     58:             iter=fh.split("\n")
                     59:         else:
                     60:             iter=fh.readlines()
                     61:             
                     62:         for lineTmp in iter:
1.2     ! dwinter    63:             if (self.DEBUG==True) and (fileCount>10):
        !            64:               break;
1.1       dwinter    65:             lineTmp=lineTmp.replace(codecs.BOM_UTF8,'') # make sure that all BOM are removed..
                     66:             for line in lineTmp.split("\r"):
                     67:                 #logging.info("Deal with: %s"%line)
                     68:                
                     69:                 i+=1
                     70:                 if (i%100)==0:
                     71:                     self.result.write(str(i)+"\n")
                     72:                     self.result.flush()
                     73:                 #check if basket name is in the first line
                     74:                 if line.find("#atf basket")>=0: #old convention
                     75:                     ret=line.replace('#atf basket ','')
                     76:                     ret=ret.split('_')[0]
                     77:                 elif line.find("#basket:")>=0: #new convention
                     78:                     ret=line.replace('#basket: ','')
                     79:                     ret=ret.split('_')[0]
                     80:     
                     81:                 else:
                     82:                     if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile
                     83:                         if nf:
                     84:                             fileCount+=1
                     85:                             print fileCount
                     86:                             nf.close() #close  file
1.2     ! dwinter    87:                             if (self.DEBUG==True) and (fileCount>10):
        !            88:                               break
1.1       dwinter    89:     
                     90:                         filename=line[1:].split("=")[0].rstrip()+".atf"
                     91:                         if dir:
                     92:                             filename=os.path.join(dir,filename)
                     93:                         nf=file(filename,"w")
                     94:                         logging.debug("open %s"%filename)
                     95:                     if nf:    
                     96:                         nf.write(line.replace("\n","")+"\n")
                     97:     
                     98:         try:        
                     99:             nf.close()
                    100:             
                    101:         except:
                    102:             pass
                    103:         
                    104:         if not isinstance(fh, basestring):
                    105:             fh.close()
                    106:         
                    107:         files = os.popen4('find %s' % dir)[1].read().rstrip().split('\n')
                    108:         files.remove(dir)
                    109:        
                    110:         return ret,len(files)
                    111:         
                    112: 
                    113:     def __init__(self):
                    114:         """initialise"""
                    115:         logging.getLogger().setLevel(logging.DEBUG)
                    116:         
                    117:         
1.2     ! dwinter   118:     def set(self,uploadId,basketId,username,serverport="29080"):
1.1       dwinter   119:         """set start values for the thread"""
                    120:         self.result=file("/tmp/"+str(uploadId)+".out","w")
                    121:         self.uploadId=uploadId
                    122:         self.basketId=basketId
                    123:         self.username=username
                    124:         self.serverport=serverport
                    125:       
                    126:         
                    127:     def __call__(self):
                    128:         """call method """
                    129:         self.run()
                    130:         return True
                    131:     
                    132:     def getContext(self, app,serverport="8080"):
                    133:         """get the context within the ZODB"""
                    134:         resp = HTTPResponse(stdout=None)
                    135:         env = {
                    136:             'SERVER_NAME':'localhost',
                    137:             'SERVER_PORT':serverport,
                    138:             'REQUEST_METHOD':'GET'
                    139:             }
                    140:         req = HTTPRequest(None, env, resp)
                    141:         return app.__of__(RequestContainer(REQUEST = req))
                    142:         
                    143:     def run(self):
                    144:        
                    145:      
                    146:         #find context within ZODB
                    147:         storage=ClientStorage.ClientStorage(("localhost",8100));
                    148:         db = DB(storage)
                    149:         conn = db.open()
                    150:         root = conn.root()
                    151:         app  = root['Application']
                    152:         ctx = self.getContext(app,serverport=self.serverport)
                    153:         logging.info("run intern")
                    154:        
                    155:         logging.info("call thread intern")
                    156:         self.uploadATFThread(ctx,self.uploadId,self.basketId)
                    157:      
                    158:         #ctx.cdliRoot.cdli_main.tmpStore2[self.getName()[0:]]=self.returnValue
                    159:         
                    160:         
                    161:         
                    162:     def getResult(self):
                    163:         """method for accessing result"""
                    164:         ret=""
                    165:         for x in self.result.readlines():
                    166:             ret+=x
                    167:             
                    168:         return ret
                    169:     
                    170:     def uploadATFThread(self,ctx,uploadId,basketId=0):
                    171:         """upload an atf file"""
                    172:         #TODO: add comments
                    173:         #TODO: finish uploadATF
                    174:         
                    175:         
                    176:         logging.info("start, upload thread")
                    177:         self.result.write("<html><body><h2>I got your file, start now to split it into single atf-files!</h2><p>\n")
                    178:     
                    179:         #make sure that id is a string and not an integer
                    180:         basketId=str(basketId)
                    181:         logging.info("basketID:"+basketId)
                    182:         #TODO: make this configurable, at the moment, rootFolder for cdli has to be cdliRoot
                    183:         ctx2=ctx.cdliRoot
                    184:         
                    185:         #get temporary file for staging the downloaded and splitted files
                    186:         dir=tempfile.mkdtemp()
                    187:         
                    188:         logging.info("tmpfFile:"+str(dir))
                    189:         changed=[] # changed files
                    190:         errors=[]  # files with errors
                    191:         lockerrors=[]  # files with errors
                    192: 
                    193:         newPs=[]   # new p filed
                    194:         psNotInCatalog=[] # files not in the catalog
                    195:         
                    196:         #split the uploadedd atf file
                    197:         logging.info("start splitting")
                    198:         basketNameFromFile,numberOfFiles=self.splitatf(file("/tmp/"+uploadId,'r'),dir)
                    199:         
                    200:         #find basketId if not set
                    201:         
                    202:         #get active abaket
                    203:         if basketId == '0':
                    204:             print ctx2
                    205:             basketObj=ctx2.basketContainer.getActiveBasket()
                    206:             if basketObj:
                    207:                 basketId=basketObj.getId()
                    208:                 
                    209:         #if there is no active basket and no basketid given, id is empty, else get besketname and length
                    210:         if basketId == '0':
                    211:             basketNameFromId=""
                    212:             basketLen=0
                    213:         else:
                    214:             basketNameFromId=getattr(ctx2.basketContainer,basketId).title
                    215:             basketLen=getattr(ctx2.basketContainer,basketId).getLastVersion().numberOfItems()
                    216:             
                    217:         logging.info("got the file, upload thread")
                    218:         self.result.write("""<html><body><h2>I got the files</h2><
                    219:                         p>I am computing the differences to the exisiting files</p>\n""")
                    220:                                    
                    221:         #start to check the files
                    222:         
                    223:         #workaround fuer memory fehler in listdir 
                    224:         #http://stackoverflow.com/questions/4098831/workaround-oserror-with-os-listdir
                    225:         files = os.popen4('find %s' % dir)[1].read().rstrip().split('\n')
                    226:         files.remove(dir)
                    227:         n = len(dir)
                    228:         if dir[-1] != os.path.sep:
                    229:             n += 1
                    230:             files = [f[n:] for f in files] # remove dir prefix
                    231: 
                    232:         #for fn in os.listdir(dir):
                    233:         for fn in files:
                    234:             
                    235:             self.result.write("<p>process:%s</p>\n"%fn)
                    236:             logging.debug(fn)
                    237:             # check if file is in the catalog
                    238:             #TODO: checkCatalog is not implemented yet
                    239:             if ctx2.cdli_main.checkCatalog(fn):
                    240:                 psNotInCatalog.append(fn)
                    241:                 
                    242:             #check if p-file already at the server  
                    243:             founds=ctx2.CDLICatalog.search({'title':fn})    
                    244:       
                    245:             #if not than add filename to the list of newfiles
                    246:             dataFile=file(os.path.join(dir,fn))
                    247:             data=dataFile.read()
                    248:             dataFile.close()
                    249:             status,msg=self.checkFile(fn,data,dir)
                    250:             #status=True
                    251:             
                    252:             
                    253:             if not status: # error
                    254:                 errors.append((fn,msg))
                    255:             
                    256:             else:
                    257:                 if len(founds)==0:
                    258:                     newPs.append(fn)
                    259: 
                    260:                 #if p file alread at the server    
                    261:                 for found in founds:
                    262:                     #analyse the differences to the actual file
                    263:                     obj=found.getObject()
                    264: 
                    265:                     if (not (str(obj.lockedBy))=='') and (not (str(obj.lockedBy)==str(self.username))):
                    266:                                 lockerrors.append((fn,str(obj.lockedBy)))
                    267:                     else:
                    268:                 
                    269:                         diffs=obj.diff(data)
                    270:                         if diffs[0]>0:
                    271:                             changed.append((obj,diffs)) #hochladen
                    272: 
                    273:         #ready, set the returnValues
                    274:         self.result.write("<h3>Done</h3></body></html>\n")
                    275:         
                    276:         stObj = StoreObject(uploadId);
                    277:         
                    278:         stObj.returnValue={}
                    279:         
                    280:         stObj.returnValue['errors']=errors
                    281:         
                    282:         stObj.returnValue['newPs']=newPs
                    283:         stObj.returnValue['tmpdir']=dir
                    284:         stObj.returnValue['basketLen']=basketLen
                    285:         stObj.returnValue['numberOfFiles']=numberOfFiles
                    286:         stObj.returnValue['basketNameFromId']=basketNameFromId
                    287:         stObj.returnValue['basketNameFromFile']=basketNameFromFile
                    288:         stObj.returnValue['basketId']=basketId
                    289:         stObj.returnValue['dir']=dir
                    290:         #stObj.returnValue['changed']=copy.copy(changed)
                    291:         stObj.returnValue['changed']=[(x[0].getId(),x[1][0]) for x in changed]
                    292:         #stObj.returnValue['lockerrors']=[x[0].getId() for x in lockerrors]
                    293:         stObj.returnValue['lockerrors']=[x for x in lockerrors]
                    294:         self.returnValue=True
                    295:         #ctx2.cdli_main.setTemp('v_uploadATF_returnValue',True)
                    296:  
                    297:         stObj.save();
                    298:  
                    299: def checkUTF8(data):
                    300:     """check utf 8"""
                    301:     if not isinstance(data, str):
                    302:         logging.error("checkUTF8 data is not string! (%s)"%repr(data))
                    303: 
                    304:     try:
                    305:         data.decode('utf-8')
                    306:         logging.debug("checkUTF8: ok!")
                    307:         return True
                    308:     except:
                    309:         logging.debug("checkUTF8: false!")
                    310:         return False
                    311:            
                    312: if __name__ == "__main__":
1.2     ! dwinter   313:        if len(sys.argv)<5:
        !           314:            print """Usage: procedure uploadId comment basketName unlock username
        !           315:            uploadId: Ticket ID von uploadATF
        !           316:            basketName: name of the basket
        !           317:            username: username
        !           318:            port of a running zope (not the zeo)
        !           319:            """
        !           320:         
1.1       dwinter   321:        upload = uploadATFThread()
                    322:        x=sys.argv;
                    323:        print x
                    324:        upload.set(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])
                    325:        upload.run();
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>