cdli/tools/uploadATF.py - view

Return to uploadATF.py CVS log

Up to [Repository] / cdli / tools

File: [Repository] / cdli / tools / uploadATF.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Tue Jul 19 12:43:26 2011 UTC (13 years, 11 months ago) by dwinter
Branches: MAIN
CVS tags: HEAD

Added command-line tools for uploading ATF files.

    1: import sys
    2: from ZODB import DB
    3: from ZEO import ClientStorage
    4: from ZPublisher.HTTPResponse import HTTPResponse
    5: from ZPublisher.HTTPRequest import HTTPRequest
    6: from ZPublisher.BaseRequest import RequestContainer
    7: 
    8: import logging
    9: import os
   10: import os.path
   11: import tempfile
   12: import codecs
   13: from Products.cdli.cdli_files import CDLIRoot
   14: 
   15: import pickle
   16: 
   17: class StoreObject:
   18:     returnValue={}
   19:     def __init__(self,storeId):
   20:         self.storeId= storeId
   21:     
   22:     
   23:     def save(self):
   24:         pf = file("/tmp/"+self.storeId+".result","w")
   25:         pickle.dump(self.returnValue,pf)
   26:         pf.close()
   27:         
   28:     
   29: class uploadATFThread:
   30:     """class for checking the files befor uploading"""
   31:     
   32:     def checkFile(self,filename,data,folder):
   33:         """check the files"""
   34:         # first check the file name
   35:         fn=filename.split(".") # no extension
   36:     
   37:         if not (fn[0][0]=="P" or fn[0][0]=="S"):
   38:             return False,"P/S missing in the filename"
   39:         elif len(fn[0])!=7:
   40:             return False,"P number has not the right length 6"
   41:         elif not checkUTF8(data):
   42:             return False,"not utf-8"
   43:         else:
   44:             return True,""
   45:            
   46:     def splitatf(self,fh,dir=None,ext=None):
   47:         """split it"""
   48:         ret=None
   49:         nf=None
   50:         i=0
   51:         fileCount=0
   52:         #ROC: why split \n first and then \r???
   53:         if isinstance(fh, basestring):
   54:             iter=fh.split("\n")
   55:         else:
   56:             iter=fh.readlines()
   57:             
   58:         for lineTmp in iter:
   59:             #if fileCount>10:
   60:             #    break;
   61:             lineTmp=lineTmp.replace(codecs.BOM_UTF8,'') # make sure that all BOM are removed..
   62:             for line in lineTmp.split("\r"):
   63:                 #logging.info("Deal with: %s"%line)
   64:                
   65:                 i+=1
   66:                 if (i%100)==0:
   67:                     self.result.write(str(i)+"\n")
   68:                     self.result.flush()
   69:                 #check if basket name is in the first line
   70:                 if line.find("#atf basket")>=0: #old convention
   71:                     ret=line.replace('#atf basket ','')
   72:                     ret=ret.split('_')[0]
   73:                 elif line.find("#basket:")>=0: #new convention
   74:                     ret=line.replace('#basket: ','')
   75:                     ret=ret.split('_')[0]
   76:     
   77:                 else:
   78:                     if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile
   79:                         if nf:
   80:                             fileCount+=1
   81:                             print fileCount
   82:                             nf.close() #close  file
   83:                             #if fileCount>10:
   84:                             #   break
   85:     
   86:                         filename=line[1:].split("=")[0].rstrip()+".atf"
   87:                         if dir:
   88:                             filename=os.path.join(dir,filename)
   89:                         nf=file(filename,"w")
   90:                         logging.debug("open %s"%filename)
   91:                     if nf:    
   92:                         nf.write(line.replace("\n","")+"\n")
   93:     
   94:         try:        
   95:             nf.close()
   96:             
   97:         except:
   98:             pass
   99:         
  100:         if not isinstance(fh, basestring):
  101:             fh.close()
  102:         
  103:         files = os.popen4('find %s' % dir)[1].read().rstrip().split('\n')
  104:         files.remove(dir)
  105:        
  106:         return ret,len(files)
  107:         
  108: 
  109:     def __init__(self):
  110:         """initialise"""
  111:         logging.getLogger().setLevel(logging.DEBUG)
  112:         
  113:         
  114:     def set(self,uploadId,basketId,username,serverport="8080"):
  115:         """set start values for the thread"""
  116:         self.result=file("/tmp/"+str(uploadId)+".out","w")
  117:         self.uploadId=uploadId
  118:         self.basketId=basketId
  119:         self.username=username
  120:         self.serverport=serverport
  121:       
  122:         
  123:     def __call__(self):
  124:         """call method """
  125:         self.run()
  126:         return True
  127:     
  128:     def getContext(self, app,serverport="8080"):
  129:         """get the context within the ZODB"""
  130:         resp = HTTPResponse(stdout=None)
  131:         env = {
  132:             'SERVER_NAME':'localhost',
  133:             'SERVER_PORT':serverport,
  134:             'REQUEST_METHOD':'GET'
  135:             }
  136:         req = HTTPRequest(None, env, resp)
  137:         return app.__of__(RequestContainer(REQUEST = req))
  138:         
  139:     def run(self):
  140:        
  141:      
  142:         #find context within ZODB
  143:         storage=ClientStorage.ClientStorage(("localhost",8100));
  144:         db = DB(storage)
  145:         conn = db.open()
  146:         root = conn.root()
  147:         app  = root['Application']
  148:         ctx = self.getContext(app,serverport=self.serverport)
  149:         logging.info("run intern")
  150:        
  151:         logging.info("call thread intern")
  152:         self.uploadATFThread(ctx,self.uploadId,self.basketId)
  153:      
  154:         #ctx.cdliRoot.cdli_main.tmpStore2[self.getName()[0:]]=self.returnValue
  155:         
  156:         
  157:         
  158:     def getResult(self):
  159:         """method for accessing result"""
  160:         ret=""
  161:         for x in self.result.readlines():
  162:             ret+=x
  163:             
  164:         return ret
  165:     
  166:     def uploadATFThread(self,ctx,uploadId,basketId=0):
  167:         """upload an atf file"""
  168:         #TODO: add comments
  169:         #TODO: finish uploadATF
  170:         
  171:         
  172:         logging.info("start, upload thread")
  173:         self.result.write("<html><body><h2>I got your file, start now to split it into single atf-files!</h2><p>\n")
  174:     
  175:         #make sure that id is a string and not an integer
  176:         basketId=str(basketId)
  177:         logging.info("basketID:"+basketId)
  178:         #TODO: make this configurable, at the moment, rootFolder for cdli has to be cdliRoot
  179:         ctx2=ctx.cdliRoot
  180:         
  181:         #get temporary file for staging the downloaded and splitted files
  182:         dir=tempfile.mkdtemp()
  183:         
  184:         logging.info("tmpfFile:"+str(dir))
  185:         changed=[] # changed files
  186:         errors=[]  # files with errors
  187:         lockerrors=[]  # files with errors
  188: 
  189:         newPs=[]   # new p filed
  190:         psNotInCatalog=[] # files not in the catalog
  191:         
  192:         #split the uploadedd atf file
  193:         logging.info("start splitting")
  194:         basketNameFromFile,numberOfFiles=self.splitatf(file("/tmp/"+uploadId,'r'),dir)
  195:         
  196:         #find basketId if not set
  197:         
  198:         #get active abaket
  199:         if basketId == '0':
  200:             print ctx2
  201:             basketObj=ctx2.basketContainer.getActiveBasket()
  202:             if basketObj:
  203:                 basketId=basketObj.getId()
  204:                 
  205:         #if there is no active basket and no basketid given, id is empty, else get besketname and length
  206:         if basketId == '0':
  207:             basketNameFromId=""
  208:             basketLen=0
  209:         else:
  210:             basketNameFromId=getattr(ctx2.basketContainer,basketId).title
  211:             basketLen=getattr(ctx2.basketContainer,basketId).getLastVersion().numberOfItems()
  212:             
  213:         logging.info("got the file, upload thread")
  214:         self.result.write("""<html><body><h2>I got the files</h2><
  215:                         p>I am computing the differences to the exisiting files</p>\n""")
  216:                                    
  217:         #start to check the files
  218:         
  219:         #workaround fuer memory fehler in listdir 
  220:         #http://stackoverflow.com/questions/4098831/workaround-oserror-with-os-listdir
  221:         files = os.popen4('find %s' % dir)[1].read().rstrip().split('\n')
  222:         files.remove(dir)
  223:         n = len(dir)
  224:         if dir[-1] != os.path.sep:
  225:             n += 1
  226:             files = [f[n:] for f in files] # remove dir prefix
  227: 
  228:         #for fn in os.listdir(dir):
  229:         for fn in files:
  230:             
  231:             self.result.write("<p>process:%s</p>\n"%fn)
  232:             logging.debug(fn)
  233:             # check if file is in the catalog
  234:             #TODO: checkCatalog is not implemented yet
  235:             if ctx2.cdli_main.checkCatalog(fn):
  236:                 psNotInCatalog.append(fn)
  237:                 
  238:             #check if p-file already at the server  
  239:             founds=ctx2.CDLICatalog.search({'title':fn})    
  240:       
  241:             #if not than add filename to the list of newfiles
  242:             dataFile=file(os.path.join(dir,fn))
  243:             data=dataFile.read()
  244:             dataFile.close()
  245:             status,msg=self.checkFile(fn,data,dir)
  246:             #status=True
  247:             
  248:             
  249:             if not status: # error
  250:                 errors.append((fn,msg))
  251:             
  252:             else:
  253:                 if len(founds)==0:
  254:                     newPs.append(fn)
  255: 
  256:                 #if p file alread at the server    
  257:                 for found in founds:
  258:                     #analyse the differences to the actual file
  259:                     obj=found.getObject()
  260: 
  261:                     if (not (str(obj.lockedBy))=='') and (not (str(obj.lockedBy)==str(self.username))):
  262:                                 lockerrors.append((fn,str(obj.lockedBy)))
  263:                     else:
  264:                 
  265:                         diffs=obj.diff(data)
  266:                         if diffs[0]>0:
  267:                             changed.append((obj,diffs)) #hochladen
  268: 
  269:         #ready, set the returnValues
  270:         self.result.write("<h3>Done</h3></body></html>\n")
  271:         
  272:         stObj = StoreObject(uploadId);
  273:         
  274:         stObj.returnValue={}
  275:         
  276:         stObj.returnValue['errors']=errors
  277:         
  278:         stObj.returnValue['newPs']=newPs
  279:         stObj.returnValue['tmpdir']=dir
  280:         stObj.returnValue['basketLen']=basketLen
  281:         stObj.returnValue['numberOfFiles']=numberOfFiles
  282:         stObj.returnValue['basketNameFromId']=basketNameFromId
  283:         stObj.returnValue['basketNameFromFile']=basketNameFromFile
  284:         stObj.returnValue['basketId']=basketId
  285:         stObj.returnValue['dir']=dir
  286:         #stObj.returnValue['changed']=copy.copy(changed)
  287:         stObj.returnValue['changed']=[(x[0].getId(),x[1][0]) for x in changed]
  288:         #stObj.returnValue['lockerrors']=[x[0].getId() for x in lockerrors]
  289:         stObj.returnValue['lockerrors']=[x for x in lockerrors]
  290:         self.returnValue=True
  291:         #ctx2.cdli_main.setTemp('v_uploadATF_returnValue',True)
  292:  
  293:         stObj.save();
  294:  
  295: def checkUTF8(data):
  296:     """check utf 8"""
  297:     if not isinstance(data, str):
  298:         logging.error("checkUTF8 data is not string! (%s)"%repr(data))
  299: 
  300:     try:
  301:         data.decode('utf-8')
  302:         logging.debug("checkUTF8: ok!")
  303:         return True
  304:     except:
  305:         logging.debug("checkUTF8: false!")
  306:         return False
  307:            
  308: if __name__ == "__main__":
  309:        upload = uploadATFThread()
  310:        x=sys.argv;
  311:        print x
  312:        upload.set(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4])
  313:        upload.run();

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>