--- cdli/cdli_files.py 2006/12/22 11:56:08 1.53 +++ cdli/cdli_files.py 2008/10/15 10:00:06 1.101 @@ -21,20 +21,229 @@ from ZPublisher.HTTPRequest import HTTPR from ZPublisher.HTTPResponse import HTTPResponse from ZPublisher.BaseRequest import RequestContainer import threading -from BTrees.OOBTree import OOBTree +from BTrees.OOBTree import OOBTree, OOTreeSet +import logging +import transaction +import copy +import codecs +import sys +from BTrees.IOBTree import IOBTree +import cdliSplitter +from sets import Set +import md5 +from DownloadBasket import DownloadBasketFinallyThread + +def makelist(mySet): + x = list(mySet) + x.sort() + return x + +def unicodify(s): + """decode str (utf-8 or latin-1 representation) into unicode object""" + if not s: + return u"" + if isinstance(s, str): + try: + return s.decode('utf-8') + except: + return s.decode('latin-1') + else: + return s + +def utf8ify(s): + """encode unicode object or string into byte string in utf-8 representation. + assumes string objects to be utf-8""" + if not s: + return "" + if isinstance(s, str): + return s + else: + return s.encode('utf-8') + +def formatAtfHtml(l): + """escape special ATF characters for HTML""" + if not l: + return "" + + # replace & + l = l.replace('&','&') + # replace angular brackets + l = l.replace('<','<') + l = l.replace('>','>') + return l + +def formatAtfLineHtml(l, nolemma=True): + """format ATF line for HTML""" + if not l: + return "" + + if nolemma: + # ignore lemma lines + if l.lstrip().startswith('#lem:'): + return "" + + return formatAtfHtml(l) + + + +def formatAtfFullLineNum(txt, nolemma=True): + """format full line numbers in ATF text""" + # surface codes + surfaces = {'@obverse':'obv', + '@reverse':'rev', + '@surface':'surface', + '@edge':'edge', + '@left':'left', + '@right':'right', + '@top':'top', + '@bottom':'bottom', + '@face':'face', + '@seal':'seal'} + + if not txt: + return "" + + ret = [] + surf = "" + col = "" + for line in txt.splitlines(): + line = unicodify(line) + if line and line[0] == '@': + # surface or column + words = line.split(' ') + if words[0] in surfaces: + surf = line.replace(words[0],surfaces[words[0]]).strip() + + elif words[0] == '@column': + col = ' '.join(words[1:]) + + elif line and line[0] in '123456789': + # ordinary line -> add line number + line = "%s:%s:%s"%(surf,col,line) + + ret.append(line) + + return '\n'.join(ret) + + +def generateXMLReturn(hash): + """erzeugt das xml file als returnwert fuer uploadATFRPC""" + + ret="" + + ret+="" + for error in hash['errors']: + ret+="""%s"""%error + + ret+="" + + ret+="" + for changed in hash['changed']: + ret+="""%s"""%changed + ret+="" + + ret+="" + for new in hash['newPs']: + ret+=""""""%new + ret+="" + + ret+="" + return ret + + +def unique(s): + """Return a list of the elements in s, but without duplicates. + + For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], + unique("abcabc") some permutation of ["a", "b", "c"], and + unique(([1, 2], [2, 3], [1, 2])) some permutation of + [[2, 3], [1, 2]]. + + For best speed, all sequence elements should be hashable. Then + unique() will usually work in linear time. + + If not possible, the sequence elements should enjoy a total + ordering, and if list(s).sort() doesn't raise TypeError it's + assumed that they do enjoy a total ordering. Then unique() will + usually work in O(N*log2(N)) time. + + If that's not possible either, the sequence elements must support + equality-testing. Then unique() will usually work in quadratic + time. + (from the python cookbook) + """ + + n = len(s) + if n == 0: + return [] + + # Try using a dict first, as that's the fastest and will usually + # work. If it doesn't work, it will usually fail quickly, so it + # usually doesn't cost much to *try* it. It requires that all the + # sequence elements be hashable, and support equality comparison. + u = {} + try: + for x in s: + u[x] = 1 + except TypeError: + del u # move on to the next method + else: + return u.keys() + + # We can't hash all the elements. Second fastest is to sort, + # which brings the equal elements together; then duplicates are + # easy to weed out in a single pass. + # NOTE: Python's list.sort() was designed to be efficient in the + # presence of many duplicate elements. This isn't true of all + # sort functions in all languages or libraries, so this approach + # is more effective in Python than it may be elsewhere. + try: + t = list(s) + t.sort() + except TypeError: + del t # move on to the next method + else: + assert n > 0 + last = t[0] + lasti = i = 1 + while i < n: + if t[i] != last: + t[lasti] = last = t[i] + lasti += 1 + i += 1 + return t[:lasti] + + # Brute force is all that's left. + u = [] + for x in s: + if x not in u: + u.append(x) + return u + class BasketContent(SimpleItem): """classe fuer den Inhalt eines Baskets""" - + def __init__(self,content=[]): """content""" self.contentList=content[0:] - def getContent(self): + def getContent(self,filtered=True): """get content""" + ret=[] + if filtered: + for x in self.contentList: + if not((x[0] is None) or (x[1] is None)): + ret.append(x) + return ret + + else: + return self.contentList + + def allContent(self): + """get all content""" + return self.getContent(filtered=False) - return self.contentList - def setContent(self,content): self.contentList=content[0:] @@ -63,6 +272,7 @@ class uploadATFfinallyThread(Thread): self.SESSION=SESSION self.username=username self.serverport=serverport + def __call__(self): """call of the thread (equals run)""" @@ -96,12 +306,12 @@ class uploadATFfinallyThread(Thread): #add the files self.uploadATFfinallyThread(ctx,self.procedure,comment=self.comment,basketname=self.basketname,unlock=self.unlock,SESSION=self.SESSION,username=self.username) #commit the transactions - get_transaction().commit() + transaction.get().commit() conn.close() #set flag for end of this method self.end=True - print "ended" - return True + logging.info("ended") + return True def __del__(self): """delete""" @@ -121,9 +331,10 @@ class uploadATFfinallyThread(Thread): self.result+="

Start processing

" #shall I only upload the changed files? + logging.debug("uploadATFfinally procedure: %s"%procedure) if procedure=="uploadchanged": - - uploadFns=SESSION.get('changed',[])+SESSION.get('newPs',[]) + changed=[x[0] for x in SESSION.get('changed',[])] + uploadFns=changed+SESSION.get('newPs',[]) #or all elif procedure=="uploadAll": @@ -134,64 +345,91 @@ class uploadATFfinallyThread(Thread): #or maybe nothing elif procedure=="noupload": - return True + return True else: uploadFns=[] #do first the changed files + i=0 for fn in uploadFns: + logging.debug("uploadATFfinally uploadFn=%s"%fn) + i+=1 founds=ctx2.CDLICatalog.search({'title':fn}) if len(founds)>0: SESSION['author']=str(username) - self.result+="

Changing : %s"%fn + self.result="

Changing : %s"%fn+self.result + logging.debug("uploadatffinallythread changing:%s"%fn+self.result) founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True) - + if i%200==0: + transaction.get().commit() + logging.debug("uploadatffinallythread changing: do commit") + transaction.get().commit() + logging.debug("uploadatffinallythread changing: last commit") + #now add the new files newPs=SESSION['newPs'] if len(newPs)>0: tmpDir=SESSION['tmpdir'] - self.result+="

Adding files

" + logging.debug("uploadatffinallythread adding start") + self.result="

Adding files

"+self.result #TODO: make this configurable, at the moment base folder for the files has to be cdli_main - ctx2.importFiles(comment=comment,author=str(username) ,folderName=tmpDir, files=newPs,ext=self) - - + logging.debug("uploadatffinallythread adding finished") #unlock locked files? if unlock: - self.result+="

Unlock files

" + logging.debug("uploadatffinallythread unlocking start") + self.result="

Unlock files

"+self.result unlockFns=[] for x in os.listdir(SESSION['tmpdir']): if not x in SESSION['errors']: unlockFns.append(x) - + + logging.debug("unlocking have now what to unlock") + for fn in unlockFns: + #logging.info("will unlock: %s"%fn) founds=ctx2.CDLICatalog.search({'title':fn}) + #logging.info("found it: %s"%repr(founds)) if len(founds)>0: + #logging.info("unlock: %s"%founds[0].getObject().getId()) SESSION['author']=str(username) - founds[0].getObject().lockedBy="" + + logging.debug("uploadatffinallythread unlocking done") - #if a basketname is give, add files to the basket + #if a basketname is given, add files to the basket if not (basketname ==''): - self.result+="

Add basket

" + logging.debug("uploadatffinallythread add to basket %s"%basketname) + self.result="

Add to basket

"+self.result basketId=ctx2.basketContainer.getBasketIdfromName(basketname) if not basketId: # create new basket + logging.debug("uploadatffinallythread create basket %s"%basketname) + self.result="

Create a new basket

"+self.result ob=ctx2.basketContainer.addBasket(basketname) basketId=ob.getId() basket=getattr(ctx2.basketContainer,str(basketId)) ids=os.listdir(SESSION['tmpdir']) + #logging.debug("should add:"+repr(ids)) basket.addObjects(ids,deleteOld=True,username=str(username)) + logging.debug("uploadatffinallythread uploadfinally done") + if RESPONSE is not None: RESPONSE.redirect(self.aq_parent.absolute_url()) - - return True + +class tmpStore(SimpleItem): + """simple item""" + meta_type="cdli_upload" + def __init__(self,id): + """init tmp""" + self.id=id + class uploadATFThread(Thread): """class for checking the files befor uploading""" @@ -204,13 +442,14 @@ class uploadATFThread(Thread): Thread.__init__(self) - def set(self,upload,basketId,username,serverport="8080"): + def set(self,upload,basketId,username,idTmp,serverport="8080"): """set start values for the thread""" self.result="" self.upload=upload self.basketId=basketId self.username=username self.serverport=serverport + self.idTmp=idTmp def __call__(self): """call method """ @@ -229,7 +468,7 @@ class uploadATFThread(Thread): return app.__of__(RequestContainer(REQUEST = req)) def run(self): - + idTmp=self.idTmp self.result="" #find context within ZODB from Zope import DB @@ -237,26 +476,36 @@ class uploadATFThread(Thread): root = conn.root() app = root['Application'] ctx = self.getContext(app,serverport=self.serverport) - self.uploadATFThread(ctx,self.upload,self.basketId) + logging.info("run intern") + try: + logging.info("created: %s"%idTmp) + ctx.temp_folder._setObject(idTmp,tmpStore(idTmp)) + except: + logging.error("thread upload: %s %s"%sys.exc_info()[0:2]) + + logging.info("call thread intern") + self.uploadATFThread(ctx,self.upload,idTmp,self.basketId) #ctx.cdliRoot.cdli_main.tmpStore2[self.getName()[0:]]=self.returnValue - - get_transaction().commit() - - while self.continueVar: - pass + + + transaction.get().commit() conn.close() + return getattr(ctx.temp_folder,idTmp) def getResult(self): """method for accessing result""" return self.result - def uploadATFThread(self,ctx,upload,basketId=0): + def uploadATFThread(self,ctx,upload,idTmp,basketId=0): """upload an atf file""" #TODO: add comments #TODO: finish uploadATF + + stObj=getattr(ctx.temp_folder,idTmp) + logging.info("start, upload thread") self.result="

I got your file, start now to split it into single atf-files!

" #make sure that id is a string and not an integer @@ -295,14 +544,14 @@ class uploadATFThread(Thread): basketNameFromId=getattr(ctx2.basketContainer,basketId).title basketLen=getattr(ctx2.basketContainer,basketId).getLastVersion().numberOfItems() - + logging.info("got the file, upload thread") self.result+="""

I got the files

< p>I am computing the differences to the exisiting files

""" #start to check the files for fn in os.listdir(dir): - self.result+="

process:%s

"%fn + self.result="

process:%s

"%fn+self.result # check if file is in the catalog #TODO: checkCatalog is not implemented yet @@ -315,11 +564,13 @@ class uploadATFThread(Thread): #if not than add filename to the list of newfiles data=file(os.path.join(dir,fn)).read() - #status,msg=checkFile(fn,data,dir) - status=True - msg="" + status,msg=checkFile(fn,data,dir) + #status=True + + if not status: # error errors.append((fn,msg)) + else: if len(founds)==0: newPs.append(fn) @@ -330,7 +581,7 @@ class uploadATFThread(Thread): obj=found.getObject() if (not (str(obj.lockedBy))=='') and (not (str(obj.lockedBy)==str(self.username))): - lockerrors.append(fn) + lockerrors.append((fn,str(obj.lockedBy))) else: diffs=obj.diff(data) @@ -340,272 +591,73 @@ class uploadATFThread(Thread): #ready, set the returnValues self.result+="

Done

" - self.returnValue={} - self.returnValue['changed']=changed - self.returnValue['errors']=errors - self.returnValue['lockerrors']=lockerrors - self.returnValue['newPs']=newPs - self.returnValue['tmpdir']=dir - self.returnValue['basketLen']=basketLen - self.returnValue['numberOfFiles']=numberOfFiles - self.returnValue['basketNameFromId']=basketNameFromId - self.returnValue['basketNameFromFile']=basketNameFromFile - self.returnValue['basketId']=basketId - self.returnValue['dir']=dir + stObj.returnValue={} - #ctx2.cdli_main.setTemp('v_uploadATF_returnValue',True) - + stObj.returnValue['errors']=errors -class Basket_old(Folder): - """shopping basket - alte fassung """ + stObj.returnValue['newPs']=newPs + stObj.returnValue['tmpdir']=dir + stObj.returnValue['basketLen']=basketLen + stObj.returnValue['numberOfFiles']=numberOfFiles + stObj.returnValue['basketNameFromId']=basketNameFromId + stObj.returnValue['basketNameFromFile']=basketNameFromFile + stObj.returnValue['basketId']=basketId + stObj.returnValue['dir']=dir + #stObj.returnValue['changed']=copy.copy(changed) + stObj.returnValue['changed']=[(x[0].getId(),x[1][0]) for x in changed] + #stObj.returnValue['lockerrors']=[x[0].getId() for x in lockerrors] + stObj.returnValue['lockerrors']=[x for x in lockerrors] + self.returnValue=True + #ctx2.cdli_main.setTemp('v_uploadATF_returnValue',True) - meta_type="Basket" - _v_stack={} - - def getObjUrl(self,objId): - """getUrl""" - founds=self.CDLICatalog.search({'title':objId}) - if len(founds)>0: - return founds[0].getObject().absolute_url() - - else: #assume version number - splitted=objId.split("_") - founds=self.CDLICatalog.search({'title':splitted[1]}) - return founds[0].getObject().absolute_url()+'/'+objId - - def storeAllLink(self,results): - """erzeuge link zum speicher aller results""" - nr=self.REQUEST['_ZopeId'] - - if results: - self._v_stack[nr]=[x.getObject().getId() for x in results] - - return self.absolute_url()+"/storeAll?id="+nr + +class CDLIBasketContainer(OrderedFolder): + """contains the baskets""" - def storeAll(self,id): - """store all""" - try: - results=self._v_stack[id] - except: - #TODO: write expired page - return "expired" - - return self.storeInBasketForm(results) - - def storeInBasketForm(self,ids): - """ store an object form""" - - if type(ids) is not ListType: - ids=[ids] - self.REQUEST.SESSION['ids']=ids[0:] - - self.REQUEST.SESSION['BACKLINK']=self.REQUEST['HTTP_REFERER'] - - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','storeBasketObject.zpt')).__of__(self) - return pt() - - def storeInBasket(self,username,ids=None,RESPONSE=None,REQUEST=None): - """store it""" - - if not ids: - ids=REQUEST.SESSION['ids'] - - self.REQUEST.SESSION['basketUser']=username - - baskets=self.ZopeFind(self,obj_ids=[username]) - if len(baskets)>0: - basket=baskets[0][1] - else: - manage_addBasketObject(self,username) - basket=self._getOb(username) - - - basket.addObjects(ids) - back=self.REQUEST.SESSION.get('BACKLINK', None) - - if RESPONSE: - RESPONSE.redirect(back) - + security=ClassSecurityInfo() + meta_type="CDLIBasketContainer" - def showBasket(self,user=None,set=None,RESPONSE=None): - """show the basket""" - - if user: - self.REQUEST.SESSION['basketUser']=user - - if not user and not set: - user=self.REQUEST.SESSION.get('basketUser',None) - - if not user: - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','orizeBasketUser.zpt')).__of__(self) - return pt() - else: - baskets=self.ZopeFind(self,obj_ids=[user]) - - - if len(baskets)>0: - RESPONSE.redirect(baskets[0][1].absolute_url()) - return True - else: - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','emptyBasket.zpt')).__of__(self) - return pt() + def getPNumbersOfBasket(self,basketName): + """get all pnumbers of a basket as a list, returns an empty list if basket not found + @param basketName: name of the basket + """ + ret=[] + basketId=self.getBasketIdfromName(basketName) + if not basketId: + return [] - -def manage_addBasket_oldForm(self): - """add the basket form""" - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','addBasket.zpt')).__of__(self) - return pt() - -def manage_addBasket_old(self,id,title,RESPONSE=None): - """add the basket""" - ob=Basket() - - ob.id=str(id) - ob.title=title - self._setObject(id, ob) - ob=self._getOb(id) - - if RESPONSE is not None: - RESPONSE.redirect('manage_main') - - -class BasketObject_old(Folder): - """Basket Object - alte fassung""" - - meta_type="basketObject" - def __init__(self): - """init basket object""" - self.contents=[] - - def numberOfItems(self): - """return anzahl der elemente im basket""" - num=len(self.contents) + ob=getattr(self,basketId).getContent() - return num - - def addObjects(self,ids): - """addObjects""" + ret=[x[0].split(".")[0] for x in ob] - for id in ids: - founds=self.CDLICatalog.search({'title':id}) - for found in founds: - if found.getObject() not in self.contents: - tm=self.contents[0:] - tm.append(found.getObject()) - self.contents=tm[0:] + return ret - return True - - def index_html(self): - """view the basket""" - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','basketObject_index_html.zpt')).__of__(self) - return pt() - - def deleteObjects(self,ids,RESPONSE=None): - """delete objects""" - list = self.contents[0:] - for content in list: - - if content.getId() in ids: - self.contents.remove(content) - - - if RESPONSE: - RESPONSE.redirect(self.absolute_url()) - - - def unlockTest(self): - """unlock all files of the testuser for debuggin""" - for object in self.contents: - - if str(object.lockedBy)=="test": - object.lockedBy="" - - def downloadObjectsAsOneFile(self,lock=None,procedure=None,REQUEST=None): - """download all selected files in one file""" - + security.declareProtected('manage','getBasketAsOneFile') + def getBasketAsOneFile(self,basketName,current="no"): + """returns all files of the basket combined in one file + @param basketName: Name of the basket + @param current: (optional) if current is set to "yes" then the most current version of + all files are downloaded and not the versions of the files as stored in the basket + """ ret="" - lockedObjects={} - - if self.temp_folder.downloadCounter > 10: - return """I am sorry, currently the server has to many requests for downloads, please come back later!""" - - - if lock: - - if str(self.REQUEST['AUTHENTICATED_USER'])=='Anonymous User': - - return "please login first" - - #check if a locked object exist in the basket. - lockedObjects={} - for object in self.contents: - - if not object.lockedBy=="": - lockedObjects[object.title]=repr(object.lockedBy) - - - keys=lockedObjects.keys() - - - if len(keys)>0 and (not procedure): - self.REQUEST.SESSION['lockedObjects']=lockedObjects - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','lockedObjects.zpt')).__of__(self) - return pt() - - elif not procedure: #keine fails gesperrt dann alle donwloaden - procedure="downloadAll" - - self.temp_folder.downloadCounter+=1 - self._p_changed=1 - get_transaction().commit() - - - for object in self.contents: - - if (procedure=="downloadAll") or (object.lockedBy=='') or (object.lockedBy==self.REQUEST['AUTHENTICATED_USER']): - ret+=object.getLastVersion().getData() - - if lock and object.lockedBy=='': - object.lockedBy=self.REQUEST['AUTHENTICATED_USER'] - - - self.REQUEST.RESPONSE.setHeader("Content-Disposition","""attachement; filename="basket_%s.atf" """%self.getId()) - self.REQUEST.RESPONSE.setHeader("Content-Type","application/octet-stream") - length=len(ret) - self.REQUEST.RESPONSE.setHeader("Content-Length",length) - self.REQUEST.RESPONSE.write(ret) - self.temp_folder.downloadCounter-=1 - self._p_changed=1 - get_transaction().commit() - + basketId=self.getBasketIdfromName(basketName) + if not basketId: + return "" -def manage_addBasket_oldObjectForm(self): - """add form""" - pass - -def manage_addBasket_oldObject(self,id,title='',RESPONSE=None): - """add""" - - ob=BasketObject() - - ob.id=str(id) - ob.title=title - self._setObject(id, ob) - ob=self._getOb(id) - - if RESPONSE is not None: - RESPONSE.redirect('manage_main') - - -class CDLIBasketContainer(OrderedFolder): - """contains the baskets""" - - - security=ClassSecurityInfo() - meta_type="CDLIBasketContainer" + ob=getattr(self,basketId).getLastVersion() + for object in ob.getContent(): + if current=="no": #version as they are in the basket + ret+=str(object[0].getData())+"\n" + elif current=="yes": + #search current object + #logging.debug("current: %s"%object[1].getId().split(".")[0]) + founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]}) + if len(founds)>0: + ret+=str(founds[0].getObject().getLastVersion().getData())+"\n" + return ret + security.declareProtected('manage','upDateBaskets') def upDateBaskets(self): """update content in to objects""" @@ -618,6 +670,10 @@ class CDLIBasketContainer(OrderedFolder) def deleteBaskets(self,ids=None): """delete baskets, i.e. move them into trash folder""" + if ids is None: + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','cdliError_html.zpt')).__of__(self) + txt="Sorry, no basket selected!" + return pt(txt=txt) found=self.ZopeFind(self,obj_ids=['trash']) @@ -629,16 +685,34 @@ class CDLIBasketContainer(OrderedFolder) if type(ids) is not ListType: ids=[ids] + logging.error("XERXON:"+repr(ids)) + if len(ids)==0: + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','cdliError_html.zpt')).__of__(self) + txt="Sorry, no basket selected!" + return pt(txt=txt) + cut=self.manage_cutObjects(ids) trash.manage_pasteObjects(cut) - + return None security.declareProtected('manage','manageBaskets') - def manageBaskets(self,ids,submit,REQUEST=None,RESPONSE=None): + def manageBaskets(self,submit,ids=None,basket1="",basket2="",joinBasket="",subtractBasket="",REQUEST=None,RESPONSE=None): """manage baskets, delete or copy""" if submit=="delete": - self.deleteBaskets(ids) - + ret= self.deleteBaskets(ids) + if ret: + return ret + elif submit=="join": + flag,msg=self.joinBasket(joinBasket, ids) + logging.info("joining %s %s"%(flag,msg)) + if not flag: + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','cdliError_html.zpt')).__of__(self) + + return pt(txt=msg) + elif submit=="subtract": + logging.info("BBBb %s %s"%(basket1,basket2)) + flag,msg=self.subtractBasket(subtractBasket, basket1,basket2) + logging.info("subtract %s %s"%(flag,msg)) if RESPONSE: RESPONSE.redirect(self.absolute_url()) @@ -669,7 +743,7 @@ class CDLIBasketContainer(OrderedFolder) return pt(basketId=basketId,basketName=basketName) - security.declareProtected('View','index_html') + security.declareProtected('manage','index_html') def index_html(self): """stanadard ansicht""" @@ -702,7 +776,7 @@ class CDLIBasketContainer(OrderedFolder) ret+= "BASKET:"+com+"\t"+user+"\t"+time+"\n" for x in values: ret+= x[0]+"\t"+x[1]+"\n" - return ret + return ret def getBaskets(self,sortField='title'): """get all baskets files""" @@ -761,9 +835,88 @@ class CDLIBasketContainer(OrderedFolder) baskets.sort(sortComment) return baskets + + + def subtractBasket(self,newBasket,basket1,basket2): + """subtract basket2 from basket1 + (i.e. newbasket will contain alle elements of basket1 which are not in basket2), + if basket2 contains files which are not in basket1, then theses files fill be ignored + + @param newbasket: name of the new basket + @param basket1: basket where basket2 will be subtracted from + @param basket2: see above + + """ + + logging.info("CCCCC %s %s"%(basket1,basket2)) + + try: + newB=self.addBasket(newBasket) + except: + return False, "cannot create the new basket" + + - - + + + bas2= getattr(self,basket2) + bas2content=bas2.getContent() + bas2ids=[x[0] for x in bas2content] + + + + bas1= getattr(self,basket1) + bas1content=bas1.getContent() + + + newBasketContent={} + + for id,version in bas1content: + if not (id in bas2ids): + newBasketContent[id]=version + + username=self.getActualUserName() + + logging.info("sbc %s"%newBasketContent) + newB.addObjectsWithVersion(newBasketContent,username=username,catalog=self.CDLICatalog) + + return True, "" + + + def joinBasket(self,newBasket,oldBaskets): + """join two baskets + @param newbasket: name of the new basket + @param oldbaskets: list of baskets to be joined + """ + if oldBaskets is None: + return False, "No Baskets selected!" + + try: + newB=self.addBasket(newBasket) + except: + return False, "cannot create the new basket" + + newBasketContent={} + + for ob in oldBaskets: + x= getattr(self,ob,None) + if x is None: + return False, "cannot find basket: %s"%ob + + ids=x.getContent() # hole den Inhalt + + for id,version in ids: + if newBasketContent.has_key(id): # p number gibt's schon + newBasketContent[id]=max(newBasketContent[id],version) # speichere die groessere Versionsnumber + else: + newBasketContent[id]=version + username=self.getActualUserName() + + logging.info("nbc %s"%newBasketContent) + newB.addObjectsWithVersion(newBasketContent,username=username,catalog=self.CDLICatalog) + + return True, "" + def getNewId(self): """createIds""" last=getattr(self,'last',0) @@ -777,11 +930,11 @@ class CDLIBasketContainer(OrderedFolder) def setActiveBasket(self,basketId,REQUEST=None): """store active basketId in a cookie""" self.REQUEST.RESPONSE.setCookie("CDLIActiveBasket",basketId,path="/") - try: - qs=cgi.parse_qs(REQUEST['QUERY_STRING']) - del(qs['basketId']) - except: - qs={} + try: + qs=cgi.parse_qs(REQUEST['QUERY_STRING']) + del(qs['basketId']) + except: + qs={} if REQUEST: REQUEST.RESPONSE.redirect(REQUEST['URL1']+'?'+urllib.urlencode(qs)) @@ -799,21 +952,24 @@ class CDLIBasketContainer(OrderedFolder) """get name of the actualuser""" return str(self.REQUEST['AUTHENTICATED_USER']) - + security.declareProtected('manage','addBasket') def addBasket(self,newBasketName): """add a new basket""" ob=manage_addCDLIBasket(self,newBasketName) return ob - + def storeInBasket(self,submit,ids=None,newBasketName=None,fromFileList=None,RESPONSE=None,REQUEST=None): """store it""" if not ids: ids=self.REQUEST.SESSION['fileIds'] - - if type(ids) is not ListType: + + if (type(ids) is not ListType) and (not isinstance(ids,Set)): ids=[ids] + if isinstance(ids,Set): + ids=list(ids) + if (submit.lower()=="store in new basket") or (submit.lower()=="new basket"): basketRet=self.addBasket(newBasketName) self.setActiveBasket(basketRet.getId()) @@ -827,7 +983,7 @@ class CDLIBasketContainer(OrderedFolder) if fromFileList: - return self.cdli_main.findObjectsFromList(list=self.REQUEST.SESSION['fileIds'],basketName=basket.title,numberOfObjects=added) + return self.cdli_main.findObjectsFromList(list=ids,basketName=basket.title,numberOfObjects=added) if RESPONSE: @@ -855,7 +1011,43 @@ class CDLIBasket(Folder,CatalogAware): meta_type="CDLIBasket" default_catalog="CDLIBasketCatalog" + def searchInBasket(self,indexName,searchStr,regExp=False): + """searchInBasket""" + lst=self.searchInLineIndexDocs(indexName,searchStr,uniq=True,regExp=regExp) #TODO: fix this + ret={} + + lv=self.getLastVersion() + + + for obj in lv.content.getContent(): + id=obj[1].getId().split(".")[0] + if id in lst: + + ret[id]=self.showWordInFile(id,searchStr,lineList=self.getLinesFromIndex(indexName,searchStr,id,regExp=regExp),regExp=regExp,indexName=indexName) + + + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','searchResultsInBasket')).__of__(self) + return pt(result=ret,indexName=indexName,regExp=regExp,word=searchStr) + + + + + def searchInBasket_v1(self,searchStr): + """search occurences of searchStr in files im basket""" + ret=[] + lv=self.getLastVersion() + logging.info("searching") + for obj in lv.content.getContent(): + txt=obj[0].getData() + for x in txt.split("\n"): + logging.info("search %s"%x) + if re.match(searchStr,x): + ret.append(x) + + return "\n".join(ret) + + def getFile(self,obj): return obj[1] @@ -870,21 +1062,26 @@ class CDLIBasket(Folder,CatalogAware): def isActual(self,obj): """teste ob im basket die aktuelle version ist""" - actualNo=obj[1].getLastVersion().getVersionNumber() - storedNo=obj[0].getVersionNumber() - - founds=self.CDLICatalog.search({'title':obj[0].getId()}) - if len(founds)>0: - actualNo=founds[0].getObject().getLastVersion().getVersionNumber() + try: + #logging.debug("isActual:"+repr(obj)) + actualNo=obj[1].getLastVersion().getVersionNumber() + storedNo=obj[0].getVersionNumber() - if len(founds)>0 and founds[0].getObject().aq_parent.getId()==".trash": + + #actualNo=self.getFileObjectLastVersion(obj.getId()).getVersionNumber() + + #if len(founds)>0 and founds[0].getObject().aq_parent.getId()==".trash": + # return False, -1 + + if actualNo==storedNo: + return True , 0 + else: + return False, actualNo + except: + logging.error( """is actual: %s (%s %s)"""%(repr(obj),sys.exc_info()[0],sys.exc_info()[1])) + return False, -1 - - if actualNo==storedNo: - return True , 0 - else: - return False, actualNo - + def history(self): """history""" @@ -907,7 +1104,11 @@ class CDLIBasket(Folder,CatalogAware): self.shortDescription=shortDescription self.comment=comment - + def getActualUserName(self): + """get name of the actualuser""" + + return str(self.REQUEST['AUTHENTICATED_USER']) + def getLastVersion(self): """hole letzte version""" @@ -971,11 +1172,17 @@ class CDLIBasket(Folder,CatalogAware): return obj - def addObjects(self,ids,deleteOld=None,username=None): - """generate a new version of the basket with objects added""" - + def addObjectsWithVersion(self,ids,deleteOld=None,username=None,catalog=None): + """generate a new version of the basket with objects added, + hier wird jedoch nicht die letzte Version jedes Files hinzugefuegt, s + ondern ids is ein Tupel mit der Id (d.h. der p-number) und der Versionsnummer. + """ + logging.info("add to basket (%s)"%(self.getId())) lastVersion=self.getLastVersion() + if not catalog: + catalog=self.CDLICatalog + if lastVersion is None: oldContent=[] else: @@ -986,30 +1193,107 @@ class CDLIBasket(Folder,CatalogAware): newContent=[] added=0 - for id in ids: - try: - founds=self.CDLICatalog.search({'title':id}) - except: - founds=[] + + for id,version in ids.iteritems(): + logging.info("adding %s %s"%(id,version)) + id=id.split(".")[0] # title nur die pnumber ohne atf + + try: + founds=catalog.search({'title':id}) + except: + founds=[] + logging.info(" found %s "%(founds)) for found in founds: if found.getObject() not in oldContent: + #TODO: was passiert wenn, man eine Object dazufŸgt, das schon da ist aber eine neuere version - newContent.append((found.getObject().getLastVersion(),found.getObject())) + newContent.append((found.getObject().getVersions()[version-1][1],found.getObject())) added+=1 content=oldContent+newContent if not username: + logging.error("XXXXXXXXXXX %s"%repr(self)) user=self.getActualUserName() else: user = username ob=manage_addCDLIBasketVersion(self,user,comment="",basketContent=content) + logging.info("add to basket (%s) done"%(self.getId())) + return added + + def addObjects(self,ids,deleteOld=None,username=None): + """generate a new version of the basket with objects added""" + + def swap(x): + return (x[1],x[0]) + + logging.info("add to basket (%s)"%(repr(ids))) + logging.info("add to basket (%s)"%(self.getId())) + lastVersion=self.getLastVersion() + + if lastVersion is None: + oldContent=[] + else: + oldContent=lastVersion.content.getContent() + + if deleteOld: + oldContent=[] + + added=0 +# for id in ids: +# logging.debug("adding:"+id) +# try: +# founds=self.CDLICatalog.search({'title':id}) +# except: +# founds=[] +# +# for found in founds: +# if found.getObject() not in oldContent: +# #TODO: was passiert wenn, man eine Object dazufŸgt, das schon da ist aber eine neuere version +# newContent.append((found.getObject().getLastVersion(),found.getObject())) +# added+=1 + + hash = md5.new(repr(makelist(ids))).hexdigest() # erzeuge hash als identification + #logging.debug("JJJJJJJ:"+repr(self.makelist(ids))) + + + if hasattr(self.cdliRoot,'v_tmpStore') and self.cdliRoot.v_tmpStore.has_key("hash"): #TODO: muss eigentlich self.cdliRoot.v_tmpStore.has_key(hash): heissen (ohne "), erstmal so gesetzt damit der hash hier nie benutzt wird + logging.debug("from store!") + newContent=Set(map(swap,self.cdliRoot.v_tmpStore[hash])) + + else: + logging.debug("not from store!") + newContent=Set([(self.getFileObjectLastVersion(x),self.getFileObject(x)) for x in ids]) + + #remove all Elements which are not stored + if (None,None) in newContent: + newContent.remove((None,None)) + content=Set(oldContent).union(newContent) + added = len(content)-len(oldContent) + if not username: + user=self.getActualUserName() + else: + user = username + + #logging.debug("content:"+repr(list(content))) + ob=manage_addCDLIBasketVersion(self,user,comment="",basketContent=list(content)) + logging.info("add to basket (%s) done"%(self.getId())) return added - + def getContent(self): + """print content""" + ret=[] + + lv=self.getLastVersion() + for obj in lv.content.getContent(): + #logging.info("XXXXXXXXXX %s"%repr(obj)) + ret.append((obj[1].getId(),obj[0].versionNumber)) + + return ret + def getContentIds(self): """print basket content""" ret=[] @@ -1096,7 +1380,22 @@ class CDLIBasketVersion(Implicit,Persist return True return False - security.declareProtected('View','downloadObjectsAsOneFile') + def downloadListOfPnumbers(self): + """download pnumbers of the basket as list""" + + basket_name=self.aq_parent.title + + ids=self.getContent() # get the list of objects + logging.error(ids) + ret="\n".join([x[1].getId().split(".")[0] for x in ids]) + + self.REQUEST.RESPONSE.setHeader("Content-Disposition","""attachement; filename="%s.txt" """%basket_name) + self.REQUEST.RESPONSE.setHeader("Content-Type","application/octet-stream") + length=len(ret) + self.REQUEST.RESPONSE.setHeader("Content-Length",length) + self.REQUEST.RESPONSE.write(ret) + + security.declareProtected('manage','downloadObjectsAsOneFile') def downloadObjectsAsOneFile(self,lock=None,procedure=None,REQUEST=None,check="yes",current="no"): """download all selected files in one file""" @@ -1113,32 +1412,26 @@ class CDLIBasketVersion(Implicit,Persist return self.downloadObjectsAsOneFileFinally(lock=lock,procedure=procedure,REQUEST=REQUEST,current="no") - def downloadObjectsAsOneFileFinally(self,lock=None,procedure=None,REQUEST=None,current="no"): + def downloadObjectsAsOneFileFinally(self,lock=None,procedure=None,REQUEST=None,current="no",repeat=None): """print do the download""" - + + ret="" lockedObjects={} - self.temp_folder.downloadCounterBaskets+=1 - self._p_changed=1 - get_transaction().commit() + if lock: - + logging.debug("------lock:"+repr(lock)) if str(self.REQUEST['AUTHENTICATED_USER'])=='Anonymous User': - self.temp_folder.downloadCounterBaskets-=1 - self._p_changed=1 - get_transaction().commit() - self.temp_folder.downloadCounterBaskets-=1 - self._p_changed=1 - get_transaction().commit() + return "please login first" #check if a locked object exist in the basket. lockedObjects={} for object in self.content.getContent(): - if not object[1].lockedBy=="": + if (not str(object[1].lockedBy)=="") and (not (str(object[1].lockedBy)==str(self.REQUEST['AUTHENTICATED_USER']))): lockedObjects[object[1].title]=repr(object[1].lockedBy) @@ -1149,47 +1442,138 @@ class CDLIBasketVersion(Implicit,Persist self.REQUEST.SESSION['lockedObjects']=lockedObjects pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','lockedObjects.zpt')).__of__(self) - self.temp_folder.downloadCounterBaskets-=1 - self._p_changed=1 - get_transaction().commit() - + return pt() elif not procedure: #keine fails gesperrt dann alle donwloaden procedure="downloadAll" + + + threadName=repeat + if not threadName or threadName=="": + thread=DownloadBasketFinallyThread() + threadName=thread.getName()[0:] + if (not hasattr(self,'_v_downloadBasket')): + self._v_downloadBasket={} - for object in self.content.getContent(): - - if (procedure=="downloadAll") or (object[1].lockedBy=='') or (object[1].lockedBy==self.REQUEST['AUTHENTICATED_USER']): - if current=="no": #version as they are in the basket - ret+=str(object[0].getData())+"\n" - elif current=="yes": - #search current object - founds=self.CDLICatalog.search({'title':object[0].getId()}) - if len(founds)>0: - ret+=str(founds[0].getObject().getLastVersion().getData())+"\n" - - if lock and object[1].lockedBy=='': - object[1].lockedBy=self.REQUEST['AUTHENTICATED_USER'] - basket_name=self.aq_parent.title+"_V"+self.getId() + + self._v_downloadBasket[threadName]=thread + logging.debug("dwonloadfinally:"+repr(self)) + + if isinstance(self,CDLIBasketVersion): + obj=self + else: + obj=self.aq_parent + logging.debug("dwonloadfinally2:"+repr(obj)) + logging.debug("dwonloadfinally2:"+repr(obj.aq_parent)) + + obj2=obj.aq_parent + if not isinstance(obj2,CDLIBasket): + obj2=obj2.aq_parent + + basketID=obj2.getId() + versionNumber=obj.getId() + logging.debug("dwonloadfinally2:"+repr(basketID)) + logging.debug("dwonloadfinally2:"+repr(versionNumber)) + + + if lock: + logging.debug("-----start locking") + for object in self.content.getContent(): + if object[1].lockedBy =='': + object[1].lockedBy=self.REQUEST['AUTHENTICATED_USER'] + logging.debug("-----finished locking") + + #obj.lockedBy=user + self._v_downloadBasket[threadName].set(lock,procedure,self.REQUEST['AUTHENTICATED_USER'],current,basketID,versionNumber) + + self._v_downloadBasket[threadName].start() + + + + wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) + + if wait_template: + return wait_template[0][1]() + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','downloadBasketWait.zpt')).__of__(self) + + return pt(txt=self.absolute_url()+'/downloadObjectsAsOneFileFinally',threadName=threadName, + counter=self._v_downloadBasket[threadName].getCounter(), + number=self._v_downloadBasket[threadName].getNumberOfFiles()) + #_v_xmltrans.run() - #write basketname to header of atf file - ret="#basket: %s\n"%basket_name+ret + else: + #recover thread, if lost + if not hasattr(self,'_v_downloadBasket'): + self._v_downloadBasket={} + if not self._v_downloadBasket.get(threadName,None): + for thread in threading.enumerate(): + if threadName == thread.getName(): + self._v_downloadBasket[threadName]=thread + + if self._v_downloadBasket.get(threadName,None) and (self._v_downloadBasket[threadName] is not None) and (not self._v_downloadBasket[threadName].end) : - self.temp_folder.downloadCounterBaskets-=1 - self._p_changed=1 - get_transaction().commit() + wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) + if wait_template: + return wait_template[0][1]() + + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','downloadBasketWait.zpt')).__of__(self) + return pt(txt=self.absolute_url()+'/downloadObjectsAsOneFileFinally',threadName=threadName, + counter=self._v_downloadBasket[threadName].getCounter(), + number=self._v_downloadBasket[threadName].getNumberOfFiles()) + else: + + + logging.debug("FINISHED") + if not self._v_downloadBasket.get(threadName,None): + for thread in threading.enumerate(): + if threadName == thread.getName(): + self._v_downloadBasket[threadName]=thread + + #files = self._v_downloadBasket[threadName].result + files=self.basketContainer.resultHash[threadName] + lockedFiles=self.basketContainer.resultLockedHash[threadName] + + # fh=file("/var/tmp/test") + #ret =fh.read() + + if (not isinstance(self.aq_parent,CDLIBasket)): + basket_name=self.aq_parent.aq_parent.title+"_V"+self.getId() + else: + basket_name=self.aq_parent.title+"_V"+self.getId() - self.REQUEST.RESPONSE.setHeader("Content-Disposition","""attachement; filename="%s.atf" """%basket_name) - self.REQUEST.RESPONSE.setHeader("Content-Type","application/octet-stream") - length=len(ret) - self.REQUEST.RESPONSE.setHeader("Content-Length",length) - self.REQUEST.RESPONSE.write(ret) - return True + + #write basketname to header of atf file + + + self.REQUEST.RESPONSE.setHeader("Content-Disposition","""attachement; filename="%s.atf" """%basket_name) + self.REQUEST.RESPONSE.setHeader("Content-Type","application/octet-stream") + #length=len(ret) + #self.REQUEST.RESPONSE.setHeader("Content-Length",length) + + ret="#basket: %s\n"%basket_name + self.REQUEST.RESPONSE.write(ret) + + for fileName in files: + try: + self.REQUEST.RESPONSE.write(file(fileName).read()) + except: + logging.error("downloadasonefile: cannot read %s"%fileName) + + + self.REQUEST.RESPONSE.write("\n# locked files\n") + for fileName in lockedFiles: + self.REQUEST.RESPONSE.write("# %s by %s\n"%fileName) + + self.REQUEST.RESPONSE.write("# locked files end\n") + + del self.basketContainer.resultHash[threadName] + del self.basketContainer.resultLockedHash[threadName] + def numberOfItems(self): """return anzahl der elemente im basket""" return self.content.numberOfItems() @@ -1214,7 +1598,7 @@ class CDLIBasketVersion(Implicit,Persist def __init__(self,id,user,comment="",basketContent=[]): """ init a basket version""" self.id=id - self.coment=comment + self.comment=comment self._setObject('content',BasketContent(basketContent)) #self.basketContent=basketContent[0:]a self.user=user @@ -1228,7 +1612,7 @@ class CDLIBasketVersion(Implicit,Persist """get Comment""" return self.comment - security.declareProtected('View','index_html') + security.declareProtected('manage','index_html') def index_html(self): """view the basket""" @@ -1280,43 +1664,47 @@ class CDLIFileObject(CatalogAware,extVer security=ClassSecurityInfo() - - security.declarePublic('makeThisVersionCurrent') - + security.declareProtected('manage','index_html') + + security.declarePublic('view') + view = PageTemplateFile('zpt/viewCDLIFile.zpt', globals()) + + security.declarePublic('editATF') + editATF = PageTemplateFile('zpt/editATFFile.zpt', globals()) + def PrincipiaSearchSource(self): """Return cataloguable key for ourselves.""" return str(self) + def setAuthor(self, author): + """change the author""" + self.author = author + def makeThisVersionCurrent_html(self): - """form for making this version current""" + """form for mthis version current""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','makeThisVersionCurrent.zpt')).__of__(self) return pt() + + security.declarePublic('makeThisVersionCurrent') def makeThisVersionCurrent(self,comment,author,RESPONSE=None): """copy this version to current""" parent=self.aq_parent - - - newversion=parent.manage_addCDLIFileObject('',comment,author) - newversion.data=self.data[0:] + parent.manage_addVersionedFileObject(id=None,vC=comment,author=author,file=self.getData(),RESPONSE=RESPONSE) + #newversion=parent.manage_addCDLIFileObject('',comment,author) + #newversion.manage_upload(self.getData()) - if RESPONSE is not None: - RESPONSE.redirect(self.aq_parent.absolute_url()+'/history') - + #if RESPONSE is not None: + # RESPONSE.redirect(self.aq_parent.absolute_url()+'/history') return True - security.declarePublic('view') - def getFormattedData(self): """fromat text""" data=self.getData() - return re.sub("\s\#lem"," #lem",data) #remove return vor #lem +# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem + return re.sub("#lem"," #lem",data) #remove return vor #lem - def view(self): - """view file""" - pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self) - return pt() security.declarePublic('getPNumber') def getPNumber(self): @@ -1345,13 +1733,14 @@ class CDLIFileObject(CatalogAware,extVer return txt.group(2) except: return "ERROR" + manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1') -def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',precondition='', content_type='', +def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0, + precondition='', content_type='', from_tmp=False,REQUEST=None): """Add a new File object. - Creates a new File object 'id' with the contents of 'file'""" id=str(id) @@ -1364,47 +1753,57 @@ def manage_addCDLIFileObject(self,id,vC= self=self.this() # First, we create the file without data: - self._setObject(id, CDLIFileObject(id,title,'',content_type, precondition)) - self._getOb(id).versionComment=str(vC) - self._getOb(id).time=time.localtime() - - setattr(self._getOb(id),'author',author) - + self._setObject(id, CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=vC,time=time.localtime(),author=author)) + fob = self._getOb(id) # Now we "upload" the data. By doing this in two steps, we # can use a database trick to make the upload more efficient. if file and not from_tmp: - self._getOb(id).manage_upload(file) + fob.manage_upload(file) elif file and from_tmp: - self._getOb(id).manage_upload_from_tmp(file) + fob.manage_file_upload(file) # manage_upload_from_tmp doesn't exist in ExtFile2 + # fob.manage_upload_from_tmp(file) # manage_upload_from_tmp doesn't exist in ExtFile2 if content_type: - self._getOb(id).content_type=content_type + fob.content_type=content_type + #logging.debug("manage_add: lastversion=%s"%self.getData()) + logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog))) self.reindex_object() - self._getOb(id).reindex_object() + #logging.debug("manage_add: fob_data=%s"%fob.getData()) + logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog))) + fob.index_object() + self.CDLIRoot.updateOrAddToFileBTree(ob) if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') + class CDLIFile(extVersionedFile,CatalogAware): """CDLI file""" + security=ClassSecurityInfo() meta_type="CDLI file" + content_meta_type = ["CDLI File Object"] + default_catalog='CDLICatalog' - #security.declarePublic('history') + security.declareProtected('manage','index_html') + def getLastVersionData(self): """get last version data""" - return self.getLastVersion().getData() + return self.getData() def getLastVersionFormattedData(self): """get last version data""" - return self.getLastVersion().getFormattedData() + return self.getContentObject().getFormattedData() + + def getTextId(self): + """returns P-number of text""" + # assuming that its the beginning of the title + return self.title[:7] #security.declarePublic('history') - - def history(self): """history""" @@ -1449,6 +1848,12 @@ class CDLIFile(extVersionedFile,CatalogA #return [x.getObject() for x in context.CDLIBasketCatalog.search({'getFileNamesInLastVersion':self.getId()})] + def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None): + """factory for content objects. to be overridden in derived classes.""" + logging.debug("_newContentObject(CDLI)") + return CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=versionComment,time=time,author=author) + + def addCDLIFileObjectForm(self): """add a new version""" @@ -1474,58 +1879,30 @@ class CDLIFile(extVersionedFile,CatalogA except: pass - - if changeName=="yes": - filename=file.filename - self.title=filename[max(filename.rfind('/'), - filename.rfind('\\'), - filename.rfind(':'), - )+1:] - - - if not newName=='': - self.title=newName[0:] - - + ob = self.addContentObject(id, vC, author, file, title, changeName=changeName, newName=newName, from_tmp=from_tmp, + precondition=precondition, content_type=content_type) - - positionVersionNum=getattr(self,'positionVersionNum','front') - - if positionVersionNum=='front': - id="V%i"%self.getVersion()+"_"+self.title - else: - tmp=os.path.splitext(self.title) - if len(tmp)>1: - id=tmp[0]+"_V%i"%self.getVersion()+tmp[1] - else: - id=tmp[0]+"_V%i"%self.getVersion() - - - manage_addCDLIFileObject(self,id,vC,author,file,id,precondition, content_type,from_tmp=from_tmp) - #objs=self.ZopeFind(self,obj_ids=[id])[0][1].setVersionNumber(int(self.getVersion())) - objs=getattr(self,id).setVersionNumber(int(self.getVersion())) try: - #FIXME: wozu ist das gut? - self.REQUEST.SESSION['objID_parent']=self.getId() + #FIXME: wozu ist das gut? + self.REQUEST.SESSION['objID_parent']=self.getId() except: - pass + pass + #self.cdliRoot.updateOrAddToFileBTree(self)# now update the object in the cache + + if RESPONSE: - - obj=self.ZopeFind(self,obj_ids=[id])[0][1] - if obj.getSize()==0: - self.REQUEST.SESSION['objID']=obj.getId() + if ob.getSize()==0: + self.REQUEST.SESSION['objID']=ob.getId() pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','errorUploadFile')).__of__(self) return pt() - else: if come_from and (come_from!=""): - RESPONSE.redirect(come_from+"?change="+self.getId()) + RESPONSE.redirect(come_from+"?change="+self.getId()) else: RESPONSE.redirect(self.REQUEST['URL2']+'?uploaded=%s'%self.title) - else: - return self.ZopeFind(self,obj_ids=[id])[0][1] + return ob def manage_addCDLIFileForm(self): @@ -1539,8 +1916,6 @@ def manage_addCDLIFile(self,id,title,loc tryToggle=True tryCount=0 - - self._setObject(id,newObj) getattr(self,id).reindex_object() @@ -1549,6 +1924,14 @@ def manage_addCDLIFile(self,id,title,loc RESPONSE.redirect('manage_main') +def checkUTF8(data): + """check utf 8""" + try: + data.encode('utf-8') + return True + except: + return False + def checkFile(filename,data,folder): """check the files""" @@ -1559,17 +1942,11 @@ def checkFile(filename,data,folder): return False,"P missing in the filename" elif len(fn[0])!=7: return False,"P number has not the right length 6" + elif not checkUTF8(data): + return False,"not utf-8" else: - fn=os.path.join(folder,filename) - stin,out=os.popen4("/usr/bin/atfcheck.plx %s"%fn) - value=out.read() - ret= out.close() - - if value: - - return False,"atf checker error: %s"%value - else: - return True,"" + return True,"" + def splitatf(fh,dir=None,ext=None): """split it""" @@ -1577,38 +1954,52 @@ def splitatf(fh,dir=None,ext=None): nf=None i=0 - for lineTmp in fh.readlines(): - for line in lineTmp.split("\r"): - if ext: - i+=1 - if (i%100)==0: - ext.result+="." - if i==10000: - i=0 - ext.result+="
" - #check if basket name is in the first line - if line.find("#atf basket")>=0: #old convention - ret=line.replace('#atf basket ','') - ret=ret.split('_')[0] - elif line.find("#basket:")>=0: #new convention - ret=line.replace('#basket: ','') - ret=ret.split('_')[0] - - else: - if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile - if nf: - nf.close() #close last file - - - filename=line[1:].split("=")[0].rstrip()+".atf" - if dir: - filename=os.path.join(dir,filename) - nf=file(filename,"w") - if nf: - nf.write(line.replace("\n","")+"\n") + #ROC: why split \n first and then \r??? + if (type(fh) is StringType) or (type(fh) is UnicodeType): + iter=fh.split("\n") + else: + iter=fh.readlines() + + for lineTmp in iter: + lineTmp=lineTmp.replace(codecs.BOM_UTF8,'') # make sure that all BOM are removed.. + for line in lineTmp.split("\r"): + #logging.log("Deal with: %s"%line) + if ext: + i+=1 + if (i%100)==0: + ext.result+="." + if i==10000: + i=0 + ext.result+="
" + #check if basket name is in the first line + if line.find("#atf basket")>=0: #old convention + ret=line.replace('#atf basket ','') + ret=ret.split('_')[0] + elif line.find("#basket:")>=0: #new convention + ret=line.replace('#basket: ','') + ret=ret.split('_')[0] - nf.close() - fh.close() + else: + if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile + if nf: + nf.close() #close last file + + + filename=line[1:].split("=")[0].rstrip()+".atf" + if dir: + filename=os.path.join(dir,filename) + nf=file(filename,"w") + logging.info("open %s"%filename) + if nf: + nf.write(line.replace("\n","")+"\n") + + try: + nf.close() + except: + pass + + if not((type(fh) is StringType) or (type(fh) is UnicodeType)): + fh.close() return ret,len(os.listdir(dir)) @@ -1617,34 +2008,37 @@ class CDLIFileFolder(extVersionedFileFol security=ClassSecurityInfo() meta_type="CDLI Folder" - filesMetaType=['CDLI file'] - folderMetaType=['CDLI Folder'] - default_catalog='CDLICatalog' - defaultFileCatalog=default_catalog #wenn dieses definiert ist, wird beim hinzufŸgen einer neuen version eines files dieser catalog neuiniziert + file_meta_type=['CDLI file'] + folder_meta_type=['CDLI Folder'] + + file_catalog='CDLICatalog' + #downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert. tmpStore2={} + + def _newVersionedFile(self, id, title='', lockedBy=None, author=None): + """factory for versioned files. to be overridden in derived classes.""" + logging.debug("_newVersionedFile(CDLI)") + return CDLIFile(id, title, lockedBy=lockedBy, author=author) + def setTemp(self,name,value): """set tmp""" setattr(self,name,value) + deleteFileForm = PageTemplateFile("zpt/doDeleteFile", globals()) - def delete(self,ids): - """delete this file, i.e. move into a trash folder""" - - found=self.ZopeFind(self,obj_ids=['.trash']) - - if len(found)<1: - manage_addCDLIFileFolder(self, '.trash',title="Trash") - trash=self._getOb('.trash') - else: - trash=found[0][1] - + def delete(self,ids,REQUEST=None): + """delete these files""" if type(ids) is not ListType: ids=[ids] - cut=self.manage_cutObjects(ids) - trash.manage_pasteObjects(cut) + + self.manage_delObjects(ids) + if REQUEST is not None: + return self.index_html() + + def getVersionNumbersFromIds(self,ids): """get the numbers of the current versions of documents described by their ids""" @@ -1654,38 +2048,65 @@ class CDLIFileFolder(extVersionedFileFol founds=self.CDLICatalog.search({'title':searchStr}) for found in founds: - lastVersion=found.getObject().getLastVersion() + lastVersion=found.getObject().getContentObject() ret.append((found.getId,lastVersion)) return ret + def getFile(self,fn): + """get the content of the file fn""" + logging.debug("getFile: %s"%repr(fn)) + if not self.hasObject(fn): + # search deeper + founds=getattr(self, self.file_catalog).search({'textid':fn}) + if founds: + obj=founds[0].getObject().getContentObject() + else: + return "" + else: + obj = self[fn].getContentObject() + + return obj.getData()[0:] + + def checkCatalog(self,fn): """check if fn is in the catalog""" #TODO add checkCatalog - def findObjectsFromListWithVersion(self,list,author=None): """find objects from a list with versions @param list: list of tuples (cdliFile,version) """ - - - #self.REQUEST.SESSION['fileIds']=list#store fieldIds in session for further usage #self.REQUEST.SESSION['searchList']=self.REQUEST.SESSION['fileIds'] - pt=getattr(self,'filelistVersioned.html') return pt(search=list,author=author) - - def findObjectsFromList(self,enterList=None,display=False,start=None,upload=None,list=None,basketName=None,numberOfObjects=None,RESPONSE=None): + def getAllPNumbers(self): + """get a list of all files (resp their p-numbers) stored""" + + ret=[x.getId for x in self.CDLICatalog()] + + return ret + + def expandFile(self,fileId,fileTree): + """wildcard in fileID suche alle Treffer""" + founds=self.CDLICatalog({'title':fileId}) + for found in founds: + fileTree.add(found.getId) + logging.debug("ADDD:"+found.getId) + + def findObjectsFromList(self,enterList=None,display=False,start=None,upload=None,list=None,basketName=None,numberOfObjects=None,RESPONSE=None,REQUEST=None,returnHash=False,hash=None): """findObjectsFromList (, TAB oder LINE separated)""" + logging.debug("start: findObjectsFromList") + #logging.debug("start: findObjectsFromList"+repr(list)) + if upload: # list from file upload txt=upload.read() @@ -1716,32 +2137,85 @@ class CDLIFileFolder(extVersionedFileFol pt=getattr(self,'filelist.html') return pt(basketName=basketName,numberOfObjects=numberOfObjects) + if hash is not None and hasattr(self.cdliRoot,'v_tmpStore') and self.cdliRoot.v_tmpStore.has_key(hash): + + logging.debug("asking for storage2") + result =self.cdliRoot.v_tmpStore[hash] + if result: + logging.debug("give result from storage2") + return hash,self.cdliRoot.v_tmpStore[hash] + if list is not None: # got already a list + + logging.debug(" ----List version") ret=[] + fileTree=Set() + for fileId in list: - if fileId.find("*"): #check for wildcards - fileId=fileId + + if fileId.find("*")>-1: #check for wildcards + self.expandFile(fileId,fileTree) + elif len(fileId.split("."))==1: fileId=fileId+".atf" + fileTree.add(fileId) + #logging.debug(" -----:"+fileId) + #ret+=self.CDLICatalog({'title':fileId}) + #x =self.getFileObject(fileId) + #if x is not None: + # ret.append(x) - ret+=self.CDLICatalog({'title':fileId}) + + + ids = fileTree & self.v_file_ids + #self.REQUEST.SESSION['fileIds']=ids#store fieldIds in session for further usage + l=makelist(fileTree)[0:] + #logging.debug("l-list:"+repr(l)) + self.REQUEST.SESSION['fileIds']=l#store fieldIds in session for further usage + self.REQUEST.SESSION['searchList']=l + #self.REQUEST.SESSION['searchList']=['P000001.atf'] + + + hash = md5.new(repr(makelist(fileTree))).hexdigest() # erzeuge hash als identification + self.REQUEST.SESSION['hash']=hash + #TODO: do I need garbage collection for v_tmpStore ? + + #logging.debug("Hash:"+repr(hash)) +# +# if hasattr(self.cdliRoot,'v_tmpStore') and self.cdliRoot.v_tmpStore.has_key(hash): +# logging.debug("asking for storage") +# res=self.cdliRoot.v_tmpStore[hash] +# if res: +# if returnHash == True: +# return hash,res +# return res + #TODO: get rid of one of these.. - ids=[x.getObject().getId() for x in ret] - self.REQUEST.SESSION['fileIds']=ids#store fieldIds in session for further usage - self.REQUEST.SESSION['searchList']=self.REQUEST.SESSION['fileIds'] + #ids=[x.getObject().getId() for x in ret] + ret=[(self.getFileObject(x),self.getFileObjectLastVersion(x)) for x in ids] + #self.REQUEST.SESSION['fileIds']=ids#store fieldIds in session for further usage + #self.REQUEST.SESSION['searchList']=self.REQUEST.SESSION['fileIds'] + if display: pt=getattr(self,'filelist.html') return pt(search=ids) - else: + else: + #self.REQUEST.SESSION['hash'] = ret # store in session + if not hasattr(self,'v_tmpStore'): + self.cdliRoot.v_tmpStore={} + #logging.debug("HHHHHHNEU:"+repr(self.makelist(ids))) + #logging.debug("HHHHHHNEU:"+repr(hash)) + self.cdliRoot.v_tmpStore[hash] = ret # store in session + if returnHash == True: + return hash,ret return ret if start: RESPONSE.redirect("filelist.html?start:int="+str(start)) - security.declareProtected('Manage','createAllFilesAsSingleFile') def createAllFilesAsSingleFile(self,RESPONSE=None): @@ -1750,17 +2224,17 @@ class CDLIFileFolder(extVersionedFileFol def sortF(x,y): return cmp(x[0],y[0]) - catalog=getattr(self,self.default_catalog) + catalog=getattr(self,self.file_catalog) #tf,tfilename=mkstemp() - if not hasattr(self.temp_folder,'downloadCounter'): - self.temp_folder.downloadCounter=0 + if not hasattr(self.temp_folder,'downloadCounter'): + self.temp_folder.downloadCounter=0 if getattr(self.temp_folder,'downloadCounter',0) > 5: return """I am sorry, currently the server has to many requests for downloads, please come back later!""" self.temp_folder.downloadCounter+=1 self._p_changed=1 - get_transaction().commit() + transaction.get().commit() list=[(x.getId,x) for x in catalog()] list.sort(sortF) @@ -1777,10 +2251,11 @@ class CDLIFileFolder(extVersionedFileFol #os.write(tf,obj.getLastVersion().data) if RESPONSE: - RESPONSE.write(obj.getLastVersion().getData()[0:]) + RESPONSE.write(obj.getData()[0:]) + RESPONSE.write("\n") self.temp_folder.downloadCounter-=1 self._p_changed=1 - get_transaction().commit() + transaction.get().commit() #os.close(tf) #RESPONSE.redirect(self.absolute_url()+"/downloadFile?fn="%tfilename) return True @@ -1796,7 +2271,7 @@ class CDLIFileFolder(extVersionedFileFol def hasParent(self): """returns true falls subfolder""" - if self.aq_parent.meta_type in self.folderMetaType: + if self.aq_parent.meta_type in self.folder_meta_type: return True else: return False @@ -1804,16 +2279,16 @@ class CDLIFileFolder(extVersionedFileFol def getFolders(self): """get all subfolders""" ret=[] - folders=self.ZopeFind(self,obj_metatypes=self.folderMetaType) + folders=self.ZopeFind(self,obj_metatypes=self.folder_meta_type) for folder in folders: ret.append((folder[1], - len(self.ZopeFind(folder[1],obj_metatypes=self.folderMetaType)), - len(self.ZopeFind(folder[1],obj_metatypes=self.filesMetaType)) + len(self.ZopeFind(folder[1],obj_metatypes=self.folder_meta_type)), + len(self.ZopeFind(folder[1],obj_metatypes=self.file_meta_type)) )) return ret - security.declareProtected('View','index_html') + security.declareProtected('manage','index_html') def index_html(self): """main""" ext=self.ZopeFind(self,obj_ids=["index.html"]) @@ -1860,67 +2335,298 @@ class CDLIRoot(Folder): """main folder for cdli""" meta_type="CDLIRoot" - downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible + downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible - def showInLineIndex(self): - """get the index for debug purposes""" - print "show" - for x in self.lineIndex.iterkeys(): - print "word:",x - for y in self.lineIndex[x].iterkeys(): - print "doc",y,self.lineIndex[x][y] - - return self.lineIndex - - def searchInLineIndexDocs(self,word): - """search occurences""" - return list(self.lineIndex.get(word.upper()).keys()) - - def getLinesFromIndex(self,word,doc): - """get lines""" - return self.lineIndex[word][doc] - - def cleanInLineIndex(self): - """delete InlineIndex""" - for x in list(self.lineIndex.keys()): - del(self.lineIndex[x]) - print [x for x in self.lineIndex.keys()] - - return "ok" + file_catalog = 'CDLICatalog' - def storeInLineIndex(self,key,value): - """store in index""" - - if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType): - self.lineIndex=OOBTree() - li=self.lineIndex + # word splitter for search + splitter = {'words':cdliSplitter.wordSplitter(), + 'graphemes':cdliSplitter.graphemeSplitter()} + + + def viewATF(self,id,RESPONSE): + """view an Object""" + ob = self.CDLICatalog({'title':id}) + logging.debug(ob[0].getObject().getLastVersion().absolute_url()+"/view") + if len(ob)>0: + RESPONSE.redirect(ob[0].getObject().getLastVersion().absolute_url()+"/view") + return "not found" + + def history(self,id,RESPONSE): + """view an Object""" + ob = self.CDLICatalog({'title':id}) + if len(ob)>0: + RESPONSE.redirect(ob[0].absolute_url+"/history") + return "not found" + + + def downloadLocked(self,id,RESPONSE): + """view an Object""" + ob = self.CDLICatalog({'title':id}) + if len(ob)>0: + RESPONSE.redirect(ob[0].absolute_url+"/downloadLocked") + return "not found" + + def download(self,id,RESPONSE): + """view an Object""" + ob = self.CDLICatalog({'title':id}) + if len(ob)>0: + RESPONSE.redirect(ob[0].getLastVersion().absolute_url()) + return "not found" + def addCDLIFileObjectForm(self,id,RESPONSE): + """view an Object""" + ob = self.CDLICatalog({'title':id}) + if len(ob)>0: + RESPONSE.redirect(ob[0].absolute_url+"/addCDLIFileObjectForm") + return "not found" + + def addVersionedFileObjectForm(self,id,RESPONSE): + """view an Object""" + ob = self.CDLICatalog({'title':id}) + if len(ob)>0: + RESPONSE.redirect(ob[0].absolute_url+"/addVersionedFileObjectForm") + return "not found" + + def unlock(self,id,RESPONSE): + """view an Object""" + ob = self.CDLICatalog({'title':id}) + if len(ob)>0: + RESPONSE.redirect(ob[0].absolute_url+"/unlock") + return "not found" + + def getFileObject(self,fileId): + """get an object""" + x=self.v_files.get(fileId) + #logging.debug(x) + return x + + def getFileObjectLastVersion(self,fileId): + """get an object""" + x=self.v_files_lastVersion.get(fileId) + #logging.debug("lastVersion: "+repr(x)) + return x + + def showFileIds(self): + """showIds""" + return self.v_file_ids + + def generateFileBTree(self): + """erzeuge einen Btree aus allen Files""" + self.v_files = OOBTree() + self.v_files_lastVersion = OOBTree() + self.v_file_ids = Set() + + for x in self.CDLICatalog.searchResults(): + + self.v_files.update({x.getId:x.getObject()}) + self.v_files_lastVersion.update({x.getId:x.getObject().getLastVersion()}) + self.v_file_ids.add(x.getId) + logging.debug("add:"+x.getId+"XXX"+repr(x.getObject())) - if li.has_key(key): - - if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])): - li[key][value[0]].append(value[1]) # add it if now in the array - else: - li[key][value[0]]=[value[1]] # new array for lines - - else: - - li[key]=OOBTree()# new btree for lines - li[key][value[0]]=[value[1]] - + return True + + + def updateOrAddToFileBTree(self,obj): + """update a BTree""" + self.v_files.update({obj.getId():obj}) + self.v_files_lastVersion.update({obj.getId():obj.getLastVersion()}) + + self.v_file_ids.add(obj.getId()) + logging.debug("update:"+obj.getId()+"XXX"+repr(obj)) + + def deleteFromBTree(self,objId): + """delete an obj""" + self.v_files.pop(objId) + self.v_files_lastVersion.pop(objId) + self.v_file_ids.remove(objId) - self.lineIndex=li - - get_transaction().commit() + + + def deleteFiles(self,ids): + """delete files""" + for id in ids: + founds=self.CDLICatalog.search({'title':id.split(".")[0]}) + if founds: + logging.debug("deleting %s"%founds) + folder=founds[0].getObject().aq_parent #get the parent folder of the object + logging.debug("deleting from %s"%folder) + cut=folder.delete([founds[0].getId]) #cut it out + + + + def searchText(self, query, index='graphemes'): + """searches query in the fulltext index and returns a list of file ids/P-numbers""" + # see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13 + logging.debug("searchtext for '%s' in index %s"%(query,index)) + #import Products.ZCTextIndex.QueryParser + #qp = QueryParser.QueryParser() + #logging.debug() + idxQuery = {index:{'query':query}} + idx = getattr(self, self.file_catalog) + # do search + resultset = idx.search(query_request=idxQuery,sort_index='textid') + # put only the P-Number in the result + results = [res.getId[:7] for res in resultset] + logging.debug("searchtext: found %d texts"%len(results)) + return results + + + def getFile(self, pnum): + """get the translit file with the given pnum""" + f = getattr(self, self.file_catalog).search({'textid':pnum}) + if not f: + return "" + return f[0].getObject().getData() + - def showFile(self,fileId): - """show a file""" - f=self.CDLICatalog({'title':fileId}) + def showFile(self,fileId,wholePage=False): + """show a file + @param fileId: P-Number of the document to be displayed + """ + f=getattr(self, self.file_catalog).search({'textid':fileId}) if not f: return "" - return f[0].getObject().getLastVersionFormattedData() + if wholePage: + logging.debug("show whole page") + return f[0].getObject().getContentObject().view() + else: + return f[0].getObject().getLastVersionFormattedData() + + + def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): + """get lines with word from FileId""" + logging.debug("showwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) + file = formatAtfFullLineNum(self.getFile(fileId)) + ret=[] + + # add whitespace before and whitespace and line-end to splitter bounds expressions + bounds = self.splitter[indexName].bounds + splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) + # clean word expression + # TODO: this should use QueryParser itself + # take out double quotes + word = word.replace('"','') + # take out ignorable signs + ignorable = self.splitter[indexName].ignorex + word = ignorable.sub('', word) + # compile into regexp objects and escape parens + wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] + + for line in file.splitlines(): + for word in wordlist: + #logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignoreable.sub('',line))) + if word.search(ignorable.sub('',line)): + line = formatAtfLineHtml(line) + ret.append(line) + break + + return ret + + + def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False): + """ + get lines with word from all ids in list FileIds. + returns dict with id:lines pairs. + """ + logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) + + return dict([(id,self.showWordInFile(id, word, indexName, regExp)) for id in fileIds]) + + + def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): + """get text with word highlighted from FileId""" + logging.debug("tagwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) + + file=self.getFile(fileId) + tagStart=u'' + tagEnd=u'' + tagStr=tagStart + u'%%s' + tagEnd + ret=[] + + # add whitespace to splitter bounds expressions and compile into regexp object + bounds = self.splitter[indexName].bounds + wordsplit = re.compile("(%s|\s)"%bounds) + # clean word expression + # TODO: this should use QueryParser itself + word = word.replace('"','') # take out double quotes + # take out ignoreable signs + ignorable = self.splitter[indexName].ignorex + word = ignorable.sub('', word) + # split search terms by blanks + words = word.split(' ') + # split search terms again (for grapheme search with words) + splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words)) + + for line in file.splitlines(): + line = unicodify(line) + # ignore lemma and other lines + if line.lstrip().startswith('#lem:'): + continue + # ignore p-num line + if line.startswith('&P'): + continue + # ignore version lines + if line.startswith('#version'): + continue + # ignore atf type lines + if line.startswith('#atf:'): + continue + + # first scan + hitwords = [] + for w in words: + if ignorable.sub('',line).find(w) > -1: + # word is in line + # append split word for grapheme search with words + hitwords.extend(splitwords[w]) + #hitwords.extend(wordsplit.split(w)) + + # examine hits closer + if hitwords: + # split line into words + parts = wordsplit.split(line) + line = "" + for p in parts: + #logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords)) + # reassemble line + if ignorable.sub('', p) in hitwords: + #logging.debug("tagwordinfile: found %s in %s"%(p,hitwords)) + # this part was found + line += tagStart + formatAtfHtml(p) + tagEnd + else: + line += formatAtfHtml(p) + + else: + # no hits + line = formatAtfHtml(line) + + ret.append(line) + + return u'
\n'.join(ret) + + + + def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False): + """ + get texts with highlighted word from all ids in list FileIds. + returns dict with id:text pairs. + """ + logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) + return dict([(id,self.tagWordInFile(id, word, indexName, regExp)) for id in fileIds]) + + + def getFileVersionList(self, pnum): + """get the version history as a list for the translit file with the given pnum""" + f = getattr(self, self.file_catalog).search({'textid':pnum}) + if not f: + return [] + + return f[0].getObject().getVersionList() + + def URLquote(self,str): """quote url""" return urllib.quote(str) @@ -1929,6 +2635,14 @@ class CDLIRoot(Folder): """unquote url""" return urllib.unquote(str) + def URLquote_plus(self,str): + """quote url""" + return urllib.quote_plus(str) + + def URLunquote_plus(self,str): + """unquote url""" + return urllib.unquote_plus(str) + def forceunlock(self): "break all locks" @@ -1939,8 +2653,9 @@ class CDLIRoot(Folder): if un and un !="": ret.append((f[0],un)) - return ret - + return ret + + def getChangesByAuthor(self,author,n=100): """getChangesByAuthor""" zcat=self.CDLIObjectsCatalog @@ -1984,6 +2699,11 @@ class CDLIRoot(Folder): return """ 2;url=%s?repeat=%s """%(self.absolute_url()+txt,threadName) + def refreshTxtBasket(self,txt="",threadName=None): + """txt fuer refresh""" + + return """ 2;url=%s?repeat=%s """%(txt,threadName) + def getResult(self,threadName=None): """result of thread""" @@ -2002,13 +2722,52 @@ class CDLIRoot(Folder): return ret - + def uploadATFRPC(self,data,username): + """upload an atffile via xml-rpc""" + uploader=uploadATFThread() + + #generate an random id for the upload object + from random import randint + if (not self.REQUEST.SESSION.get('idTmp',None)): + + idTmp=str(randint(0,1000000000)) + self.REQUEST.SESSION['idTmp']=idTmp + else: + idTmp=self.REQUEST.SESSION.get('idTmp',None) + + + uploader.set(data,0,username,idTmp) + + stObj=uploader.run() + + processor=uploadATFfinallyThread() + + basketname=stObj.returnValue['basketNameFromFile'] + + processor.set("uploadchanged",basketname=basketname,SESSION=stObj.returnValue,username=username,serverport=self.REQUEST['SERVER_PORT']) + + processor.run() + + + return generateXMLReturn(stObj.returnValue) + def uploadATF(self,repeat=None,upload=None,basketId=0,RESPONSE=None): - """standard ausgabe""" + """upload an atf file / basket file""" #self._v_uploadATF.returnValue=None + + #generate an random id for the upload thread + from random import randint + if (not self.REQUEST.SESSION.get('idTmp',None)): + idTmp=str(randint(0,1000000000)) + self.REQUEST.SESSION['idTmp']=idTmp + else: + idTmp=self.REQUEST.SESSION.get('idTmp',None) + + threadName=repeat if not threadName or threadName=="": + #new thread not called from the waiting page tmpVar=False thread=uploadATFThread() @@ -2019,9 +2778,10 @@ class CDLIRoot(Folder): self._v_uploadATF[threadName]=thread #self._xmltrans.start() #thread=Thread(target=self._v_uploadATF) - - self._v_uploadATF[threadName].set(upload,basketId,self.REQUEST['AUTHENTICATED_USER'],serverport=self.REQUEST['SERVER_PORT']) + logging.info("set thread. extern") + self._v_uploadATF[threadName].set(upload,basketId,self.REQUEST['AUTHENTICATED_USER'],idTmp,serverport=self.REQUEST['SERVER_PORT']) #thread.start() + logging.info("start thread. extern") self._v_uploadATF[threadName].start() @@ -2043,7 +2803,7 @@ class CDLIRoot(Folder): if threadName == thread.getName(): self._v_uploadATF[threadName]=thread - if not self._v_uploadATF[threadName].returnValue: + if self._v_uploadATF.get(threadName,None) and (not self._v_uploadATF[threadName].returnValue): wait_template=self.aq_parent.ZopeFind(self.aq_parent,obj_ids=['wait_template']) @@ -2055,29 +2815,8 @@ class CDLIRoot(Folder): return pt(txt='/uploadATF',threadName=threadName) else: -# tmp={} -# for key in self._v_uploadATF[threadName].returnValue.keys(): -# t=self._v_uploadATF[threadName].returnValue[key] -# if type(t) is ListType: -# tmp[key]=self._v_uploadATF[threadName].returnValue[key][0:] -# else: -# tmp[key]=self._v_uploadATF[threadName].returnValue[key] -# repr(tmp[key]),repr(key) -# -# # - #tmp=self.cdli_main.tmpStore2[threadName] - tmp=self._v_uploadATF[threadName].returnValue - - self._v_uploadATF[threadName].continueVar=False - - self.REQUEST.SESSION['changed']=[x[0].getId() for x in tmp['changed']] - self.REQUEST.SESSION['lockerrors']=[x[0].getId() for x in tmp['lockerrors']] - self.REQUEST.SESSION['errors']=tmp['errors'] - self.REQUEST.SESSION['newPs']=tmp['newPs'] - self.REQUEST.SESSION['tmpdir']=tmp['dir'] - #del(self.cdli_main.tmpStore2[threadName]) - - + tmp=getattr(self.temp_folder,idTmp).returnValue + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadCheck.zpt')).__of__(self) return pt(changed=tmp['changed'],lockerrors=tmp['lockerrors'],errors=tmp['errors'],dir=tmp['dir'],newPs=tmp['newPs'],basketLen=tmp['basketLen'],numberOfFiles=tmp['numberOfFiles'], @@ -2106,8 +2845,9 @@ class CDLIRoot(Folder): self._v_uploadATF[threadName]=thread - - self._v_uploadATF[threadName].set(procedure,comment=comment,basketname=basketname,unlock=unlock,SESSION=self.REQUEST.SESSION,username=self.REQUEST['AUTHENTICATED_USER'],serverport=self.REQUEST['SERVER_PORT']) + idTmp=self.REQUEST.SESSION['idTmp'] + stObj=getattr(self.temp_folder,idTmp) + self._v_uploadATF[threadName].set(procedure,comment=comment,basketname=basketname,unlock=unlock,SESSION=stObj.returnValue,username=self.REQUEST['AUTHENTICATED_USER'],serverport=self.REQUEST['SERVER_PORT']) self._v_uploadATF[threadName].start() @@ -2140,12 +2880,24 @@ class CDLIRoot(Folder): pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadATFWait.zpt')).__of__(self) return pt(txt='/uploadATFfinally',threadName=threadName) else: - + + + idTmp=self.REQUEST.SESSION['idTmp'] + stObj=getattr(self.temp_folder,idTmp) + self.REQUEST.SESSION['idTmp']=None + + #update changed + logging.debug("dir:"+repr(stObj.returnValue['changed'])) + for x in stObj.returnValue['changed']: + ob=self.CDLICatalog.search({'title':x[0]}) + + self.cdliRoot.updateOrAddToFileBTree(ob[0].getObject()) if RESPONSE is not None: RESPONSE.redirect(self.absolute_url()) def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None): """import files""" + logging.debug("importFiles folderName=%s files=%s ext=%s"%(folderName,files,ext)) root=self.cdli_main count=0 if not files: @@ -2154,44 +2906,49 @@ class CDLIRoot(Folder): for f in files: folder=f[0:3] f2=f[0:5] + + #check if main folder PXX already exists obj=self.ZopeFind(root,obj_ids=[folder]) + logging.debug("importFiles: folder=%s f2=%s obj=%s"%(folder,f2,obj)) if ext: - - ext.result+="

adding: %s

"%f - if not obj: + ext.result="

adding: %s

"%f+ext.result + + + if not obj: # if not create it manage_addCDLIFileFolder(root,folder,folder) fobj=getattr(root,folder) - #get_transaction().commit() + #transaction.get().commit() + else: fobj=obj[0][1] + #check IF PYYYYY already exist obj2=fobj.ZopeFind(fobj,obj_ids=[f2]) + logging.debug("importFiles: fobj=%s obj2=%s"%(fobj,obj2)) - if not obj2: + if not obj2:# if not create it manage_addCDLIFileFolder(fobj,f2,f2) fobj2=getattr(fobj,f2) else: fobj2=obj2[0][1] + # not add the file file2=os.path.join(folderName,f) id=f - manage_addCDLIFile(fobj2,f,'','') - id=f - ob=fobj2._getOb(f) - ob.title=id - - manage_addCDLIFileObject(ob,id,comment,author,file2,content_type='',from_tmp=True) - self.CDLICatalog.catalog_object(ob) - #self.CDLICatalog.manage_catalogFoundItems(obj_ids=[id],search_sub=1) - #self.CDLICatalog.manage_catalogObject(self.REQUEST, self.REQUEST.RESPONSE, 'CDLICatalog', urlparse.urlparse(ob.absolute_url())[1]) - count+=1 - - if count > 1000: - print "committing" - get_transaction().commit() - count=0 - get_transaction().commit() + logging.debug("importFiles: addCDLIFile fobj2=%s, f=%s file2=%s"%(fobj2,repr(f),repr(file2))) + fobj2.addFile(vC='',file=file(file2),author=author,newName=f) + count+=1 + + #now add the file to the storage + ob = getattr(fobj2,f) + self.cdliRoot.updateOrAddToFileBTree(ob) + + if count%100==0: + logging.debug("importfiles: committing") + transaction.get().commit() + + transaction.get().commit() return "ok" @@ -2211,7 +2968,10 @@ def manage_addCDLIRoot(self, id, title=' ob=CDLIRoot() ob.id=str(id) ob.title=title - self._setObject(id, ob) + try: + self._setObject(id, ob) + except: + pass ob=self._getOb(id) checkPermission=getSecurityManager().checkPermission