--- cdli/cdli_files.py 2006/10/05 07:22:58 1.48 +++ cdli/cdli_files.py 2006/12/22 20:35:33 1.55 @@ -6,6 +6,8 @@ import os.path import os from types import * import urlparse +import urllib +import cgi from OFS.OrderedFolder import OrderedFolder from OFS.SimpleItem import SimpleItem import time @@ -19,7 +21,77 @@ from ZPublisher.HTTPRequest import HTTPR from ZPublisher.HTTPResponse import HTTPResponse from ZPublisher.BaseRequest import RequestContainer import threading - +from BTrees.OOBTree import OOBTree + +def unique(s): + """Return a list of the elements in s, but without duplicates. + + For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], + unique("abcabc") some permutation of ["a", "b", "c"], and + unique(([1, 2], [2, 3], [1, 2])) some permutation of + [[2, 3], [1, 2]]. + + For best speed, all sequence elements should be hashable. Then + unique() will usually work in linear time. + + If not possible, the sequence elements should enjoy a total + ordering, and if list(s).sort() doesn't raise TypeError it's + assumed that they do enjoy a total ordering. Then unique() will + usually work in O(N*log2(N)) time. + + If that's not possible either, the sequence elements must support + equality-testing. Then unique() will usually work in quadratic + time. + (from the python cookbook) + """ + + n = len(s) + if n == 0: + return [] + + # Try using a dict first, as that's the fastest and will usually + # work. If it doesn't work, it will usually fail quickly, so it + # usually doesn't cost much to *try* it. It requires that all the + # sequence elements be hashable, and support equality comparison. + u = {} + try: + for x in s: + u[x] = 1 + except TypeError: + del u # move on to the next method + else: + return u.keys() + + # We can't hash all the elements. Second fastest is to sort, + # which brings the equal elements together; then duplicates are + # easy to weed out in a single pass. + # NOTE: Python's list.sort() was designed to be efficient in the + # presence of many duplicate elements. This isn't true of all + # sort functions in all languages or libraries, so this approach + # is more effective in Python than it may be elsewhere. + try: + t = list(s) + t.sort() + except TypeError: + del t # move on to the next method + else: + assert n > 0 + last = t[0] + lasti = i = 1 + while i < n: + if t[i] != last: + t[lasti] = last = t[i] + lasti += 1 + i += 1 + return t[:lasti] + + # Brute force is all that's left. + u = [] + for x in s: + if x not in u: + u.append(x) + return u + class BasketContent(SimpleItem): """classe fuer den Inhalt eines Baskets""" @@ -98,7 +170,8 @@ class uploadATFfinallyThread(Thread): conn.close() #set flag for end of this method self.end=True - return True + print "ended" + return True def __del__(self): """delete""" @@ -141,7 +214,7 @@ class uploadATFfinallyThread(Thread): if len(founds)>0: SESSION['author']=str(username) self.result+="

Changing : %s"%fn - founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn,from_tmp=True)) + founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True) #now add the new files @@ -239,10 +312,10 @@ class uploadATFThread(Thread): #ctx.cdliRoot.cdli_main.tmpStore2[self.getName()[0:]]=self.returnValue get_transaction().commit() - + while self.continueVar: pass - + conn.close() @@ -688,7 +761,19 @@ class CDLIBasketContainer(OrderedFolder) self.id=id self.title=title - + + def getBasketsId(self): + """get all baskets als klartext""" + + ret="" + baskets=self.ZopeFind(self,obj_metatypes=['CDLIBasket']) + for basket in baskets: + com,user,time,values = basket[1].getContentIds() + ret+= "BASKET:"+com+"\t"+user+"\t"+time+"\n" + for x in values: + ret+= x[0]+"\t"+x[1]+"\n" + return ret + def getBaskets(self,sortField='title'): """get all baskets files""" @@ -762,9 +847,13 @@ class CDLIBasketContainer(OrderedFolder) def setActiveBasket(self,basketId,REQUEST=None): """store active basketId in a cookie""" self.REQUEST.RESPONSE.setCookie("CDLIActiveBasket",basketId,path="/") - + try: + qs=cgi.parse_qs(REQUEST['QUERY_STRING']) + del(qs['basketId']) + except: + qs={} if REQUEST: - REQUEST.RESPONSE.redirect(REQUEST['URL1']+'?'+REQUEST['QUERY_STRING']) + REQUEST.RESPONSE.redirect(REQUEST['URL1']+'?'+urllib.urlencode(qs)) def getActiveBasket(self): """get active basket from cookie""" @@ -968,8 +1057,10 @@ class CDLIBasket(Folder,CatalogAware): newContent=[] added=0 for id in ids: - founds=self.CDLICatalog.search({'title':id}) - + try: + founds=self.CDLICatalog.search({'title':id}) + except: + founds=[] for found in founds: if found.getObject() not in oldContent: #TODO: was passiert wenn, man eine Object dazufŸgt, das schon da ist aber eine neuere version @@ -986,6 +1077,19 @@ class CDLIBasket(Folder,CatalogAware): return added + + + + def getContentIds(self): + """print basket content""" + ret=[] + lv=self.getLastVersion() + for obj in lv.content.getContent(): + ret.append((obj[0].getId(),obj[1].getId())) + + + return lv.getComment(),lv.getUser(),lv.getTime(),ret + def changeBasket(self,ids,submit,RESPONSE=None,REQUEST=None): """change a basket""" if submit=="update": @@ -1131,12 +1235,12 @@ class CDLIBasketVersion(Implicit,Persist if (procedure=="downloadAll") or (object[1].lockedBy=='') or (object[1].lockedBy==self.REQUEST['AUTHENTICATED_USER']): if current=="no": #version as they are in the basket - ret+=str(object[0].data)+"\n" + ret+=str(object[0].getData())+"\n" elif current=="yes": #search current object founds=self.CDLICatalog.search({'title':object[0].getId()}) if len(founds)>0: - ret+=str(founds[0].getObject().getLastVersion().data)+"\n" + ret+=str(founds[0].getObject().getLastVersion().getData())+"\n" if lock and object[1].lockedBy=='': object[1].lockedBy=self.REQUEST['AUTHENTICATED_USER'] @@ -1273,7 +1377,12 @@ class CDLIFileObject(CatalogAware,extVer return True security.declarePublic('view') - + + def getFormattedData(self): + """fromat text""" + data=self.getData() + return re.sub("\s\#lem"," #lem",data) #remove return vor #lem + def view(self): """view file""" pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self) @@ -1283,9 +1392,9 @@ class CDLIFileObject(CatalogAware,extVer def getPNumber(self): """get the pnumber""" try: - txt=re.match("&[Pp](\d*)\s*=([^\r\n]*)",self.data[0:]) + txt=re.match("&[Pp](\d*)\s*=([^\r\n]*)",self.getData()[0:]) except: - txt=self.data[0:] + txt=self.getData()[0:] return "ERROR" try: @@ -1343,6 +1452,8 @@ def manage_addCDLIFileObject(self,id,vC= self._getOb(id).content_type=content_type self.reindex_object() + self._getOb(id).reindex_object() + if REQUEST is not None: REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main') @@ -1353,6 +1464,17 @@ class CDLIFile(extVersionedFile,CatalogA default_catalog='CDLICatalog' #security.declarePublic('history') + def getLastVersionData(self): + """get last version data""" + return self.getLastVersion().getData() + + def getLastVersionFormattedData(self): + """get last version data""" + return self.getLastVersion().getFormattedData() + + #security.declarePublic('history') + + def history(self): """history""" @@ -1524,35 +1646,37 @@ def splitatf(fh,dir=None,ext=None): ret=None nf=None i=0 - for line in fh.readlines(): - if ext: - i+=1 - if (i%100)==0: - ext.result+="." - if i==10000: - i=0 - ext.result+="
" - #check if basket name is in the first line - if line.find("#atf basket")>=0: #old convention - ret=line.replace('#atf basket ','') - ret=ret.split('_')[0] - elif line.find("#basket:")>=0: #new convention - ret=line.replace('#basket: ','') - ret=ret.split('_')[0] - - else: - if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile - if nf: - nf.close() #close last file - - - filename=line[1:].split("=")[0].rstrip()+".atf" - if dir: - filename=os.path.join(dir,filename) - nf=file(filename,"w") - if nf: - nf.write(line) - + + for lineTmp in fh.readlines(): + for line in lineTmp.split("\r"): + if ext: + i+=1 + if (i%100)==0: + ext.result+="." + if i==10000: + i=0 + ext.result+="
" + #check if basket name is in the first line + if line.find("#atf basket")>=0: #old convention + ret=line.replace('#atf basket ','') + ret=ret.split('_')[0] + elif line.find("#basket:")>=0: #new convention + ret=line.replace('#basket: ','') + ret=ret.split('_')[0] + + else: + if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile + if nf: + nf.close() #close last file + + + filename=line[1:].split("=")[0].rstrip()+".atf" + if dir: + filename=os.path.join(dir,filename) + nf=file(filename,"w") + if nf: + nf.write(line.replace("\n","")+"\n") + nf.close() fh.close() return ret,len(os.listdir(dir)) @@ -1698,8 +1822,10 @@ class CDLIFileFolder(extVersionedFileFol catalog=getattr(self,self.default_catalog) #tf,tfilename=mkstemp() - - if self.temp_folder.downloadCounter > 5: + if not hasattr(self.temp_folder,'downloadCounter'): + self.temp_folder.downloadCounter=0 + + if getattr(self.temp_folder,'downloadCounter',0) > 5: return """I am sorry, currently the server has to many requests for downloads, please come back later!""" self.temp_folder.downloadCounter+=1 @@ -1721,7 +1847,7 @@ class CDLIFileFolder(extVersionedFileFol #os.write(tf,obj.getLastVersion().data) if RESPONSE: - RESPONSE.write(obj.getLastVersion().data[0:]) + RESPONSE.write(obj.getLastVersion().getData()[0:]) self.temp_folder.downloadCounter-=1 self._p_changed=1 get_transaction().commit() @@ -1805,6 +1931,87 @@ class CDLIRoot(Folder): meta_type="CDLIRoot" downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible + + def showInLineIndex(self): + """get the index for debug purposes""" + print "show" + for x in self.lineIndex.iterkeys(): + print "word:",x + for y in self.lineIndex[x].iterkeys(): + print "doc",y,self.lineIndex[x][y] + + return self.lineIndex + + def searchInLineIndexDocs(self,word,uniq=True): + """search occurences""" + + + lst=list(self.lineIndex.get(word.upper()).keys()) + if uniq: + return unique(lst) + else: + return lst + + def getLinesFromIndex(self,word,doc): + """get lines""" + return self.lineIndex[word][doc] + + def cleanInLineIndex(self): + """delete InlineIndex""" + for x in list(self.lineIndex.keys()): + del(self.lineIndex[x]) + print [x for x in self.lineIndex.keys()] + + return "ok" + + def storeInLineIndex(self,key,value): + """store in index""" + + if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType): + self.lineIndex=OOBTree() + li=self.lineIndex + + if li.has_key(key): + +# if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])): + if li[key].has_key(value[0]): + tmp=li[key][value[0]] + tmp.append(value[1]) # add it if now in the array + li[key][value[0]]=tmp[0:] + else: + li[key][value[0]]=[value[1]] # new array for lines + + else: + + li[key]=OOBTree()# new btree for lines + li[key][value[0]]=[value[1]] + + + self.lineIndex=li + + get_transaction().commit() + + + def showFile(self,fileId): + """show a file""" + f=self.CDLICatalog({'title':fileId}) + if not f: + return "" + + return f[0].getObject().getLastVersionFormattedData() + + def showLineFromFile(self,fileId,lineNum): + """get line lineNum fromFileId""" + + file=self.showFile(fileId) + str="^%s\.(.*)"%lineNum + + m=re.search(str,file,flags=re.M) + if m: + return m.group(1) + else: + return "" + def URLquote(self,str): """quote url""" return urllib.quote(str) @@ -1952,7 +2159,7 @@ class CDLIRoot(Folder): #tmp=self.cdli_main.tmpStore2[threadName] tmp=self._v_uploadATF[threadName].returnValue - #self._v_uploadATF[threadName].continueVar=False + self._v_uploadATF[threadName].continueVar=False self.REQUEST.SESSION['changed']=[x[0].getId() for x in tmp['changed']] self.REQUEST.SESSION['lockerrors']=[x[0].getId() for x in tmp['lockerrors']] @@ -2028,10 +2235,10 @@ class CDLIRoot(Folder): if RESPONSE is not None: RESPONSE.redirect(self.absolute_url()) - def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/Documents/workspace/cdli/atf", files=None,ext=None): + def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None): """import files""" root=self.cdli_main - + count=0 if not files: files=os.listdir(folderName) @@ -2069,7 +2276,13 @@ class CDLIRoot(Folder): self.CDLICatalog.catalog_object(ob) #self.CDLICatalog.manage_catalogFoundItems(obj_ids=[id],search_sub=1) #self.CDLICatalog.manage_catalogObject(self.REQUEST, self.REQUEST.RESPONSE, 'CDLICatalog', urlparse.urlparse(ob.absolute_url())[1]) - + count+=1 + + if count > 1000: + print "committing" + get_transaction().commit() + count=0 + get_transaction().commit() return "ok"