--- cdli/cdli_files.py 2006/10/05 07:22:58 1.48
+++ cdli/cdli_files.py 2006/12/22 20:35:33 1.55
@@ -6,6 +6,8 @@ import os.path
 import os
 from types import *
 import urlparse
+import urllib
+import cgi
 from OFS.OrderedFolder import OrderedFolder
 from OFS.SimpleItem import SimpleItem
 import time
@@ -19,7 +21,77 @@ from ZPublisher.HTTPRequest import HTTPR
 from ZPublisher.HTTPResponse import HTTPResponse
 from ZPublisher.BaseRequest import RequestContainer
 import threading
-
+from BTrees.OOBTree import OOBTree
+
+def unique(s):
+    """Return a list of the elements in s, but without duplicates.
+
+    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+    unique("abcabc") some permutation of ["a", "b", "c"], and
+    unique(([1, 2], [2, 3], [1, 2])) some permutation of
+    [[2, 3], [1, 2]].
+
+    For best speed, all sequence elements should be hashable. Then
+    unique() will usually work in linear time.
+
+    If not possible, the sequence elements should enjoy a total
+    ordering, and if list(s).sort() doesn't raise TypeError it's
+    assumed that they do enjoy a total ordering. Then unique() will
+    usually work in O(N*log2(N)) time.
+
+    If that's not possible either, the sequence elements must support
+    equality-testing. Then unique() will usually work in quadratic
+    time.
+    (from the python cookbook)
+    """
+
+    n = len(s)
+    if n == 0:
+        return []
+
+    # Try using a dict first, as that's the fastest and will usually
+    # work. If it doesn't work, it will usually fail quickly, so it
+    # usually doesn't cost much to *try* it. It requires that all the
+    # sequence elements be hashable, and support equality comparison.
+    u = {}
+    try:
+        for x in s:
+            u[x] = 1
+    except TypeError:
+        del u # move on to the next method
+    else:
+        return list(u) # a real list even where dict.keys() is only a view
+
+    # We can't hash all the elements. Second fastest is to sort,
+    # which brings the equal elements together; then duplicates are
+    # easy to weed out in a single pass.
+    # NOTE: Python's list.sort() was designed to be efficient in the
+    # presence of many duplicate elements. This isn't true of all
+    # sort functions in all languages or libraries, so this approach
+    # is more effective in Python than it may be elsewhere.
+    try:
+        t = list(s)
+        t.sort()
+    except TypeError:
+        t = None # drop the copy; unlike "del t" this is safe if list(s) itself raised
+    else:
+        assert n > 0
+        last = t[0]
+        lasti = i = 1
+        while i < n:
+            if t[i] != last:
+                t[lasti] = last = t[i]
+                lasti += 1
+            i += 1
+        return t[:lasti]
+
+    # Brute force is all that's left.
+    u = []
+    for x in s:
+        if x not in u:
+            u.append(x)
+    return u
+
 class BasketContent(SimpleItem):
     """classe fuer den Inhalt eines Baskets"""
@@ -98,7 +170,8 @@ class uploadATFfinallyThread(Thread):
 conn.close()
 #set flag for end of this method
 self.end=True
- return True
+ print "ended"
+ return True
 def __del__(self):
 """delete"""
@@ -141,7 +214,7 @@ class uploadATFfinallyThread(Thread):
 if len(founds)>0:
 SESSION['author']=str(username)
 self.result+="
Changing : %s"%fn
- founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn,from_tmp=True))
+ founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True)
#now add the new files
@@ -239,10 +312,10 @@ class uploadATFThread(Thread):
#ctx.cdliRoot.cdli_main.tmpStore2[self.getName()[0:]]=self.returnValue
get_transaction().commit()
-
+
while self.continueVar:
pass
-
+
conn.close()
@@ -688,7 +761,19 @@ class CDLIBasketContainer(OrderedFolder)
self.id=id
self.title=title
-
+
+    def getBasketsId(self):
+        """Return all baskets and their content ids as tab-separated plain text."""
+        chunks=[]
+        for found in self.ZopeFind(self,obj_metatypes=['CDLIBasket']):
+            # found[1] is asked for (comment, user, time, id pairs);
+            # presumably found is an (id, object) pair - verify against ZopeFind
+            comment,owner,stamp,idPairs = found[1].getContentIds()
+            chunks.append("BASKET:"+comment+"\t"+owner+"\t"+stamp+"\n")
+            for pair in idPairs:
+                chunks.append(pair[0]+"\t"+pair[1]+"\n")
+        return "".join(chunks)
+
def getBaskets(self,sortField='title'):
"""get all baskets files"""
@@ -762,9 +847,13 @@ class CDLIBasketContainer(OrderedFolder)
def setActiveBasket(self,basketId,REQUEST=None):
"""store active basketId in a cookie"""
self.REQUEST.RESPONSE.setCookie("CDLIActiveBasket",basketId,path="/")
-
if REQUEST:
- REQUEST.RESPONSE.redirect(REQUEST['URL1']+'?'+REQUEST['QUERY_STRING'])
+            # Redirect back to URL1 with the original query string minus
+            # basketId. Only done when a REQUEST was passed in, so no
+            # blanket "except" is needed to cover REQUEST=None.
+            # cgi.parse_qs maps every parameter name to a *list* of values.
+            qs=cgi.parse_qs(REQUEST['QUERY_STRING'])
+            # a missing basketId must not discard the remaining parameters
+            qs.pop('basketId',None)
+            # doseq=True emits one name=value pair per list element;
+            # without it urlencode would encode the repr of each list
+            REQUEST.RESPONSE.redirect(REQUEST['URL1']+'?'+urllib.urlencode(qs,doseq=True))
def getActiveBasket(self):
"""get active basket from cookie"""
@@ -968,8 +1057,10 @@ class CDLIBasket(Folder,CatalogAware):
newContent=[]
added=0
for id in ids:
- founds=self.CDLICatalog.search({'title':id})
-
+ try:
+ founds=self.CDLICatalog.search({'title':id})
+ except:
+ founds=[]
for found in founds:
if found.getObject() not in oldContent:
#TODO: was passiert wenn, man eine Object dazufgt, das schon da ist aber eine neuere version
@@ -986,6 +1077,19 @@ class CDLIBasket(Folder,CatalogAware):
return added
+
+
+
+    def getContentIds(self):
+        """Return (comment, user, time, id pairs) for the last version of this basket.
+
+        Each id pair holds the getId() results of the first and second
+        element of one entry of the version's content.
+        """
+        latest=self.getLastVersion()
+        idPairs=[(entry[0].getId(),entry[1].getId()) for entry in latest.content.getContent()]
+        return latest.getComment(),latest.getUser(),latest.getTime(),idPairs
+
def changeBasket(self,ids,submit,RESPONSE=None,REQUEST=None):
"""change a basket"""
if submit=="update":
@@ -1131,12 +1235,12 @@ class CDLIBasketVersion(Implicit,Persist
if (procedure=="downloadAll") or (object[1].lockedBy=='') or (object[1].lockedBy==self.REQUEST['AUTHENTICATED_USER']):
if current=="no": #version as they are in the basket
- ret+=str(object[0].data)+"\n"
+ ret+=str(object[0].getData())+"\n"
elif current=="yes":
#search current object
founds=self.CDLICatalog.search({'title':object[0].getId()})
if len(founds)>0:
- ret+=str(founds[0].getObject().getLastVersion().data)+"\n"
+ ret+=str(founds[0].getObject().getLastVersion().getData())+"\n"
if lock and object[1].lockedBy=='':
object[1].lockedBy=self.REQUEST['AUTHENTICATED_USER']
@@ -1273,7 +1377,12 @@ class CDLIFileObject(CatalogAware,extVer
return True
security.declarePublic('view')
-
+
+    def getFormattedData(self):
+        """Return the file data with the whitespace before each "#lem" normalised.
+
+        Every single whitespace character (for instance a line break)
+        that directly precedes "#lem" is replaced by one space.
+        """
+        data=self.getData()
+        # raw string: the pattern is exactly \s\#lem, independent of how
+        # Python treats unknown backslash escapes in ordinary literals
+        return re.sub(r"\s\#lem"," #lem",data)
+
def view(self):
"""view file"""
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self)
@@ -1283,9 +1392,9 @@ class CDLIFileObject(CatalogAware,extVer
def getPNumber(self):
"""get the pnumber"""
try:
- txt=re.match("&[Pp](\d*)\s*=([^\r\n]*)",self.data[0:])
+ txt=re.match("&[Pp](\d*)\s*=([^\r\n]*)",self.getData()[0:])
except:
- txt=self.data[0:]
+ txt=self.getData()[0:]
return "ERROR"
try:
@@ -1343,6 +1452,8 @@ def manage_addCDLIFileObject(self,id,vC=
self._getOb(id).content_type=content_type
self.reindex_object()
+ self._getOb(id).reindex_object()
+
if REQUEST is not None:
REQUEST['RESPONSE'].redirect(self.absolute_url()+'/manage_main')
@@ -1353,6 +1464,17 @@ class CDLIFile(extVersionedFile,CatalogA
default_catalog='CDLICatalog'
#security.declarePublic('history')
+    def getLastVersionData(self):
+        """Return getData() of the last version of this file."""
+        lastVersion=self.getLastVersion()
+        return lastVersion.getData()
+
+    def getLastVersionFormattedData(self):
+        """Return getFormattedData() of the last version of this file."""
+        lastVersion=self.getLastVersion()
+        return lastVersion.getFormattedData()
+
+ #security.declarePublic('history')
+
+
def history(self):
"""history"""
@@ -1524,35 +1646,37 @@ def splitatf(fh,dir=None,ext=None):
ret=None
nf=None
i=0
- for line in fh.readlines():
- if ext:
- i+=1
- if (i%100)==0:
- ext.result+="."
- if i==10000:
- i=0
- ext.result+="
"
- #check if basket name is in the first line
- if line.find("#atf basket")>=0: #old convention
- ret=line.replace('#atf basket ','')
- ret=ret.split('_')[0]
- elif line.find("#basket:")>=0: #new convention
- ret=line.replace('#basket: ','')
- ret=ret.split('_')[0]
-
- else:
- if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile
- if nf:
- nf.close() #close last file
-
-
- filename=line[1:].split("=")[0].rstrip()+".atf"
- if dir:
- filename=os.path.join(dir,filename)
- nf=file(filename,"w")
- if nf:
- nf.write(line)
-
+
+ for lineTmp in fh.readlines():
+ for line in lineTmp.split("\r"):
+ if ext:
+ i+=1
+ if (i%100)==0:
+ ext.result+="."
+ if i==10000:
+ i=0
+ ext.result+="
"
+ #check if basket name is in the first line
+ if line.find("#atf basket")>=0: #old convention
+ ret=line.replace('#atf basket ','')
+ ret=ret.split('_')[0]
+ elif line.find("#basket:")>=0: #new convention
+ ret=line.replace('#basket: ','')
+ ret=ret.split('_')[0]
+
+ else:
+ if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile
+ if nf:
+ nf.close() #close last file
+
+
+ filename=line[1:].split("=")[0].rstrip()+".atf"
+ if dir:
+ filename=os.path.join(dir,filename)
+ nf=file(filename,"w")
+ if nf:
+ nf.write(line.replace("\n","")+"\n")
+
nf.close()
fh.close()
return ret,len(os.listdir(dir))
@@ -1698,8 +1822,10 @@ class CDLIFileFolder(extVersionedFileFol
catalog=getattr(self,self.default_catalog)
#tf,tfilename=mkstemp()
-
- if self.temp_folder.downloadCounter > 5:
+ if not hasattr(self.temp_folder,'downloadCounter'):
+ self.temp_folder.downloadCounter=0
+
+ if getattr(self.temp_folder,'downloadCounter',0) > 5:
return """I am sorry, currently the server has to many requests for downloads, please come back later!"""
self.temp_folder.downloadCounter+=1
@@ -1721,7 +1847,7 @@ class CDLIFileFolder(extVersionedFileFol
#os.write(tf,obj.getLastVersion().data)
if RESPONSE:
- RESPONSE.write(obj.getLastVersion().data[0:])
+ RESPONSE.write(obj.getLastVersion().getData()[0:])
self.temp_folder.downloadCounter-=1
self._p_changed=1
get_transaction().commit()
@@ -1805,6 +1931,87 @@ class CDLIRoot(Folder):
meta_type="CDLIRoot"
downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible
+
+    def showInLineIndex(self):
+        """Print the whole line index to stdout (debugging aid) and return it."""
+        index=self.lineIndex
+        print "show"
+        for word in index.iterkeys():
+            print "word:",word
+            docs=index[word]
+            for doc in docs.iterkeys():
+                print "doc",doc,docs[doc]
+
+        return index
+
+    def searchInLineIndexDocs(self,word,uniq=True):
+        """Return the keys stored in the line index under the given word.
+
+        The word is upper-cased before the lookup. An empty list is
+        returned when the word is not in the index. With uniq true the
+        result is passed through unique().
+        """
+        docs=self.lineIndex.get(word.upper())
+        if docs is None:
+            # word not indexed; calling .keys() on None would raise AttributeError
+            return []
+
+        lst=list(docs.keys())
+        if uniq:
+            return unique(lst)
+        return lst
+
+    def getLinesFromIndex(self,word,doc):
+        """Return what the line index stores for word within doc.
+
+        word is used exactly as given; it is not upper-cased here.
+        """
+        docsForWord=self.lineIndex[word]
+        return docsForWord[doc]
+
+    def cleanInLineIndex(self):
+        """Delete every entry of the line index, print the remaining keys, return "ok"."""
+        index=self.lineIndex
+        # snapshot of the keys, because entries are deleted while looping
+        staleKeys=list(index.keys())
+        for key in staleKeys:
+            del index[key]
+        print list(index.keys())
+
+        return "ok"
+
+    def storeInLineIndex(self,key,value):
+        """Add one entry to the line index and commit the transaction.
+
+        key selects the outer index entry. value[0] selects the inner
+        entry below it and value[1] is appended to the list stored
+        there. Values already present are appended again.
+        """
+        # (re)create the index when it is missing or still a plain dict
+        if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType):
+            self.lineIndex=OOBTree()
+        index=self.lineIndex
+
+        if not index.has_key(key):
+            index[key]=OOBTree() # new btree for lines
+            index[key][value[0]]=[value[1]]
+        elif index[key].has_key(value[0]):
+            lines=index[key][value[0]]
+            lines.append(value[1])
+            # a copy is stored back, presumably so that the persistence
+            # layer registers the change - TODO confirm
+            index[key][value[0]]=lines[0:]
+        else:
+            index[key][value[0]]=[value[1]] # new array for lines
+
+        self.lineIndex=index
+
+        get_transaction().commit()
+
+
+    def showFile(self,fileId):
+        """Return the formatted data of the last version of the file titled fileId.
+
+        Returns "" when the catalog query yields nothing.
+        """
+        hits=self.CDLICatalog({'title':fileId})
+        if hits:
+            return hits[0].getObject().getLastVersionFormattedData()
+
+        return ""
+
+    def showLineFromFile(self,fileId,lineNum):
+        """Return the rest of the line that starts with "<lineNum>." in a file.
+
+        The text comes from showFile(fileId). The first line (multi-line
+        search) beginning with lineNum followed by a dot is taken, and
+        everything after that dot is returned. Returns "" when no line
+        matches.
+        """
+        text=self.showFile(fileId)
+        # escape lineNum so that regex metacharacters in a line label
+        # (e.g. "+" or "*") are matched literally
+        pattern=r"^%s\.(.*)"%re.escape("%s"%lineNum)
+
+        m=re.search(pattern,text,flags=re.M)
+        if m:
+            return m.group(1)
+        return ""
+
def URLquote(self,str):
"""quote url"""
return urllib.quote(str)
@@ -1952,7 +2159,7 @@ class CDLIRoot(Folder):
#tmp=self.cdli_main.tmpStore2[threadName]
tmp=self._v_uploadATF[threadName].returnValue
- #self._v_uploadATF[threadName].continueVar=False
+ self._v_uploadATF[threadName].continueVar=False
self.REQUEST.SESSION['changed']=[x[0].getId() for x in tmp['changed']]
self.REQUEST.SESSION['lockerrors']=[x[0].getId() for x in tmp['lockerrors']]
@@ -2028,10 +2235,10 @@ class CDLIRoot(Folder):
if RESPONSE is not None:
RESPONSE.redirect(self.absolute_url())
- def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/Documents/workspace/cdli/atf", files=None,ext=None):
+ def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None):
"""import files"""
root=self.cdli_main
-
+ count=0
if not files:
files=os.listdir(folderName)
@@ -2069,7 +2276,13 @@ class CDLIRoot(Folder):
self.CDLICatalog.catalog_object(ob)
#self.CDLICatalog.manage_catalogFoundItems(obj_ids=[id],search_sub=1)
#self.CDLICatalog.manage_catalogObject(self.REQUEST, self.REQUEST.RESPONSE, 'CDLICatalog', urlparse.urlparse(ob.absolute_url())[1])
-
+ count+=1
+
+ if count > 1000:
+ print "committing"
+ get_transaction().commit()
+ count=0
+ get_transaction().commit()
return "ok"