--- cdli/cdli_files.py 2007/02/08 10:46:36 1.65 +++ cdli/cdli_files.py 2007/03/21 14:14:08 1.70 @@ -25,6 +25,7 @@ from BTrees.OOBTree import OOBTree import logging import transaction import copy +import codecs def unique(s): """Return a list of the elements in s, but without duplicates. @@ -174,8 +175,8 @@ class uploadATFfinallyThread(Thread): conn.close() #set flag for end of this method self.end=True - logging.info("ended") - return True + logging.info("ended") + return True def __del__(self): """delete""" @@ -196,7 +197,7 @@ class uploadATFfinallyThread(Thread): #shall I only upload the changed files? if procedure=="uploadchanged": - changed=[x[0] for x in SESSION.get('changed',[])] + changed=[x[0] for x in SESSION.get('changed',[])] uploadFns=changed+SESSION.get('newPs',[]) #or all @@ -215,16 +216,16 @@ class uploadATFfinallyThread(Thread): #do first the changed files i=0 for fn in uploadFns: - i+=1 + i+=1 founds=ctx2.CDLICatalog.search({'title':fn}) if len(founds)>0: SESSION['author']=str(username) self.result="
Changing : %s"%fn+self.result
founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True)
if i==200:
- i=0
- transaction.get().commit()
- logging.info("changing: do commit")
+ i=0
+ transaction.get().commit()
+ logging.info("changing: do commit")
transaction.get().commit()
logging.info("changing: last commit")
@@ -349,7 +350,7 @@ class uploadATFThread(Thread):
#ctx.cdliRoot.cdli_main.tmpStore2[self.getName()[0:]]=self.returnValue
-
+
transaction.get().commit()
conn.close()
@@ -424,11 +425,13 @@ class uploadATFThread(Thread):
#if not than add filename to the list of newfiles
data=file(os.path.join(dir,fn)).read()
- #status,msg=checkFile(fn,data,dir)
- status=True
- msg=""
+ status,msg=checkFile(fn,data,dir)
+ #status=True
+
+
if not status: # error
errors.append((fn,msg))
+
else:
if len(founds)==0:
newPs.append(fn)
@@ -452,7 +455,7 @@ class uploadATFThread(Thread):
stObj.returnValue={}
stObj.returnValue['errors']=errors
-
+
stObj.returnValue['newPs']=newPs
stObj.returnValue['tmpdir']=dir
stObj.returnValue['basketLen']=basketLen
@@ -572,7 +575,7 @@ class CDLIBasketContainer(OrderedFolder)
ret+= "BASKET:"+com+"\t"+user+"\t"+time+"\n"
for x in values:
ret+= x[0]+"\t"+x[1]+"\n"
- return ret
+ return ret
def getBaskets(self,sortField='title'):
"""get all baskets files"""
@@ -647,11 +650,11 @@ class CDLIBasketContainer(OrderedFolder)
def setActiveBasket(self,basketId,REQUEST=None):
"""store active basketId in a cookie"""
self.REQUEST.RESPONSE.setCookie("CDLIActiveBasket",basketId,path="/")
- try:
- qs=cgi.parse_qs(REQUEST['QUERY_STRING'])
- del(qs['basketId'])
- except:
- qs={}
+ try:
+ qs=cgi.parse_qs(REQUEST['QUERY_STRING'])
+ del(qs['basketId'])
+ except:
+ qs={}
if REQUEST:
REQUEST.RESPONSE.redirect(REQUEST['URL1']+'?'+urllib.urlencode(qs))
@@ -1182,7 +1185,8 @@ class CDLIFileObject(CatalogAware,extVer
def getFormattedData(self):
"""fromat text"""
data=self.getData()
- return re.sub("\s\#lem"," #lem",data) #remove return vor #lem
+# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem
+ return re.sub("#lem"," #lem",data) #remove return vor #lem
def view(self):
"""view file"""
@@ -1419,7 +1423,14 @@ def manage_addCDLIFile(self,id,title,loc
if RESPONSE is not None:
RESPONSE.redirect('manage_main')
-
+def checkUTF8(data):
+ """check utf 8"""
+ try:
+ data.encode('utf-8')
+ return True
+ except:
+ return False
+
def checkFile(filename,data,folder):
"""check the files"""
@@ -1430,17 +1441,22 @@ def checkFile(filename,data,folder):
return False,"P missing in the filename"
elif len(fn[0])!=7:
return False,"P number has not the right length 6"
+ elif not checkUTF8(data):
+ return False,"not utf-8"
else:
- fn=os.path.join(folder,filename)
- stin,out=os.popen4("/usr/bin/atfcheck.plx %s"%fn)
- value=out.read()
- ret= out.close()
-
- if value:
-
- return False,"atf checker error: %s"%value
- else:
- return True,""
+ return True,""
+
+# else:
+# fn=os.path.join(folder,filename)
+# stin,out=os.popen4("/usr/bin/atfcheck.plx %s"%fn)
+# value=out.read()
+# ret= out.close()
+#
+# if value:
+#
+# return False,"atf checker error: %s"%value
+# else:
+# return True,""
def splitatf(fh,dir=None,ext=None):
"""split it"""
@@ -1449,40 +1465,42 @@ def splitatf(fh,dir=None,ext=None):
i=0
for lineTmp in fh.readlines():
- for line in lineTmp.split("\r"):
- if ext:
- i+=1
- if (i%100)==0:
- ext.result+="."
- if i==10000:
- i=0
- ext.result+="
"
- #check if basket name is in the first line
- if line.find("#atf basket")>=0: #old convention
- ret=line.replace('#atf basket ','')
- ret=ret.split('_')[0]
- elif line.find("#basket:")>=0: #new convention
- ret=line.replace('#basket: ','')
- ret=ret.split('_')[0]
-
- else:
- if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile
- if nf:
- nf.close() #close last file
-
-
- filename=line[1:].split("=")[0].rstrip()+".atf"
- if dir:
- filename=os.path.join(dir,filename)
- nf=file(filename,"w")
- logging.info("open %s"%filename)
- if nf:
- nf.write(line.replace("\n","")+"\n")
+ lineTmp=lineTmp.replace(codecs.BOM_UTF8,'') # make sure that all BOM are removed..
+ for line in lineTmp.split("\r"):
+ #logging.log("Deal with: %s"%line)
+ if ext:
+ i+=1
+ if (i%100)==0:
+ ext.result+="."
+ if i==10000:
+ i=0
+ ext.result+="
"
+ #check if basket name is in the first line
+ if line.find("#atf basket")>=0: #old convention
+ ret=line.replace('#atf basket ','')
+ ret=ret.split('_')[0]
+ elif line.find("#basket:")>=0: #new convention
+ ret=line.replace('#basket: ','')
+ ret=ret.split('_')[0]
+
+ else:
+ if (len(line.lstrip())>0) and (line.lstrip()[0]=="&"): #newfile
+ if nf:
+ nf.close() #close last file
+
- try:
- nf.close()
+ filename=line[1:].split("=")[0].rstrip()+".atf"
+ if dir:
+ filename=os.path.join(dir,filename)
+ nf=file(filename,"w")
+ logging.info("open %s"%filename)
+ if nf:
+ nf.write(line.replace("\n","")+"\n")
+
+ try:
+ nf.close()
except:
- pass
+ pass
fh.close()
return ret,len(os.listdir(dir))
@@ -1534,6 +1552,16 @@ class CDLIFileFolder(extVersionedFileFol
return ret
+ def getFile(self,fn):
+ """get the content of the file fn"""
+ founds=self.CDLICatalog.search({'title':fn})
+ if not founds:
+ return []
+ else:
+ obj=founds[0].getObject().getLastVersion()
+
+ return obj.getData()[0:]
+
def checkCatalog(self,fn):
"""check if fn is in the catalog"""
#TODO add checkCatalog
@@ -1556,7 +1584,13 @@ class CDLIFileFolder(extVersionedFileFol
return pt(search=list,author=author)
-
+ def getAllPNumbers(self):
+ """get a list of all files (resp their p-numbers) stored"""
+
+ ret=[x.getId for x in self.CDLICatalog()]
+
+ return ret
+
def findObjectsFromList(self,enterList=None,display=False,start=None,upload=None,list=None,basketName=None,numberOfObjects=None,RESPONSE=None):
"""findObjectsFromList (, TAB oder LINE separated)"""
@@ -1627,8 +1661,8 @@ class CDLIFileFolder(extVersionedFileFol
catalog=getattr(self,self.default_catalog)
#tf,tfilename=mkstemp()
- if not hasattr(self.temp_folder,'downloadCounter'):
- self.temp_folder.downloadCounter=0
+ if not hasattr(self.temp_folder,'downloadCounter'):
+ self.temp_folder.downloadCounter=0
if getattr(self.temp_folder,'downloadCounter',0) > 5:
return """I am sorry, currently the server has to many requests for downloads, please come back later!"""
@@ -1774,7 +1808,7 @@ class CDLIRoot(Folder):
return self.searchRegExpInLineIndexDocs(word)
try:
- lst=list(self.lineIndex.get(word).keys())
+ lst=list(self.lineIndex.get(word).keys())
except:
lst=[]
if uniq:
@@ -1844,63 +1878,63 @@ class CDLIRoot(Folder):
file=self.showFile(fileId)
#str="^%s\.[^%s\.]*%s[^\n]*\n"%(lineNum,lineNum,word)
- #str="^%s\..*?%s[^\n]*\n"%(lineNum,word)
+ #str="^%s\..*?%s[^\n]*\n"%(lineNum,word)
- #print str
+ #print str
#m=re.search(str,file,flags=re.M|re.DOTALL)
#if m:
# return m.group()
#else:
- # return ""
- #ret=lineNum+"."
+ # return ""
+ #ret=lineNum+"."
#splitted=file.split(lineNum+".")
- #if len(splitted)>1:
- #for part in splitted[1:]:
- #if part.find(word)>-1:
- # for x in part.split("\n"):
- #ret+=x
- #if x.find(word)>-1:
- #break
- #break;
- #return ret
+ #if len(splitted)>1:
+ #for part in splitted[1:]:
+ #if part.find(word)>-1:
+ # for x in part.split("\n"):
+ #ret+=x
+ #if x.find(word)>-1:
+ #break
+ #break;
+ #return ret
def showWordInFile(self,fileId,word,lineList=None):
"""get lines with word fromFileId"""
file=self.showFile(fileId)
- ret=[]
- for line in file.split("\n"):
- if line.find(word)>-1:
- if lineList: #liste of moeglichen Zeilennummern
- num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
-
- if num in lineList:
-
- ret.append(line)
- else: # nimm alles ohne line check
- ret.append(line)
- return ret
+ ret=[]
+ for line in file.split("\n"):
+ if line.find(word)>-1:
+ if lineList: #liste of moeglichen Zeilennummern
+ num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
+
+ if num in lineList:
+
+ ret.append(line)
+ else: # nimm alles ohne line check
+ ret.append(line)
+ return ret
def tagWordInFile(self,fileId,word,lineList=None):
"""get lines with word fromFileId"""
file=self.showFile(fileId)
- tagStr="""%s"""
- ret=[]
- for line in file.split("\n"):
- if line.find(word)>-1:
- if lineList: #liste of moeglichen Zeilennummern
- num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
-
- if num in lineList:
-
- ret.append(line.replace(word,tagStr%word))
- else: # nimm alles ohne line check
- ret.append(line.replace(word,tagStr%word))
- else:
- ret.append(line)
- return "
\n".join(ret)
+ tagStr="""%s"""
+ ret=[]
+ for line in file.split("\n"):
+ if line.find(word)>-1:
+ if lineList: #liste of moeglichen Zeilennummern
+ num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
+
+ if num in lineList:
+
+ ret.append(line.replace(word,tagStr%word))
+ else: # nimm alles ohne line check
+ ret.append(line.replace(word,tagStr%word))
+ else:
+ ret.append(line)
+ return "
\n".join(ret)
def URLquote(self,str):
"""quote url"""
@@ -1934,8 +1968,8 @@ class CDLIRoot(Folder):
"break all locks"
ret=[]
for f in self.ZopeFind(self,obj_metatypes="CDLI file",search_sub=1):
- if str(f[1].lockedBy)=="dahl":
- un=f[1].forceunlock()
+ if str(f[1].lockedBy)=="dahl":
+ un=f[1].forceunlock()
if un and un !="":
ret.append((f[0],un))
@@ -2005,7 +2039,7 @@ class CDLIRoot(Folder):
def uploadATF(self,repeat=None,upload=None,basketId=0,RESPONSE=None):
- """standard ausgabe"""
+ """upload an atf file / basket file"""
#self._v_uploadATF.returnValue=None
from random import randint
if (not self.REQUEST.SESSION.get('idTmp',None)):
@@ -2192,13 +2226,13 @@ class CDLIRoot(Folder):
self.CDLICatalog.catalog_object(ob)
#self.CDLICatalog.manage_catalogFoundItems(obj_ids=[id],search_sub=1)
#self.CDLICatalog.manage_catalogObject(self.REQUEST, self.REQUEST.RESPONSE, 'CDLICatalog', urlparse.urlparse(ob.absolute_url())[1])
- count+=1
+ count+=1
- if count > 1000:
- print "committing"
- transaction.get().commit()
- count=0
- transaction.get().commit()
+ if count > 1000:
+ print "committing"
+ transaction.get().commit()
+ count=0
+ transaction.get().commit()
return "ok"
@@ -2219,9 +2253,9 @@ def manage_addCDLIRoot(self, id, title='
ob.id=str(id)
ob.title=title
try:
- self._setObject(id, ob)
+ self._setObject(id, ob)
except:
- pass
+ pass
ob=self._getOb(id)
checkPermission=getSecurityManager().checkPermission