--- cdli/cdli_files.py 2007/03/21 14:14:08 1.70 +++ cdli/cdli_files.py 2007/03/21 19:29:23 1.71 @@ -635,7 +635,13 @@ class CDLIBasketContainer(OrderedFolder) return baskets - + def joinBasket(self,newbasket,oldbaskets,delete=False): + """join two baskets + @param newbasket: name of the new basket + @param oldbaskets: list of baskets to be joined + @param delete: (optional) delete the old baskets + """ + def getNewId(self): """createIds""" @@ -1446,17 +1452,6 @@ def checkFile(filename,data,folder): else: return True,"" -# else: -# fn=os.path.join(folder,filename) -# stin,out=os.popen4("/usr/bin/atfcheck.plx %s"%fn) -# value=out.read() -# ret= out.close() -# -# if value: -# -# return False,"atf checker error: %s"%value -# else: -# return True,"" def splitatf(fh,dir=None,ext=None): """split it""" @@ -1687,6 +1682,7 @@ class CDLIFileFolder(extVersionedFileFol #os.write(tf,obj.getLastVersion().data) if RESPONSE: RESPONSE.write(obj.getLastVersion().getData()[0:]) + RESPONSE.write("\n") self.temp_folder.downloadCounter-=1 self._p_changed=1 transaction.get().commit() @@ -1771,78 +1767,113 @@ class CDLIRoot(Folder): meta_type="CDLIRoot" downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible - def findWordRegExp(self,searchTerm): - """find all words in index which match regexp in SearchTerm""" + def findWordRegExp(self,indexName,searchTerm): + """find all words in index which match regexp in SearchTerm + @param indexName: name of the index to be searched in + @param searchTerm: word to be searched""" + ret=[] - for x in self.lineIndex.iterkeys(): + for x in self.lineIndexes[indexName].iterkeys(): if re.match(searchTerm,x): ret.append(x) return ret - def searchRegExpInLineIndexDocs(self,searchTerm): - """search in inLineIndex with regexp""" + def searchRegExpInLineIndexDocs(self,indexName,searchTerm): + """search in inLineIndex with regexp + @param indexName: name of the index to be searched in + @param searchTerm: term to be searched + """ if not searchTerm: return [] ret=[] - words=self.findWordRegExp(searchTerm) # suche nach allen Treffern + words=self.findWordRegExp(indexName,searchTerm) # suche nach allen Treffern logging.info("wd:%s"%words) for word in words: - ret+=self.searchInLineIndexDocs(word) + ret+=self.searchInLineIndexDocs(indexName,word) return unique(ret) def showInLineIndex(self): """get the index for debug purposes""" print "show" - for x in self.lineIndex.iterkeys(): - logging.info("word:%s"%repr(x)) - #for y in self.lineIndex[x].iterkeys(): - # print "doc",repr(y),repr(self.lineIndex[x][y]) - - return self.lineIndex + for key in self.lineIndexes.keys(): + logging.info("index:%s"%key) + for x in self.lineIndexes[key].iterkeys(): + logging.info("word:%s"%repr(x)) + #for y in self.lineIndex[x].iterkeys(): + # print "doc",repr(y),repr(self.lineIndex[x][y]) + + return self.lineIndexes - def searchInLineIndexDocs(self,word,uniq=True,regExp=False): - """search occurences""" + def searchInLineIndexDocs(self,indexName,word,uniq=True,regExp=False): + """search occurences in an index + @param indexName: name of the index to be searched in + @param word: word to be searched + @param unique: (optional) unify the list of results + @param regExp: (optional) use regular expressions + """ if regExp: - return self.searchRegExpInLineIndexDocs(word) + return self.searchRegExpInLineIndexDocs(indexName,word) try: - lst=list(self.lineIndex.get(word).keys()) + + lst=list(self.lineIndexes[indexName].get(word).keys()) except: + logging.error("error: searchInLineIndexDocs (%s %s)"%(sys.exc_info()[0:2])) lst=[] if uniq: return unique(lst) else: return lst - def getLinesFromIndex(self,word,doc,regExp=False): - """get lines""" + def getLinesFromIndex(self,indexName,word,doc,regExp=False): + """return all lines from a document where word is found + @param indexName: Name of the index + @param word: word to be searched + @param doc: name of the document (usuallay the p-number) + @param regExp: (optional) use regExp + """ + if not regExp: - return self.lineIndex.get(word)[doc] + return self.lineIndexes[indexName].get(word)[doc] else: # wenn regexp, suche welches word - for w in self.findWordRegExp(word): - if self.lineIndex.get(w): # ein word in im dex gefunden + for w in self.findWordRegExp(indexName,word): + if self.lineIndexes[indexName].get(w): # ein word in im dex gefunden try: - dc=self.lineIndex.get(word)[doc] + dc=self.lineIndex[indexName].get(word)[doc] return dc # und ein document dann gib es zurueck except: pass #andernfalls weiter - def cleanInLineIndex(self): - """delete InlineIndex""" - for x in list(self.lineIndex.keys()): - del(self.lineIndex[x]) - print [x for x in self.lineIndex.keys()] + def cleanInLineIndex(self,indexName): + """empty an InlineIndex + @param indexName: name of the index + """ + for x in list(self.lineIndexes[indexName].keys()): + del(self.lineIndexes[indexName][x]) + print [x for x in self.lineIndexes[indexName].keys()] return "ok" - def storeInLineIndex(self,key,value): - """store in index""" - - if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType): - self.lineIndex=OOBTree() - li=self.lineIndex + def storeInLineIndex(self,indexName,key,value): + """store in index, key is normally a word or grapheme + and value is a tuple (documentname, line) where the word can be found + @param indexName: name of the index + @param key: key in index + @param value: value in index, value is a tuple (document name, line) + """ + logging.error("indexing: %s %s"%(indexName,key)) + if (not hasattr(self,'lineIndexes')): + + self.lineIndexes={} + + if self.lineIndexes.get(indexName,None) is None: + #index exisitiert noch nicht dann anlegen + + self.lineIndexes[indexName]=OOBTree() + lis=self.lineIndexes + li=lis[indexName] if li.has_key(key): @@ -1860,51 +1891,36 @@ class CDLIRoot(Folder): li[key][value[0]]=[value[1]] - self.lineIndex=li + self.lineIndexes=lis transaction.get().commit() def showFile(self,fileId): - """show a file""" + """show a file + @param fileId: P-Number of the document to be displayed + """ f=self.CDLICatalog({'title':fileId}) if not f: return "" return f[0].getObject().getLastVersionFormattedData() - def showLineFromFile(self,fileId,lineNum,word): - """get line lineNum fromFileId""" - - file=self.showFile(fileId) - #str="^%s\.[^%s\.]*%s[^\n]*\n"%(lineNum,lineNum,word) - #str="^%s\..*?%s[^\n]*\n"%(lineNum,word) - - #print str - #m=re.search(str,file,flags=re.M|re.DOTALL) - #if m: - # return m.group() - #else: - # return "" - #ret=lineNum+"." - #splitted=file.split(lineNum+".") - #if len(splitted)>1: - #for part in splitted[1:]: - #if part.find(word)>-1: - # for x in part.split("\n"): - #ret+=x - #if x.find(word)>-1: - #break - #break; - #return ret - def showWordInFile(self,fileId,word,lineList=None): + def showWordInFile(self,fileId,word,lineList=None,regExp=True,indexName=""): """get lines with word fromFileId""" file=self.showFile(fileId) ret=[] + if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen + wordlist=self.findWordRegExp(indexName,word) + else: + wordlist=[word] + for line in file.split("\n"): + found=False + for word in wordlist: if line.find(word)>-1: if lineList: #liste of moeglichen Zeilennummern num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile @@ -1914,26 +1930,40 @@ class CDLIRoot(Folder): ret.append(line) else: # nimm alles ohne line check ret.append(line) + + break; return ret - def tagWordInFile(self,fileId,word,lineList=None): + def tagWordInFile(self,fileId,word,lineList=None,regExp=True,indexName=""): """get lines with word fromFileId""" file=self.showFile(fileId) tagStr="""%s""" ret=[] + + if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen + wordlist=self.findWordRegExp(indexName,word) + else: + wordlist=[word] + for line in file.split("\n"): - if line.find(word)>-1: + found=False + for word in wordlist: + if line.find(word)>-1: #word ist gefunden dann makiere und breche die Schleife ab if lineList: #liste of moeglichen Zeilennummern num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile if num in lineList: ret.append(line.replace(word,tagStr%word)) + else: # nimm alles ohne line check ret.append(line.replace(word,tagStr%word)) - else: + found=True + break + if not found: #word wurde nicht gefunden keine makierung ret.append(line) + return "
\n".join(ret) def URLquote(self,str): @@ -1963,19 +1993,8 @@ class CDLIRoot(Folder): ret.append((f[0],un)) return ret - - def forceDahl(self): - "break all locks" - ret=[] - for f in self.ZopeFind(self,obj_metatypes="CDLI file",search_sub=1): - if str(f[1].lockedBy)=="dahl": - un=f[1].forceunlock() - - if un and un !="": - ret.append((f[0],un)) + - return ret - def getChangesByAuthor(self,author,n=100): """getChangesByAuthor""" zcat=self.CDLIObjectsCatalog @@ -2041,6 +2060,8 @@ class CDLIRoot(Folder): def uploadATF(self,repeat=None,upload=None,basketId=0,RESPONSE=None): """upload an atf file / basket file""" #self._v_uploadATF.returnValue=None + + #generate an random id for the upload thread from random import randint if (not self.REQUEST.SESSION.get('idTmp',None)): @@ -2049,8 +2070,10 @@ class CDLIRoot(Folder): else: idTmp=self.REQUEST.SESSION.get('idTmp',None) + threadName=repeat if not threadName or threadName=="": + #new thread not called from the waiting page tmpVar=False thread=uploadATFThread() @@ -2098,26 +2121,8 @@ class CDLIRoot(Folder): return pt(txt='/uploadATF',threadName=threadName) else: -# tmp={} -# for key in self._v_uploadATF[threadName].returnValue.keys(): -# t=self._v_uploadATF[threadName].returnValue[key] -# if type(t) is ListType: -# tmp[key]=self._v_uploadATF[threadName].returnValue[key][0:] -# else: -# tmp[key]=self._v_uploadATF[threadName].returnValue[key] -# repr(tmp[key]),repr(key) -# -# # - #tmp=self.cdli_main.tmpStore2[threadName] - tmp=getattr(self.temp_folder,idTmp).returnValue - - - - - #del(self.cdli_main.tmpStore2[threadName]) - - + pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','uploadCheck.zpt')).__of__(self) return pt(changed=tmp['changed'],lockerrors=tmp['lockerrors'],errors=tmp['errors'],dir=tmp['dir'],newPs=tmp['newPs'],basketLen=tmp['basketLen'],numberOfFiles=tmp['numberOfFiles'],