--- cdli/cdli_files.py 2007/01/08 14:36:28 1.56 +++ cdli/cdli_files.py 2007/01/09 16:06:09 1.57 @@ -22,6 +22,7 @@ from ZPublisher.HTTPResponse import HTTP from ZPublisher.BaseRequest import RequestContainer import threading from BTrees.OOBTree import OOBTree +import logging def unique(s): """Return a list of the elements in s, but without duplicates. @@ -1932,33 +1933,64 @@ class CDLIRoot(Folder): meta_type="CDLIRoot" downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible + def findWordRegExp(self,searchTerm): + """find all words in index which match regexp in SearchTerm""" + ret=[] + for x in self.lineIndex.iterkeys(): + if re.match(searchTerm,x): + ret.append(x) + return ret + + def searchRegExpInLineIndexDocs(self,searchTerm): + """search in inLineIndex with regexp""" + if not searchTerm: + return [] + ret=[] + words=self.findWordRegExp(searchTerm) # suche nach allen Treffern + logging.info("wd:%s"%words) + for word in words: + ret+=self.searchInLineIndexDocs(word) + + return unique(ret) + def showInLineIndex(self): """get the index for debug purposes""" print "show" for x in self.lineIndex.iterkeys(): - print "word:",repr(x) + logging.info("word:%s"%repr(x)) #for y in self.lineIndex[x].iterkeys(): # print "doc",repr(y),repr(self.lineIndex[x][y]) return self.lineIndex - def searchInLineIndexDocs(self,word,uniq=True): + def searchInLineIndexDocs(self,word,uniq=True,regExp=False): """search occurences""" + + if regExp: + return self.searchRegExpInLineIndexDocs(word) try: lst=list(self.lineIndex.get(word).keys()) - except: - lst=[] + except: + lst=[] if uniq: return unique(lst) else: return lst - def getLinesFromIndex(self,word,doc): + def getLinesFromIndex(self,word,doc,regExp=False): """get lines""" - - return self.lineIndex.get(word)[doc] - + if not regExp: + return self.lineIndex.get(word)[doc] + else: # wenn regexp, suche welches word + for w in self.findWordRegExp(word): + if self.lineIndex.get(w): # ein word in im dex gefunden + try: + dc=self.lineIndex.get(word)[doc] + return dc # und ein document dann gib es zurueck + except: + pass #andernfalls weiter + def cleanInLineIndex(self): """delete InlineIndex""" for x in list(self.lineIndex.keys()):