--- cdli/cdli_files.py 2006/12/22 20:35:33 1.55 +++ cdli/cdli_files.py 2007/01/09 16:06:09 1.57 @@ -22,6 +22,7 @@ from ZPublisher.HTTPResponse import HTTP from ZPublisher.BaseRequest import RequestContainer import threading from BTrees.OOBTree import OOBTree +import logging def unique(s): """Return a list of the elements in s, but without duplicates. @@ -1932,30 +1933,64 @@ class CDLIRoot(Folder): meta_type="CDLIRoot" downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible + def findWordRegExp(self,searchTerm): + """find all words in index which match regexp in SearchTerm""" + ret=[] + for x in self.lineIndex.iterkeys(): + if re.match(searchTerm,x): + ret.append(x) + return ret + + def searchRegExpInLineIndexDocs(self,searchTerm): + """search in inLineIndex with regexp""" + if not searchTerm: + return [] + ret=[] + words=self.findWordRegExp(searchTerm) # suche nach allen Treffern + logging.info("wd:%s"%words) + for word in words: + ret+=self.searchInLineIndexDocs(word) + + return unique(ret) + def showInLineIndex(self): """get the index for debug purposes""" print "show" for x in self.lineIndex.iterkeys(): - print "word:",x - for y in self.lineIndex[x].iterkeys(): - print "doc",y,self.lineIndex[x][y] + logging.info("word:%s"%repr(x)) + #for y in self.lineIndex[x].iterkeys(): + # print "doc",repr(y),repr(self.lineIndex[x][y]) return self.lineIndex - def searchInLineIndexDocs(self,word,uniq=True): + def searchInLineIndexDocs(self,word,uniq=True,regExp=False): """search occurences""" + + if regExp: + return self.searchRegExpInLineIndexDocs(word) - - lst=list(self.lineIndex.get(word.upper()).keys()) + try: + lst=list(self.lineIndex.get(word).keys()) + except: + lst=[] if uniq: return unique(lst) else: return lst - def getLinesFromIndex(self,word,doc): + def getLinesFromIndex(self,word,doc,regExp=False): """get lines""" - return self.lineIndex[word][doc] - + if not regExp: + return self.lineIndex.get(word)[doc] + else: # wenn regexp, suche welches word + for w in self.findWordRegExp(word): + if self.lineIndex.get(w): # ein word in im dex gefunden + try: + dc=self.lineIndex.get(word)[doc] + return dc # und ein document dann gib es zurueck + except: + pass #andernfalls weiter + def cleanInLineIndex(self): """delete InlineIndex""" for x in list(self.lineIndex.keys()): @@ -2000,18 +2035,69 @@ class CDLIRoot(Folder): return f[0].getObject().getLastVersionFormattedData() - def showLineFromFile(self,fileId,lineNum): + def showLineFromFile(self,fileId,lineNum,word): """get line lineNum fromFileId""" file=self.showFile(fileId) - str="^%s\.(.*)"%lineNum - - m=re.search(str,file,flags=re.M) - if m: - return m.group(1) - else: - return "" + #str="^%s\.[^%s\.]*%s[^\n]*\n"%(lineNum,lineNum,word) + #str="^%s\..*?%s[^\n]*\n"%(lineNum,word) + #print str + #m=re.search(str,file,flags=re.M|re.DOTALL) + #if m: + # return m.group() + #else: + # return "" + #ret=lineNum+"." + #splitted=file.split(lineNum+".") + #if len(splitted)>1: + #for part in splitted[1:]: + #if part.find(word)>-1: + # for x in part.split("\n"): + #ret+=x + #if x.find(word)>-1: + #break + #break; + #return ret + + def showWordInFile(self,fileId,word,lineList=None): + """get lines with word fromFileId""" + + file=self.showFile(fileId) + + ret=[] + for line in file.split("\n"): + if line.find(word)>-1: + if lineList: #liste of moeglichen Zeilennummern + num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile + + if num in lineList: + + ret.append(line) + else: # nimm alles ohne line check + ret.append(line) + return ret + + def tagWordInFile(self,fileId,word,lineList=None): + """get lines with word fromFileId""" + + file=self.showFile(fileId) + tagStr="""%s""" + ret=[] + for line in file.split("\n"): + if line.find(word)>-1: + if lineList: #liste of moeglichen Zeilennummern + num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile + + if num in lineList: + + ret.append(line.replace(word,tagStr%word)) + else: # nimm alles ohne line check + ret.append(line.replace(word,tagStr%word)) + else: + ret.append(line) + return "
\n".join(ret) + def URLquote(self,str): """quote url""" return urllib.quote(str)