--- cdli/cdli_files.py 2006/12/22 20:35:33 1.55
+++ cdli/cdli_files.py 2007/01/09 16:06:09 1.57
@@ -22,6 +22,7 @@ from ZPublisher.HTTPResponse import HTTP
from ZPublisher.BaseRequest import RequestContainer
import threading
from BTrees.OOBTree import OOBTree
+import logging
def unique(s):
"""Return a list of the elements in s, but without duplicates.
@@ -1932,30 +1933,64 @@ class CDLIRoot(Folder):
meta_type="CDLIRoot"
# Counter for the basket downloads currently in progress; according to the
# original note, no further downloads are possible once it exceeds 10.
downloadCounterBaskets=0
def findWordRegExp(self, searchTerm):
    """Return every word of the line index that matches a regular expression.

    The pattern is applied with ``re.match`` semantics: it is anchored at
    the beginning of each indexed word but not at its end.

    searchTerm -- regular expression to test the indexed words against
    returns    -- list of the matching index keys, in index iteration order
    raises     -- re.error if searchTerm is not a valid regular expression
    """
    # Compile once up front instead of handing the pattern string to
    # re.match again for every key of the index.
    matches = re.compile(searchTerm).match
    return [indexed for indexed in self.lineIndex.iterkeys() if matches(indexed)]
+
def searchRegExpInLineIndexDocs(self, searchTerm):
    """Search the line index with a regular expression.

    Every indexed word matching searchTerm (see findWordRegExp) is looked
    up with searchInLineIndexDocs and the results are merged.

    searchTerm -- regular expression; an empty/false value yields []
    returns    -- the merged results, passed through unique()
    """
    if searchTerm:
        matchingWords = self.findWordRegExp(searchTerm)  # look up all hits
        logging.info("wd:%s" % matchingWords)
        docs = []
        for matched in matchingWords:
            docs.extend(self.searchInLineIndexDocs(matched))
        return unique(docs)
    return []
+
def showInLineIndex(self):
    """Log every word of the line index and return the index (debug helper).

    returns -- self.lineIndex itself, not a copy
    """
    # Use the logger for the start marker as well, so all of this method's
    # debug output ends up in the same place instead of partly on stdout.
    logging.info("show")
    for word in self.lineIndex.iterkeys():
        logging.info("word:%s" % repr(word))
    return self.lineIndex
def searchInLineIndexDocs(self, word, uniq=True, regExp=False):
    """Return the documents in which an indexed word occurs.

    word   -- index key to look up, or a regular expression if regExp is set
    uniq   -- pass the result through unique() before returning it
    regExp -- treat word as a regular expression and delegate to
              searchRegExpInLineIndexDocs (uniq is not consulted then)
    returns -- list of the keys stored under word in the line index;
               an empty list if the word is not indexed
    """
    if regExp:
        return self.searchRegExpInLineIndexDocs(word)

    # A word that is not indexed simply has no documents.  Test for that
    # explicitly rather than with a bare except, which would also hide
    # unrelated errors raised while reading the index.
    entry = self.lineIndex.get(word)
    if entry is None:
        docs = []
    else:
        docs = list(entry.keys())

    if uniq:
        return unique(docs)
    return docs
def getLinesFromIndex(self, word, doc, regExp=False):
    """Return what the line index stores for a word in a given document.

    word   -- index key, or a regular expression if regExp is set
    doc    -- document key below the word's index entry
    regExp -- treat word as a regular expression: the indexed words that
              match it (see findWordRegExp) are tried in turn and the entry
              of the first one that contains doc is returned
    returns -- the stored entry; None if regExp is set and no matching word
               contains doc
    raises  -- in the non-regExp case the lookup is not guarded: TypeError
               if word is not indexed, KeyError if doc is missing
    """
    if not regExp:
        return self.lineIndex.get(word)[doc]

    # Regular expression: find out which concrete word holds the document.
    for matched in self.findWordRegExp(word):
        docs = self.lineIndex.get(matched)
        if not docs:
            continue
        try:
            # Look up the matched word, not the pattern itself.
            return docs[doc]
        except KeyError:
            continue  # this word does not occur in doc, try the next one
    return None
+
def cleanInLineIndex(self):
"""delete InlineIndex"""
for x in list(self.lineIndex.keys()):
@@ -2000,18 +2035,69 @@ class CDLIRoot(Folder):
return f[0].getObject().getLastVersionFormattedData()
def showLineFromFile(self, fileId, lineNum, word=None):
    """Return the text of line lineNum of the file fileId.

    A line is taken to belong to lineNum when it starts with
    "<lineNum>."; the text following that prefix is returned.

    fileId  -- id handed to self.showFile to obtain the file's text
    lineNum -- line number as written at the start of the line
    word    -- optional; when given, only a line that also contains word
               is accepted, which picks the right line when the same
               number occurs more than once in the file
    returns -- the text after "<lineNum>." of the first accepted line, or
               "" if there is none
    """
    text = self.showFile(fileId)
    if not text:
        return ""

    prefix = "%s." % lineNum
    for line in text.split("\n"):
        if not line.startswith(prefix):
            continue
        if word and word not in line:
            continue
        return line[len(prefix):]
    return ""
+
def showWordInFile(self, fileId, word, lineList=None):
    """Return the lines of the file fileId that contain word.

    fileId   -- id handed to self.showFile to obtain the file's text
    word     -- substring a line must contain to be returned
    lineList -- optional list of admissible line numbers; when given (and
                non-empty) a line is only returned if the text before its
                first "." is in this list
    returns  -- list of the matching lines, in file order
    """
    content = self.showFile(fileId)

    hits = []
    for row in content.split("\n"):
        if word not in row:
            continue
        # The line number is everything before the first "." of the line.
        if lineList and row.split(".")[0] not in lineList:
            continue
        hits.append(row)
    return hits
+
def tagWordInFile(self, fileId, word, lineList=None):
    """Return the text of the file fileId as HTML with word highlighted.

    Every line of the file is kept.  Occurrences of word are wrapped in a
    highlighting tag; when lineList is given (and non-empty) this is only
    done in lines whose number, the text before the first ".", is in
    lineList.

    fileId   -- id handed to self.showFile to obtain the file's text
    word     -- substring to highlight; an empty word highlights nothing
    lineList -- optional list of line numbers to restrict highlighting to
    returns  -- the lines joined with "<br>\\n"
    """
    text = self.showFile(fileId)
    # NOTE(review): the markup of this template and of the separator below
    # was not recoverable from the available revision; the span/br tags are
    # a reconstruction -- confirm against the stylesheet in use.
    tagStr = """<span class="found">%s</span>"""

    tagged = []
    for line in text.split("\n"):
        highlight = bool(word) and word in line
        if highlight and lineList:
            # The line number is everything before the first "." of the line.
            highlight = line.split(".")[0] in lineList
        if highlight:
            tagged.append(line.replace(word, tagStr % word))
        else:
            # Lines that are not highlighted are passed through unchanged,
            # including those that contain word but are not in lineList.
            tagged.append(line)
    return "<br>\n".join(tagged)
+
def URLquote(self,str):
"""quote url"""
return urllib.quote(str)