--- cdli/cdli_files.py 2006/12/22 16:43:42 1.54
+++ cdli/cdli_files.py 2007/01/08 14:36:28 1.56
@@ -23,6 +23,76 @@ from ZPublisher.BaseRequest import Reque
import threading
from BTrees.OOBTree import OOBTree
+def unique(s):
+    """Return a list of the distinct elements of s, in no particular order.
+
+    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+    unique("abcabc") some permutation of ["a", "b", "c"], and
+    unique(([1, 2], [2, 3], [1, 2])) some permutation of
+    [[2, 3], [1, 2]].
+
+    Three strategies are tried, fastest first:
+
+    1. Hashing: the elements are used as dictionary keys. This needs
+       every element to be hashable and usually works in linear time.
+    2. Sorting: a copy of s is sorted so that equal elements become
+       neighbours and can be weeded out in a single pass. This is used
+       if list(s).sort() doesn't raise TypeError and usually works in
+       O(N*log2(N)) time.
+    3. Brute force: every element is compared with the ones already
+       kept. This only needs equality-testing and usually works in
+       quadratic time.
+
+    s must support len() and iteration; a TypeError raised by the
+    iteration itself is propagated to the caller.
+    (adapted from the python cookbook)
+    """
+
+    if len(s) == 0:
+        return []
+
+    # Strategy 1: a dict is the fastest and will usually work. If it
+    # doesn't work, it will usually fail quickly, so it doesn't cost
+    # much to *try* it.
+    seen = {}
+    try:
+        for item in s:
+            seen[item] = 1
+    except TypeError:
+        pass  # something is not hashable; move on to the next method
+    else:
+        return list(seen)
+
+    # Strategy 2: sorting brings the equal elements together.
+    # NOTE: Python's list.sort() was designed to be efficient in the
+    # presence of many duplicate elements, which makes this approach
+    # more effective in Python than it may be elsewhere.
+    try:
+        ordered = list(s)
+        ordered.sort()
+    except TypeError:
+        # No cleanup here on purpose: if list(s) itself failed, the name
+        # "ordered" was never bound and deleting it would raise
+        # UnboundLocalError, hiding the real TypeError.
+        pass
+    else:
+        # len(s) > 0 was checked above, so ordered[0] exists.
+        distinct = [ordered[0]]
+        for item in ordered[1:]:
+            if item != distinct[-1]:
+                distinct.append(item)
+        return distinct
+
+    # Strategy 3: brute force is all that's left.
+    kept = []
+    for item in s:
+        if item not in kept:
+            kept.append(item)
+    return kept
+
+
class BasketContent(SimpleItem):
"""classe fuer den Inhalt eines Baskets"""
@@ -1866,19 +1936,28 @@ class CDLIRoot(Folder):
"""get the index for debug purposes"""
print "show"
for x in self.lineIndex.iterkeys():
- print "word:",x
- for y in self.lineIndex[x].iterkeys():
- print "doc",y,self.lineIndex[x][y]
+ print "word:",repr(x)
+ #for y in self.lineIndex[x].iterkeys():
+ # print "doc",repr(y),repr(self.lineIndex[x][y])
return self.lineIndex
-    def searchInLineIndexDocs(self,word):
-        """search occurences"""
-        return list(self.lineIndex.get(word.upper()).keys())
-
+    def searchInLineIndexDocs(self,word,uniq=True):
+        """Return the keys (documents) stored in the line index for word.
+
+        word is looked up exactly as given; no case folding is applied.
+        If uniq is true the keys are passed through unique() before
+        being returned, otherwise they are returned as they come out of
+        the index. An empty list is returned when the index has no
+        entry for word.
+        """
+        try:
+            lst=list(self.lineIndex.get(word).keys())
+        except AttributeError:
+            # get() returned None because word is not indexed (or there
+            # is no usable lineIndex). Only this case is swallowed: a
+            # bare "except:" would also hide KeyboardInterrupt and ZODB
+            # conflict errors that the publisher needs to see.
+            lst=[]
+        if uniq:
+            return unique(lst)
+        return lst
+
-    def getLinesFromIndex(self,word,doc):
-        """get lines"""
-        return self.lineIndex[word][doc]
+    def getLinesFromIndex(self,word,doc):
+        """Return what the line index stores for doc under word.
+
+        The entry for word is fetched with get() and then subscripted
+        with doc.
+        NOTE(review): assuming lineIndex.get() behaves like dict.get(),
+        an unknown word yields None and the subscript fails on it --
+        confirm that callers only pass indexed words.
+        """
+        docsForWord=self.lineIndex.get(word)
+        return docsForWord[doc]
def cleanInLineIndex(self):
"""delete InlineIndex"""
@@ -1923,7 +2002,70 @@ class CDLIRoot(Folder):
return ""
return f[0].getObject().getLastVersionFormattedData()
+
+    def showLineFromFile(self,fileId,lineNum,word):
+        """get line lineNum fromFileId
+
+        Unfinished: the text of fileId is fetched through showFile(),
+        but the extraction of the line is disabled, so lineNum and word
+        are not used and the result is always None.
+        """
+        # Fetched but not used yet; this call is all the method does.
+        content=self.showFile(fileId)
+        # Two extraction strategies were drafted and then disabled:
+        #  * a multi-line regular expression that matches from
+        #    "<lineNum>." up to the end of the line containing word
+        #    (pattern "^%s\..*?%s[^\n]*\n" with re.M|re.DOTALL) and
+        #    returns the match, or "" if there is none;
+        #  * splitting the text at "<lineNum>." and, for the first part
+        #    that contains word, collecting its lines up to and
+        #    including the one in which word occurs.
+        return None
+
+    def showWordInFile(self,fileId,word,lineList=None):
+        """Return the lines of fileId that contain word, as a list.
+
+        The text comes from showFile() and is split at newlines. If a
+        non-empty lineList is given, a matching line is only kept when
+        its line number -- everything before the first "." in the
+        line -- is an element of lineList. Without lineList (or with an
+        empty one) every line containing word is kept.
+        """
+        text=self.showFile(fileId)
+
+        hits=[]
+        for row in text.split("\n"):
+            if word not in row:
+                continue
+            # lineList holds the admissible line numbers; the number of
+            # a row is whatever precedes its first ".".
+            if lineList and row.split(".")[0] not in lineList:
+                continue
+            hits.append(row)
+        return hits
+
+    def tagWordInFile(self,fileId,word,lineList=None):
+        """Return the text of fileId with the occurrences of word tagged.
+
+        The text comes from showFile() and is processed line by line:
+
+        * lines that do not contain word are passed through unchanged;
+        * in lines that contain word every occurrence is replaced by
+          tagStr % word. If a non-empty lineList is given this is only
+          done for lines whose number -- everything before the first
+          "." in the line -- is in lineList; other lines containing
+          word are left out of the result.
+
+        The resulting lines are joined into one string.
+        """
+        text=self.showFile(fileId)
+        # NOTE(review): as it stands this template is the identity, so
+        # "tagging" leaves the word unchanged. The surrounding markup
+        # was presumably lost -- confirm and restore it here.
+        tagStr="""%s"""
+        tagged=tagStr%word
+        ret=[]
+        for line in text.split("\n"):
+            if word not in line:
+                ret.append(line)
+            elif not lineList or line.split(".")[0] in lineList:
+                # no lineList: tag everything without a line check
+                ret.append(line.replace(word,tagged))
+            # NOTE(review): a line that contains word but whose number
+            # is not in lineList is dropped from the output altogether.
+            # Kept as it was; confirm that it should not be passed
+            # through untagged instead.
+
+        # NOTE(review): the separator literal was broken across two
+        # lines (a syntax error); "<br>" is assumed to be the piece
+        # that went missing -- confirm against the repository.
+        return "<br>\n".join(ret)
+
def URLquote(self,str):
"""quote url"""
return urllib.quote(str)