--- cdli/cdli_files.py	2006/12/22 16:43:42	1.54
+++ cdli/cdli_files.py	2007/01/08 14:36:28	1.56
@@ -23,6 +23,76 @@ from ZPublisher.BaseRequest import Reque
 import threading
 from BTrees.OOBTree import OOBTree
 
+def unique(s):
+    """Return a list of the elements in s, but without duplicates.
+
+    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+    unique("abcabc") some permutation of ["a", "b", "c"], and
+    unique(([1, 2], [2, 3], [1, 2])) some permutation of
+    [[2, 3], [1, 2]].
+
+    For best speed, all sequence elements should be hashable.  Then
+    unique() will usually work in linear time.
+
+    If not possible, the sequence elements should enjoy a total
+    ordering, and if list(s).sort() doesn't raise TypeError it's
+    assumed that they do enjoy a total ordering.  Then unique() will
+    usually work in O(N*log2(N)) time.
+
+    If that's not possible either, the sequence elements must support
+    equality-testing.  Then unique() will usually work in quadratic
+    time.
+    (Taken from the Python Cookbook.)
+    """
+
+    n = len(s)  # s must support len(); a bare iterator fails here
+    if n == 0:
+        return []
+
+    # Try using a dict first, as that's the fastest and will usually
+    # work.  If it doesn't work, it will usually fail quickly, so it
+    # usually doesn't cost much to *try* it.  It requires that all the
+    # sequence elements be hashable, and support equality comparison.
+    u = {}
+    try:
+        for x in s:
+            u[x] = 1
+    except TypeError:
+        del u  # move on to the next method
+    else:
+        return u.keys()  # the keys are the distinct elements, in no particular order
+
+    # We can't hash all the elements.  Second fastest is to sort,
+    # which brings the equal elements together; then duplicates are
+    # easy to weed out in a single pass.
+    # NOTE:  Python's list.sort() was designed to be efficient in the
+    # presence of many duplicate elements.  This isn't true of all
+    # sort functions in all languages or libraries, so this approach
+    # is more effective in Python than it may be elsewhere.
+    try:
+        t = list(s)
+        t.sort()
+    except TypeError:
+        del t  # move on to the next method
+    else:
+        assert n > 0  # holds because of the early return above; makes t[0] safe
+        last = t[0]
+        lasti = i = 1  # i reads ahead; lasti is the next slot a new value is written to
+        while i < n:
+            if t[i] != last:
+                t[lasti] = last = t[i]  # new value: move it down next to the ones already kept
+                lasti += 1
+            i += 1
+        return t[:lasti]  # the first lasti slots hold one copy of each value, in sorted order
+
+    # Brute force is all that's left.
+    u = []
+    for x in s:
+        if x not in u:  # linear equality scan per element, hence quadratic overall
+            u.append(x)
+    return u
+
+
 class BasketContent(SimpleItem):
     """classe fuer den Inhalt eines Baskets"""
     
@@ -1866,19 +1936,28 @@ class CDLIRoot(Folder):
         """get the index for debug purposes"""
         print "show"
         for x in self.lineIndex.iterkeys():
-            print "word:",x
-            for y in self.lineIndex[x].iterkeys():
-                print "doc",y,self.lineIndex[x][y]
+            print "word:",repr(x)
+            #for y in self.lineIndex[x].iterkeys():
+            #    print "doc",repr(y),repr(self.lineIndex[x][y])
                 
         return self.lineIndex
         
-    def searchInLineIndexDocs(self,word):
+    def searchInLineIndexDocs(self,word,uniq=True):
         """search occurences"""
-        return list(self.lineIndex.get(word.upper()).keys())
-
+        # Keys stored under word in self.lineIndex, as a list; [] when the
+        # word is not indexed.  uniq=True passes the list through unique().
+        try:
+            lst=list(self.lineIndex.get(word).keys())
+        except AttributeError: # get() returned None: word is not indexed
+            lst=[]
+        if uniq:
+            return unique(lst)
+        return lst
+        
     def getLinesFromIndex(self,word,doc):
         """get lines"""
-        return self.lineIndex[word][doc]
+        # NOTE(review): if get() returns None for an unknown word, the [doc] subscript fails -- confirm callers cope
+        return self.lineIndex.get(word)[doc]
 
     def cleanInLineIndex(self):
         """delete InlineIndex"""
@@ -1923,7 +2002,70 @@ class CDLIRoot(Folder):
             return ""
         
         return f[0].getObject().getLastVersionFormattedData()
+    
+    def showLineFromFile(self,fileId,lineNum,word):
+        """Stub: meant to return line lineNum of file fileId; currently returns None."""
         
+        file=self.showFile(fileId)
+        #str="^%s\.[^%s\.]*%s[^\n]*\n"%(lineNum,lineNum,word)
+        #str="^%s\..*?%s[^\n]*\n"%(lineNum,word)
+        # NOTE(review): both drafts (regex here, split-based further down) are disabled, so nothing is returned.
+        #print str
+        #m=re.search(str,file,flags=re.M|re.DOTALL)
+        #if m:
+        #    return m.group()
+        #else:
+        #    	return ""
+        #ret=lineNum+"."
+        #splitted=file.split(lineNum+".")
+        #if len(splitted)>1:
+                #for part in splitted[1:]:
+                        #if part.find(word)>-1:
+                         # for x in part.split("\n"):
+                                #ret+=x
+                                #if x.find(word)>-1:
+                                        #break
+                          #break;
+        #return ret
+
+    def showWordInFile(self,fileId,word,lineList=None):
+        """Return the lines of file fileId that contain word.
+
+        fileId   -- passed to self.showFile() to fetch the text
+        word     -- substring to look for in each line
+        lineList -- optional line numbers (as strings); if given and
+                    non-empty, a matching line is kept only if the text
+                    before its first "." is in lineList
+
+        Returns a list of the matching lines, in file order.
+        """
+        text=self.showFile(fileId)
+        ret=[]
+        for line in text.split("\n"):
+            if word in line and (not lineList or line.split(".")[0] in lineList):
+                ret.append(line)
+        return ret
+
+    def tagWordInFile(self,fileId,word,lineList=None):
+        """Return the whole file fileId as HTML, with word highlighted.
+
+        Lines are joined with "<br>" and a newline.  In a line that contains
+        word (and, if lineList is given and non-empty, whose number -- the
+        text before its first "." -- is in lineList) every occurrence of word
+        is wrapped in <span class="found">.  All other lines pass through
+        unchanged.  No HTML escaping is done on the file text or on word.
+        """
+        text=self.showFile(fileId)
+        tagged="""<span class="found">%s</span>"""%word
+        ret=[]
+        for line in text.split("\n"):
+            if word in line and (not lineList or line.split(".")[0] in lineList):
+                ret.append(line.replace(word,tagged))
+            else:
+                # a hit outside lineList stays in the output, untagged
+                ret.append(line)
+        return "<br>\n".join(ret)
+
     def URLquote(self,str):
         """quote url"""
         return urllib.quote(str)