--- cdli/cdli_files.py	2006/10/16 13:21:23	1.50
+++ cdli/cdli_files.py	2006/12/22 20:35:33	1.55
@@ -21,7 +21,77 @@ from ZPublisher.HTTPRequest import HTTPR
 from ZPublisher.HTTPResponse import HTTPResponse
 from ZPublisher.BaseRequest import RequestContainer
 import threading
-    
+from BTrees.OOBTree import OOBTree
+
+def unique(s):
+    """Return a list of the elements in s, but without duplicates.
+
+    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+    unique("abcabc") some permutation of ["a", "b", "c"], and
+    unique(([1, 2], [2, 3], [1, 2])) some permutation of
+    [[2, 3], [1, 2]].
+
+    For best speed, all sequence elements should be hashable.  Then
+    unique() will usually work in linear time.
+
+    If not possible, the sequence elements should enjoy a total
+    ordering, and if list(s).sort() doesn't raise TypeError it's
+    assumed that they do enjoy a total ordering.  Then unique() will
+    usually work in O(N*log2(N)) time.
+
+    If that's not possible either, the sequence elements must support
+    equality-testing.  Then unique() will usually work in quadratic
+    time.
+    (Recipe taken from the Python Cookbook; s must support len().)
+    """
+
+    n = len(s)
+    if n == 0:
+        return []
+
+    # Try using a dict first, as that's the fastest and will usually
+    # work.  If it doesn't work, it will usually fail quickly, so it
+    # usually doesn't cost much to *try* it.  It requires that all the
+    # sequence elements be hashable, and support equality comparison.
+    u = {}
+    try:
+        for x in s:
+            u[x] = 1
+    except TypeError:
+        del u  # move on to the next method
+    else:
+        return u.keys()  # the distinct elements, in no particular order
+
+    # We can't hash all the elements.  Second fastest is to sort,
+    # which brings the equal elements together; then duplicates are
+    # easy to weed out in a single pass.
+    # NOTE:  Python's list.sort() was designed to be efficient in the
+    # presence of many duplicate elements.  This isn't true of all
+    # sort functions in all languages or libraries, so this approach
+    # is more effective in Python than it may be elsewhere.
+    try:
+        t = list(s)
+        t.sort()
+    except TypeError:
+        del t  # move on to the next method; NOTE(review): t is unbound here if list(s) itself raised
+    else:
+        assert n > 0  # holds because empty input returned early
+        last = t[0]
+        lasti = i = 1  # i scans t; lasti is the next slot for a new distinct value
+        while i < n:
+            if t[i] != last:
+                t[lasti] = last = t[i]
+                lasti += 1
+            i += 1
+        return t[:lasti]  # the first lasti slots hold the distinct values
+
+    # Brute force is all that's left.
+    u = []
+    for x in s:
+        if x not in u:
+            u.append(x)
+    return u
+
 
 class BasketContent(SimpleItem):
     """classe fuer den Inhalt eines Baskets"""
@@ -691,16 +761,18 @@ class CDLIBasketContainer(OrderedFolder)
         self.id=id
         self.title=title
      
+ 
     def getBasketsId(self):
         """get all baskets als klartext"""
-	ret=""
+        # plain-text listing: one "BASKET:" header line per basket, then one tab-separated line per entry
+        ret="" # grows while looping; returned only after all baskets are processed
         baskets=self.ZopeFind(self,obj_metatypes=['CDLIBasket'])
         for basket in baskets:
             com,user,time,values = basket[1].getContentIds()
             ret+= "BASKET:"+com+"\t"+user+"\t"+time+"\n"
             for x in values:
                 ret+= x[0]+"\t"+x[1]+"\n"
-	return ret
+        return ret
 
     def getBaskets(self,sortField='title'):
         """get all baskets files"""
@@ -1005,11 +1077,14 @@ class CDLIBasket(Folder,CatalogAware):
     
         return added
     
+    
+                
+    
     def getContentIds(self):
         """print basket content"""
         ret=[]
         lv=self.getLastVersion()
-	for obj in lv.content.getContent():
+        for obj in lv.content.getContent():
             ret.append((obj[0].getId(),obj[1].getId()))
         
         
@@ -1302,7 +1377,12 @@ class CDLIFileObject(CatalogAware,extVer
         return True
     
     security.declarePublic('view')
-                                        
+ 
+    def getFormattedData(self):
+        """format text: return the data with the whitespace character before each #lem replaced by a blank"""
+        lemPattern=r"\s\#lem" # one whitespace character (e.g. a line break) directly followed by #lem
+        return re.sub(lemPattern," #lem",self.getData())
+        
     def view(self):
         """view file"""
         pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self)
@@ -1384,6 +1464,17 @@ class CDLIFile(extVersionedFile,CatalogA
     default_catalog='CDLICatalog'
     
     #security.declarePublic('history')
+    def getLastVersionData(self):
+        """Return getData() of the object returned by getLastVersion()."""
+        return self.getLastVersion().getData()
+
+    def getLastVersionFormattedData(self):
+        """Return getFormattedData() of the object returned by getLastVersion()."""
+        return self.getLastVersion().getFormattedData()
+
+    #security.declarePublic('history')
+    
+    
     def history(self):
         """history"""  
 
@@ -1840,6 +1931,87 @@ class CDLIRoot(Folder):
     
     meta_type="CDLIRoot"
     downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible
+    
+    def showInLineIndex(self):
+        """Print every word of the line index with its documents and their entries, then return the index (debugging aid)"""
+        print "show"
+        index=self.lineIndex
+        for word,docs in index.iteritems():
+            print "word:",word
+            for doc,lines in docs.iteritems():
+                print "doc",doc,lines
+        return index
+        
+    def searchInLineIndexDocs(self,word,uniq=True):
+        """search occurrences: return the list of document keys stored under word.upper(), [] if the word is not indexed"""
+        docs=self.lineIndex.get(word.upper()) # get() yields None for a word that is not in the index
+        if docs is None:
+            return [] # nothing indexed under this word; avoids calling keys() on None
+        lst=list(docs.keys())
+        if uniq:
+            return unique(lst) # unique() does not promise to keep the key order
+        return lst
+        
+    def getLinesFromIndex(self,word,doc):
+        """get lines: return the entry stored under lineIndex[word][doc]"""
+        return self.lineIndex[word][doc] # NOTE(review): word is used as given, while searchInLineIndexDocs upper-cases it -- confirm callers pass the stored key
+
+    def cleanInLineIndex(self):
+        """Delete every entry of the line index, print the keys that remain and return "ok"."""
+        index=self.lineIndex
+        for staleKey in list(index.keys()): # iterate over a copy of the keys because entries are deleted in the loop
+            del index[staleKey]
+        print list(index.keys()) # debug output of whatever keys are left
+        return "ok"
+    
+    def storeInLineIndex(self,key,value):
+        """store in index: record value[1] in the list kept under lineIndex[key][value[0]]"""
+        # start with an empty OOBTree if there is no index yet or it is a plain dict (a dict's content is not copied over)
+        if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType):
+            self.lineIndex=OOBTree()
+        li=self.lineIndex
+        
+        if li.has_key(key):
+            # the duplicate check below is disabled, so the same value[1] may be stored more than once
+#            if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])):
+            if li[key].has_key(value[0]):
+                tmp=li[key][value[0]]
+                tmp.append(value[1]) # appended unconditionally
+                li[key][value[0]]=tmp[0:] # store a copy of the list; presumably so the BTree registers the change -- TODO confirm
+            else:
+                li[key][value[0]]=[value[1]] # new array for lines
+                
+        else:
+            
+            li[key]=OOBTree()# new btree for lines
+            li[key][value[0]]=[value[1]] 
+                    
+        # NOTE(review): the transaction is committed on every single call of this method
+        self.lineIndex=li
+     
+        get_transaction().commit()
+        
+
+    def showFile(self,fileId):
+        """Return the formatted last-version data of the catalog entry titled fileId, or "" without a match."""
+        hits=self.CDLICatalog({'title':fileId})
+        if hits:
+            # only the first catalog hit is used
+            return hits[0].getObject().getLastVersionFormattedData()
+        return ""
+    
+    def showLineFromFile(self,fileId,lineNum):
+        """get line lineNum from fileId: the text after "<lineNum>." on the first matching line, or an empty string"""
+        # local names text/pattern avoid shadowing the builtins file and str
+        text=self.showFile(fileId)
+        # escape lineNum so that characters such as + or * are matched literally
+        pattern=r"^%s\.(.*)"%re.escape("%s"%lineNum)
+        # re.M lets ^ match at the start of every line of the text
+        m=re.search(pattern,text,flags=re.M)
+        if m:
+            return m.group(1)
+        return ""
+        
     def URLquote(self,str):
         """quote url"""
         return urllib.quote(str)