--- cdli/cdli_files.py	2006/11/14 17:02:59	1.51
+++ cdli/cdli_files.py	2007/01/24 18:05:24	1.58
@@ -21,7 +21,79 @@ from ZPublisher.HTTPRequest import HTTPR
 from ZPublisher.HTTPResponse import HTTPResponse
 from ZPublisher.BaseRequest import RequestContainer
 import threading
-    
+from BTrees.OOBTree import OOBTree
+import logging
+import transaction
+
+def unique(s):
+    """Return a list of the elements in s, but without duplicates.
+
+    For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+    unique("abcabc") some permutation of ["a", "b", "c"], and
+    unique(([1, 2], [2, 3], [1, 2])) some permutation of
+    [[2, 3], [1, 2]].
+
+    For best speed, all sequence elements should be hashable.  Then
+    unique() will usually work in linear time.
+
+    If not possible, the sequence elements should enjoy a total
+    ordering, and if list(s).sort() doesn't raise TypeError it's
+    assumed that they do enjoy a total ordering.  Then unique() will
+    usually work in O(N*log2(N)) time.
+
+    If that's not possible either, the sequence elements must support
+    equality-testing.  Then unique() will usually work in quadratic
+    time.
+    (from the python cookbook)
+    """
+
+    n = len(s)
+    if n == 0:
+        return []
+
+    # Try using a dict first, as that's the fastest and will usually
+    # work.  If it doesn't work, it will usually fail quickly, so it
+    # usually doesn't cost much to *try* it.  It requires that all the
+    # sequence elements be hashable, and support equality comparison.
+    u = {}
+    try:
+        for x in s:
+            u[x] = 1
+    except TypeError:
+        pass  # unhashable element: move on to the next method
+    else:
+        return list(u)  # always a real list; keys() is a view on Python 3
+
+    # We can't hash all the elements.  Second fastest is to sort,
+    # which brings the equal elements together; then duplicates are
+    # easy to weed out in a single pass.
+    # NOTE:  Python's list.sort() was designed to be efficient in the
+    # presence of many duplicate elements.  This isn't true of all
+    # sort functions in all languages or libraries, so this approach
+    # is more effective in Python than it may be elsewhere.
+    try:
+        t = list(s)
+        t.sort()
+    except TypeError:
+        pass  # t may be unbound here, so there is nothing to delete
+    else:
+        # The early return above guarantees n > 0, so t[0] exists and
+        # no assert is needed.
+        deduped = [t[0]]
+        for x in t[1:]:
+            # Sorting made equal elements adjacent, so comparing with
+            # the last element kept is enough.
+            if x != deduped[-1]:
+                deduped.append(x)
+        return deduped
+
+    # Brute force is all that's left.
+    u = []
+    for x in s:
+        if x not in u:
+            u.append(x)
+    return u
+
 
 class BasketContent(SimpleItem):
     """classe fuer den Inhalt eines Baskets"""
@@ -96,7 +168,7 @@ class uploadATFfinallyThread(Thread):
         #add the files
         self.uploadATFfinallyThread(ctx,self.procedure,comment=self.comment,basketname=self.basketname,unlock=self.unlock,SESSION=self.SESSION,username=self.username)
         #commit the transactions
-        get_transaction().commit()
+        transaction.get().commit()
         conn.close()
         #set flag for end of this method
         self.end=True
@@ -240,9 +312,9 @@ class uploadATFThread(Thread):
         self.uploadATFThread(ctx,self.upload,self.basketId)
      
         #ctx.cdliRoot.cdli_main.tmpStore2[self.getName()[0:]]=self.returnValue
-
-        get_transaction().commit()
-    
+        
+        
+        transaction.get().commit()
         while self.continueVar:
             pass
        
@@ -559,8 +631,8 @@ class BasketObject_old(Folder):
         
         self.temp_folder.downloadCounter+=1 
         self._p_changed=1
-        get_transaction().commit()      
-
+         
+        transaction.get().commit()
         
         for object in self.contents:
             
@@ -578,7 +650,8 @@ class BasketObject_old(Folder):
         self.REQUEST.RESPONSE.write(ret)    
         self.temp_folder.downloadCounter-=1 
         self._p_changed=1
-        get_transaction().commit()      
+        transaction.get().commit()
+             
         
         
 def manage_addBasket_oldObjectForm(self):
@@ -1128,10 +1201,10 @@ class CDLIBasketVersion(Implicit,Persist
             if str(self.REQUEST['AUTHENTICATED_USER'])=='Anonymous User':
                 self.temp_folder.downloadCounterBaskets-=1 
                 self._p_changed=1
-                get_transaction().commit()      
+                transaction.get().commit()      
                 self.temp_folder.downloadCounterBaskets-=1 
                 self._p_changed=1
-                get_transaction().commit()      
+                transaction.get().commit()      
                 return "please login first"
 
             #check if a locked object exist in the basket.
@@ -1151,7 +1224,7 @@ class CDLIBasketVersion(Implicit,Persist
                 
                 self.temp_folder.downloadCounterBaskets-=1 
                 self._p_changed=1
-                get_transaction().commit()      
+                transaction.get().commit()      
 
                 return pt()
          
@@ -1181,7 +1254,7 @@ class CDLIBasketVersion(Implicit,Persist
 
         self.temp_folder.downloadCounterBaskets-=1 
         self._p_changed=1
-        get_transaction().commit()      
+        transaction.get().commit()      
         
         self.REQUEST.RESPONSE.setHeader("Content-Disposition","""attachement; filename="%s.atf" """%basket_name)
         self.REQUEST.RESPONSE.setHeader("Content-Type","application/octet-stream")
@@ -1307,7 +1380,12 @@ class CDLIFileObject(CatalogAware,extVer
         return True
     
     security.declarePublic('view')
-                                        
+ 
+    def getFormattedData(self):
+        """Return getData() with the whitespace character before each "#lem" replaced by a space."""
+        data=self.getData()
+        return re.sub(r"\s\#lem"," #lem",data) # raw-string pattern; pulls a "#lem" that starts a line up onto the previous line
+        
     def view(self):
         """view file"""
         pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self)
@@ -1393,6 +1471,10 @@ class CDLIFile(extVersionedFile,CatalogA
         """get last version data"""
         return self.getLastVersion().getData()
 
+    def getLastVersionFormattedData(self):
+        """Return getFormattedData() of the object returned by getLastVersion()."""
+        return self.getLastVersion().getFormattedData()
+
     #security.declarePublic('history')
     
     
@@ -1751,7 +1833,7 @@ class CDLIFileFolder(extVersionedFileFol
 
         self.temp_folder.downloadCounter+=1
         self._p_changed=1
-        get_transaction().commit()
+        transaction.get().commit()
        
         list=[(x.getId,x) for x in catalog()]
         list.sort(sortF)
@@ -1771,7 +1853,7 @@ class CDLIFileFolder(extVersionedFileFol
                     RESPONSE.write(obj.getLastVersion().getData()[0:])
                 self.temp_folder.downloadCounter-=1 
                 self._p_changed=1
-        get_transaction().commit()
+        transaction.get().commit()
         #os.close(tf)
         #RESPONSE.redirect(self.absolute_url()+"/downloadFile?fn="%tfilename)
         return True
@@ -1853,6 +1935,99 @@ class CDLIRoot(Folder):
     meta_type="CDLIRoot"
     downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible
     
+    def findWordRegExp(self,searchTerm):
+        """Return the keys of self.lineIndex that the regexp searchTerm matches.
+
+        re.match is used, so searchTerm only has to match at the start of a key.
+        """
+        keys=self.lineIndex.iterkeys()
+        return [key for key in keys if re.match(searchTerm,key)]
+    
+    def searchRegExpInLineIndexDocs(self,searchTerm):
+        """Return unique() of the documents of all index words matching the regexp searchTerm ([] if it is empty)."""
+        if not searchTerm:
+            return []
+        matchingWords=self.findWordRegExp(searchTerm) # every index word the regexp matches
+        logging.info("wd:%s"%matchingWords)
+        docs=[]
+        for matchingWord in matchingWords:
+            docs.extend(self.searchInLineIndexDocs(matchingWord))
+
+        return unique(docs)
+        
+    def showInLineIndex(self):
+        """Log every key of self.lineIndex and return the index (debugging aid).
+
+        Also prints "show" to standard output.
+        """
+        print "show"
+        for indexedWord in self.lineIndex.iterkeys():
+            logging.info("word:%s"%repr(indexedWord))
+        return self.lineIndex
+        
+    def searchInLineIndexDocs(self,word,uniq=True,regExp=False):
+        """Return the keys (documents) stored under word in self.lineIndex ([] if there are none).
+
+        regExp delegates to searchRegExpInLineIndexDocs; uniq passes the result through unique().
+        """
+        if regExp:
+            return self.searchRegExpInLineIndexDocs(word)
+        try:
+            lst=list(self.lineIndex.get(word).keys())
+        except AttributeError: # no lineIndex yet, or word is not indexed (get() gave None); other errors propagate
+            lst=[]
+        if uniq:
+            return unique(lst)
+        return lst
+        
+    def getLinesFromIndex(self,word,doc,regExp=False):
+        """Return the entry self.lineIndex holds for doc under word.
+
+        With regExp set, word is a regular expression and the entry of the first matching index word that contains doc is returned (None if there is none).
+        """
+        if not regExp:
+            return self.lineIndex.get(word)[doc]
+        for w in self.findWordRegExp(word):
+            try:
+                return self.lineIndex.get(w)[doc] # look up the matched index word w, not the pattern itself
+            except (KeyError,TypeError): # doc is not stored under w, or get() gave None: try the next word
+                pass
+                     
+    def cleanInLineIndex(self):
+        """Delete every entry of self.lineIndex, print the keys that are left, and return "ok"."""
+        staleKeys=list(self.lineIndex.keys()) # snapshot first: entries are deleted while looping
+        for staleKey in staleKeys:
+            del self.lineIndex[staleKey]
+        print list(self.lineIndex.keys())
+        return "ok"
+    
+    def storeInLineIndex(self,key,value):
+        """Append value[1] to the list at self.lineIndex[key][value[0]] and commit the transaction."""
+        # start a fresh OOBTree when there is no index yet or it is still a plain dict
+        if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType):
+            self.lineIndex=OOBTree()
+        li=self.lineIndex
+
+        if li.has_key(key):
+            # the duplicate check below is disabled, so value[1] is appended even if already present
+            #if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])):
+            if li[key].has_key(value[0]):
+                tmp=li[key][value[0]]
+                tmp.append(value[1]) # extend the list already stored for value[0]
+                li[key][value[0]]=tmp[0:] # NOTE(review): presumably a copy is stored so the BTree registers the change -- confirm
+            else:
+                li[key][value[0]]=[value[1]] # first entry for value[0]: start a new list
+
+        else:
+            # key is not indexed yet
+            li[key]=OOBTree()# new BTree for this key
+            li[key][value[0]]=[value[1]] 
+
+
+        self.lineIndex=li
+        # the current transaction is committed on every single call
+        transaction.get().commit()
+        
 
     def showFile(self,fileId):
         """show a file"""
@@ -1860,8 +2035,71 @@ class CDLIRoot(Folder):
         if not f:
             return ""
         
-        return f[0].getObject().getLastVersionData()
+        return f[0].getObject().getLastVersionFormattedData()
+    
+    def showLineFromFile(self,fileId,lineNum,word):
+        """Fetch the text of fileId via showFile and return None (unfinished).
         
+        lineNum and word are not used yet; the extraction attempts below are disabled."""
+        file=self.showFile(fileId)
+        # Disabled attempt 1: regular expression search
+        #str="^%s\..*?%s[^\n]*\n"%(lineNum,word)
+        #m=re.search(str,file,flags=re.M|re.DOTALL)
+        #if m:
+        #    return m.group()
+        #else:
+        #    return ""
+        # Disabled attempt 2: split the text at "<lineNum>."
+        #ret=lineNum+"."
+        #splitted=file.split(lineNum+".")
+        #if len(splitted)>1:
+        #    for part in splitted[1:]:
+        #        if part.find(word)>-1:
+        #            for x in part.split("\n"):
+        #                ret+=x
+        #                if x.find(word)>-1:
+        #                    break
+        #            break
+        #return ret
+
+    def showWordInFile(self,fileId,word,lineList=None):
+        """Return the lines of the file fileId that contain word.
+
+        fileId   -- passed to showFile to fetch the text
+        word     -- plain substring to look for (not a regular expression)
+        lineList -- optional line numbers (strings); if non-empty, only lines
+                    whose number is listed are returned
+        """
+        text=self.showFile(fileId)
+        ret=[]
+        for line in text.split("\n"):
+            if word not in line:
+                continue
+            # the line number is everything before the first "." of the line
+            if (not lineList) or (line.split(".")[0] in lineList):
+                ret.append(line)
+        return ret
+
+    def tagWordInFile(self,fileId,word,lineList=None):
+        """Return the text of fileId as HTML with the occurrences of word tagged.
+
+        Every line containing word gets each occurrence wrapped in
+        <span class="found">...</span>; if a non-empty lineList is given, only
+        lines whose number is in lineList are tagged.  All other lines are kept
+        unchanged.  The lines are joined with "<br>" plus a newline.
+        """
+        text=self.showFile(fileId)
+        tagged="""<span class="found">%s</span>"""%word
+        ret=[]
+        for line in text.split("\n"):
+            # the line number is everything before the first "." of the line
+            if word in line and ((not lineList) or (line.split(".")[0] in lineList)):
+                line=line.replace(word,tagged)
+            # a hit on a line outside lineList is kept untagged instead of being dropped
+            ret.append(line)
+        # NOTE(review): neither text nor word is HTML-escaped here -- confirm the caller handles it
+        return "<br>\n".join(ret)
+
     def URLquote(self,str):
         """quote url"""
         return urllib.quote(str)
@@ -1870,6 +2108,14 @@ class CDLIRoot(Folder):
         """unquote url"""
         return urllib.unquote(str)
     
+    def URLquote_plus(self,str):
+        """Return urllib.quote_plus(str)."""
+        return urllib.quote_plus(str)
+    
+    def URLunquote_plus(self,str):
+        """Return urllib.unquote_plus(str)."""
+        return urllib.unquote_plus(str)
+    
     
     def forceunlock(self):
         "break all locks"
@@ -2102,7 +2348,7 @@ class CDLIRoot(Folder):
             if not obj:
                 manage_addCDLIFileFolder(root,folder,folder)
                 fobj=getattr(root,folder)
-                #get_transaction().commit()                           
+                #transaction.get().commit()                           
             else:
                 fobj=obj[0][1]
             
@@ -2130,9 +2376,9 @@ class CDLIRoot(Folder):
 
 	    if count > 1000:
 		print "committing"
-		get_transaction().commit()
+		transaction.get().commit()
 		count=0
-	get_transaction().commit()
+	    transaction.get().commit()
         return "ok"