cdli/cdli_files.py - diff

Return to cdli_files.py CVS log

Up to [Repository] / cdli

Diff for /cdli/cdli_files.py between versions 1.53 and 1.56

-version 1.53, 2006/12/22 11:56:08
+version 1.56, 2007/01/08 14:36:28
  Line 23  from ZPublisher.BaseRequest import Reque
  import threading
  from BTrees.OOBTree import OOBTree
+ def unique(s):
+     """Return a list of the elements in s, but without duplicates.
+     For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+     unique("abcabc") some permutation of ["a", "b", "c"], and
+     unique(([1, 2], [2, 3], [1, 2])) some permutation of
+     [[2, 3], [1, 2]].
+     For best speed, all sequence elements should be hashable.  Then
+     unique() will usually work in linear time.
+     If not possible, the sequence elements should enjoy a total
+     ordering, and if list(s).sort() doesn't raise TypeError it's
+     assumed that they do enjoy a total ordering.  Then unique() will
+     usually work in O(N*log2(N)) time.
+     If that's not possible either, the sequence elements must support
+     equality-testing.  Then unique() will usually work in quadratic
+     time.
+     s must support len(); an empty s gives [].  The order of the result
+     depends on which strategy succeeds: dict key order for the hashing
+     path, ascending sorted order for the sorting path, and order of
+     first occurrence for the brute-force path.
+     (from the python cookbook)
+     """
+     n = len(s)
+     if n == 0:
+         return []
+     # Try using a dict first, as that's the fastest and will usually
+     # work.  If it doesn't work, it will usually fail quickly, so it
+     # usually doesn't cost much to *try* it.  It requires that all the
+     # sequence elements be hashable, and support equality comparison.
+     u = {}
+     try:
+         for x in s:
+             u[x] = 1
+     except TypeError:
+         del u  # move on to the next method
+     else:
+         return u.keys()
+     # We can't hash all the elements.  Second fastest is to sort,
+     # which brings the equal elements together; then duplicates are
+     # easy to weed out in a single pass.
+     # NOTE:  Python's list.sort() was designed to be efficient in the
+     # presence of many duplicate elements.  This isn't true of all
+     # sort functions in all languages or libraries, so this approach
+     # is more effective in Python than it may be elsewhere.
+     try:
+         t = list(s)
+         t.sort()
+     except TypeError:
+         # NOTE(review): if list(s) itself raised the TypeError, t was never
+         # bound and this del raises NameError instead of falling through.
+         del t  # move on to the next method
+     else:
+         # n == 0 already returned above, so t[0] exists.
+         assert n > 0
+         # Compact t in place: "last" is the most recent distinct value and
+         # "lasti" is the slot where the next distinct value gets written.
+         last = t[0]
+         lasti = i = 1
+         while i < n:
+             if t[i] != last:
+                 t[lasti] = last = t[i]
+                 lasti += 1
+             i += 1
+         # Everything from lasti onwards is leftover duplicates.
+         return t[:lasti]
+     # Brute force is all that's left.
+     u = []
+     for x in s:
+         if x not in u:
+             u.append(x)
+     return u
  class BasketContent(SimpleItem):
      """classe fuer den Inhalt eines Baskets"""
- Line 1866  class CDLIRoot(Folder):
+ Line 1936  class CDLIRoot(Folder):
          """get the index for debug purposes"""
          print "show"
          for x in self.lineIndex.iterkeys():
-             print "word:",x
+             print "word:",repr(x)
-             for y in self.lineIndex[x].iterkeys():
+             #for y in self.lineIndex[x].iterkeys():
-                 print "doc",y,self.lineIndex[x][y]
+             #    print "doc",repr(y),repr(self.lineIndex[x][y])
          return self.lineIndex
-     def searchInLineIndexDocs(self,word):
+     def searchInLineIndexDocs(self,word,uniq=True):
          """search occurrences"""
-         return list(self.lineIndex.get(word.upper()).keys())
+         # Collect the keys stored under word in self.lineIndex.  Unlike the
+         # removed line above, word is no longer upper-cased before the lookup.
+         # The bare except turns any failure of the lookup into an empty list.
+         # NOTE(review): "except:" and "lst=[]" appear dedented in this listing,
+         # presumably tabs lost in rendering -- confirm against the real file.
+         try:
+             lst=list(self.lineIndex.get(word).keys())
+     except:
+         lst=[]
+         # uniq=True (the default) passes the key list through unique().
+         if uniq:
+             return unique(lst)
+         else:
+             return lst
      def getLinesFromIndex(self,word,doc):
          """get lines: return the entry stored for doc under word in self.lineIndex"""
-         return self.lineIndex[word][doc]
+         # NOTE(review): if get() yields None for a word that is not indexed,
+         # the [doc] subscript below fails with TypeError -- confirm callers.
+         return self.lineIndex.get(word)[doc]
      def cleanInLineIndex(self):
          """delete InlineIndex"""
- Line 1897  class CDLIRoot(Folder):
+ Line 1976  class CDLIRoot(Folder):
          if li.has_key(key):
-             if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])):
+ #            if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])):
-                 li[key][value[0]].append(value[1]) # add it if now in the array
+             if li[key].has_key(value[0]):
+                 tmp=li[key][value[0]]
+                 tmp.append(value[1]) # add it if now in the array
+                 li[key][value[0]]=tmp[0:]
              else:
                  li[key][value[0]]=[value[1]] # new array for lines
- Line 1921  class CDLIRoot(Folder):
+ Line 2003  class CDLIRoot(Folder):
          return f[0].getObject().getLastVersionFormattedData()
+     def showLineFromFile(self,fileId,lineNum,word):
+         """get line lineNum fromFileId
+
+         As listed here, only the self.showFile(fileId) call is live code:
+         both lookup attempts below (a regex search and a split on
+         lineNum+".") are commented out, so lineNum and word are unused
+         and the method falls off the end, returning None.
+         """
+         file=self.showFile(fileId)
+         # Disabled attempt 1: regex search for the numbered line containing word.
+         #str="^%s\.[^%s\.]*%s[^\n]*\n"%(lineNum,lineNum,word)
+     #str="^%s\..*?%s[^\n]*\n"%(lineNum,word)
+     #print str
+         #m=re.search(str,file,flags=re.M|re.DOTALL)
+         #if m:
+         #    return m.group()
+         #else:
+         #       return ""
+         # Disabled attempt 2: split the text on lineNum+"." and scan the parts.
+     #ret=lineNum+"."
+         #splitted=file.split(lineNum+".")
+     #if len(splitted)>1:
+         #for part in splitted[1:]:
+             #if part.find(word)>-1:
+              # for x in part.split("\n"):
+                 #ret+=x
+                 #if x.find(word)>-1:
+                     #break
+               #break;
+     #return ret
+     def showWordInFile(self,fileId,word,lineList=None):
+         """get lines with word  fromFileId
+
+         Fetches the text with self.showFile(fileId), splits it on newlines
+         and returns a list of the lines that contain word (substring match
+         via find).  If lineList is given and non-empty, a matching line is
+         kept only when the text before its first "." is in lineList; that
+         text is a string, so lineList entries must be strings to match.
+         """
+         file=self.showFile(fileId)
+     ret=[]
+     for line in file.split("\n"):
+         if line.find(word)>-1:
+             if lineList: #list of possible line numbers
+                 num=line.split(".")[0] #the line number is everything before the "." in the line
+                 if num in lineList:
+                     ret.append(line)
+             else: # take everything, no line-number check
+                 ret.append(line)
+     return ret
+     def tagWordInFile(self,fileId,word,lineList=None):
+         """tag word in the text of fileId
+
+         Fetches the text with self.showFile(fileId), splits it on newlines
+         and returns the lines joined with "<br>" plus a newline.  On lines
+         that contain word, every occurrence of word is wrapped in
+         <span class="found">...</span>; lines without word are passed
+         through unchanged.  If lineList is given and non-empty, a matching
+         line is tagged only when the text before its first "." is in
+         lineList.
+         """
+         file=self.showFile(fileId)
+     tagStr="""<span class="found">%s</span>"""
+     ret=[]
+     for line in file.split("\n"):
+         if line.find(word)>-1:
+             if lineList: #list of possible line numbers
+                 num=line.split(".")[0] #the line number is everything before the "." in the line
+                 # NOTE(review): as indented in this listing, a line that
+                 # contains word but whose number is not in lineList is
+                 # dropped from the output entirely -- confirm intended.
+                 if num in lineList:
+                     ret.append(line.replace(word,tagStr%word))
+             else: # take everything, no line-number check
+                 ret.append(line.replace(word,tagStr%word))
+         else:
+             ret.append(line)
+     # NOTE(review): word and the line text go into the markup without any
+     # HTML escaping in this method.
+     return "<br>\n".join(ret)
      def URLquote(self,str):
          """quote url"""
          return urllib.quote(str)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.53
changed lines
	Added in v.1.56