cdli/cdli_files.py - diff

Return to cdli_files.py CVS log

Up to [Repository] / cdli

Diff for /cdli/cdli_files.py between versions 1.51 and 1.57

-version 1.51, 2006/11/14 17:02:59
+version 1.57, 2007/01/09 16:06:09
  Line 21  from ZPublisher.HTTPRequest import HTTPR
  from ZPublisher.HTTPResponse import HTTPResponse
  from ZPublisher.BaseRequest import RequestContainer
  import threading
+ from BTrees.OOBTree import OOBTree
+ import logging
+ def unique(s):
+     """Return a list of the elements in s, but without duplicates.
+     For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3],
+     unique("abcabc") some permutation of ["a", "b", "c"], and
+     unique(([1, 2], [2, 3], [1, 2])) some permutation of
+     [[2, 3], [1, 2]].
+     For best speed, all sequence elements should be hashable.  Then
+     unique() will usually work in linear time.
+     If not possible, the sequence elements should enjoy a total
+     ordering, and if list(s).sort() doesn't raise TypeError it's
+     assumed that they do enjoy a total ordering.  Then unique() will
+     usually work in O(N*log2(N)) time.
+     If that's not possible either, the sequence elements must support
+     equality-testing.  Then unique() will usually work in quadratic
+     time.
+     (from the python cookbook)
+     """
+     # s must support len(); an empty input is answered with a fresh empty list.
+     n = len(s)
+     if n == 0:
+         return []
+     # Try using a dict first, as that's the fastest and will usually
+     # work.  If it doesn't work, it will usually fail quickly, so it
+     # usually doesn't cost much to *try* it.  It requires that all the
+     # sequence elements be hashable, and support equality comparison.
+     u = {}
+     try:
+         for x in s:
+             u[x] = 1
+     except TypeError:
+         del u  # move on to the next method
+     else:
+         # The keys are exactly the distinct elements, in arbitrary order.
+         return u.keys()
+     # We can't hash all the elements.  Second fastest is to sort,
+     # which brings the equal elements together; then duplicates are
+     # easy to weed out in a single pass.
+     # NOTE:  Python's list.sort() was designed to be efficient in the
+     # presence of many duplicate elements.  This isn't true of all
+     # sort functions in all languages or libraries, so this approach
+     # is more effective in Python than it may be elsewhere.
+     try:
+         t = list(s)
+         t.sort()
+     except TypeError:
+         del t  # move on to the next method
+     else:
+         # n > 0 is guaranteed by the early return for empty input above.
+         assert n > 0
+         # In-place compaction: t[:lasti] holds the distinct values found so
+         # far, i scans the sorted list and copies every value that differs
+         # from the last one kept.
+         last = t[0]
+         lasti = i = 1
+         while i < n:
+             if t[i] != last:
+                 t[lasti] = last = t[i]
+                 lasti += 1
+             i += 1
+         return t[:lasti]
+     # Brute force is all that's left.
+     # Only == is needed here: each element is compared with all elements
+     # kept so far, so the result preserves first-occurrence order.
+     u = []
+     for x in s:
+         if x not in u:
+             u.append(x)
+     return u
  class BasketContent(SimpleItem):
- Line 1308  class CDLIFileObject(CatalogAware,extVer
+ Line 1379  class CDLIFileObject(CatalogAware,extVer
      security.declarePublic('view')
+     def getFormattedData(self):
+         """fromat text"""
+         data=self.getData()
+         return re.sub("\s\#lem"," #lem",data) #remove return vor #lem
      def view(self):
          """view file"""
          pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self)
- Line 1393  class CDLIFile(extVersionedFile,CatalogA
+ Line 1469  class CDLIFile(extVersionedFile,CatalogA
          """get last version data"""
          return self.getLastVersion().getData()
+     def getLastVersionFormattedData(self):
+         """Return what getFormattedData() of the last version returns."""
+         return self.getLastVersion().getFormattedData()
      #security.declarePublic('history')
- Line 1853  class CDLIRoot(Folder):
+ Line 1933  class CDLIRoot(Folder):
      meta_type="CDLIRoot"
      downloadCounterBaskets=0# counts the current basket downloads if counter > 10 no downloads are possible
+     def findWordRegExp(self,searchTerm):
+         """find all words in index which match regexp in SearchTerm"""
+         ret=[]
+         for x in self.lineIndex.iterkeys():
+             if re.match(searchTerm,x):
+                 ret.append(x)
+         return ret
+     def searchRegExpInLineIndexDocs(self,searchTerm):
+         """search in inLineIndex with regexp"""
+         if not searchTerm:
+             return []
+         ret=[]
+         words=self.findWordRegExp(searchTerm) # suche nach allen Treffern
+         logging.info("wd:%s"%words)
+         for word in words:
+             ret+=self.searchInLineIndexDocs(word)
+         return unique(ret)
+     def showInLineIndex(self):
+         """Log every word of the line index (debugging aid) and return the index itself."""
+         # marker on stdout so the call is visible in the console
+         print "show"
+         for x in self.lineIndex.iterkeys():
+             logging.info("word:%s"%repr(x))
+             # disabled: per-document dump of the entries stored below each word
+             #for y in self.lineIndex[x].iterkeys():
+             #    print "doc",repr(y),repr(self.lineIndex[x][y])
+         return self.lineIndex
+     def searchInLineIndexDocs(self,word,uniq=True,regExp=False):
+         """search occurences"""
+         if regExp:
+             return self.searchRegExpInLineIndexDocs(word)
+         try:
+             lst=list(self.lineIndex.get(word).keys())
+         except:
+             lst=[]
+         if uniq:
+             return unique(lst)
+         else:
+             return lst
+     def getLinesFromIndex(self,word,doc,regExp=False):
+         """get lines"""
+         if not regExp:
+             return self.lineIndex.get(word)[doc]
+         else: # wenn regexp, suche welches word
+             for w in self.findWordRegExp(word):
+                 if self.lineIndex.get(w): # ein word in im dex gefunden
+                     try:
+                         dc=self.lineIndex.get(word)[doc]
+                         return dc # und ein document dann gib es zurueck
+                     except:
+                          pass #andernfalls weiter
+     def cleanInLineIndex(self):
+         """Delete every entry of the line index and return "ok"."""
+         # loop over a list copy of the keys because entries are deleted while looping
+         for x in list(self.lineIndex.keys()):
+             del(self.lineIndex[x])
+         # debug output: the keys that are left (an empty list is expected)
+         print [x for x in self.lineIndex.keys()]
+         return "ok"
+     def storeInLineIndex(self,key,value):
+         """Store one occurrence in the line index and commit the transaction.
+         key is the index word.  value is a sequence: value[0] selects the entry
+         below that word and value[1] is appended to the list kept there
+         (presumably a document id and a line number -- confirm against the callers)."""
+         # (re)create the index if it is missing or still a plain dictionary
+         if (not hasattr(self,'lineIndex')) or (type(self.lineIndex) is DictType):
+             self.lineIndex=OOBTree()
+         li=self.lineIndex
+         if li.has_key(key):
+ # disabled variant that skipped values already present in the list:
+ #            if li[key].has_key(value[0]) and (not (value[1] in li[key][value[0]])):
+             if li[key].has_key(value[0]):
+                 tmp=li[key][value[0]]
+                 tmp.append(value[1]) # always appended, duplicates are not filtered
+                 li[key][value[0]]=tmp[0:] # store a copy -- presumably so the tree registers the change; confirm
+             else:
+                 li[key][value[0]]=[value[1]] # new array for lines
+         else:
+             li[key]=OOBTree()# new btree for lines
+             li[key][value[0]]=[value[1]]
+         self.lineIndex=li
+         # NOTE(review): one commit per stored value
+         get_transaction().commit()
      def showFile(self,fileId):
          """show a file"""
- Line 1860  class CDLIRoot(Folder):
+ Line 2033  class CDLIRoot(Folder):
          if not f:
              return ""
-         return f[0].getObject().getLastVersionData()
+         return f[0].getObject().getLastVersionFormattedData()
+     def showLineFromFile(self,fileId,lineNum,word):
+         """Meant to return line lineNum of file fileId; currently returns None.
+         Only the text of the file is fetched, lineNum and word are unused."""
+         file=self.showFile(fileId)
+         # NOTE(review): everything below is disabled code (a regexp based and a
+         # split based attempt), so the method ends here without a return value.
+         #str="^%s\.[^%s\.]*%s[^\n]*\n"%(lineNum,lineNum,word)
+     #str="^%s\..*?%s[^\n]*\n"%(lineNum,word)
+     #print str
+         #m=re.search(str,file,flags=re.M|re.DOTALL)
+         #if m:
+         #    return m.group()
+         #else:
+         #       return ""
+     #ret=lineNum+"."
+         #splitted=file.split(lineNum+".")
+     #if len(splitted)>1:
+         #for part in splitted[1:]:
+             #if part.find(word)>-1:
+              # for x in part.split("\n"):
+                 #ret+=x
+                 #if x.find(word)>-1:
+                     #break
+               #break;
+     #return ret
+     def showWordInFile(self,fileId,word,lineList=None):
+         """get lines with word  fromFileId"""
+         file=self.showFile(fileId)
+     ret=[]
+     for line in file.split("\n"):
+         if line.find(word)>-1:
+             if lineList: #liste of moeglichen Zeilennummern
+                 num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
+                 if num in lineList:
+                     ret.append(line)
+             else: # nimm alles ohne line check
+                 ret.append(line)
+     return ret
+     def tagWordInFile(self,fileId,word,lineList=None):
+         """get lines with word  fromFileId"""
+         file=self.showFile(fileId)
+     tagStr="""<span class="found">%s</span>"""
+     ret=[]
+     for line in file.split("\n"):
+         if line.find(word)>-1:
+             if lineList: #liste of moeglichen Zeilennummern
+                 num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
+                 if num in lineList:
+                     ret.append(line.replace(word,tagStr%word))
+             else: # nimm alles ohne line check
+                 ret.append(line.replace(word,tagStr%word))
+         else:
+             ret.append(line)
+     return "<br>\n".join(ret)
      def URLquote(self,str):
          """quote url"""

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.51
changed lines
	Added in v.1.57