Diff for /cdli/cdli_files.py between versions 1.80.2.8 and 1.80.2.10

Left column: version 1.80.2.8, 2007/11/27 10:12:58 | Right column: version 1.80.2.10, 2007/12/03 21:30:19
Line 2137  class CDLIRoot(Folder): Line 2137  class CDLIRoot(Folder):
         resultset = idx.search(query_request=idxQuery,sort_index='textid')          resultset = idx.search(query_request=idxQuery,sort_index='textid')
         # put only the P-Number in the result           # put only the P-Number in the result 
         results = [res.getId[:7] for res in resultset]          results = [res.getId[:7] for res in resultset]
           logging.debug("searchtext: found %d texts"%len(results))
         return results          return results
   
   
Line 2175  class CDLIRoot(Folder): Line 2176  class CDLIRoot(Folder):
         # add whitespace before and whitespace and line-end to splitter bounds expressions          # add whitespace before and whitespace and line-end to splitter bounds expressions
         bounds = self.splitter[indexName].bounds          bounds = self.splitter[indexName].bounds
         splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds)          splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds)
           # clean word expression 
           # TODO: this should use QueryParser itself
           word = word.replace('"','') # take out double quotes
           # escape parens for regexp too
         # compile into regexp objects          # compile into regexp objects
         wordlist = [re.compile(splitexp%w) for w in word.split(' ')]          wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')]
                           
         for line in file.split("\n"):          for line in file.split("\n"):
             for word in wordlist:              for word in wordlist:
Line 2190  class CDLIRoot(Folder): Line 2195  class CDLIRoot(Folder):
   
           
     def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):      def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
         """get lines with word from all ids in list FileIds"""          """
           get lines with word from all ids in list FileIds.
           returns dict with id:lines pairs.
           """
         logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))           logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) 
         return [self.showWordInFile(id, word, indexName, regExp) for id in fileIds]          
           return dict([(id,self.showWordInFile(id, word, indexName, regExp)) for id in fileIds])
           
   
     def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False):      def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False):
Line 2208  class CDLIRoot(Folder): Line 2217  class CDLIRoot(Folder):
         # add whitespace to splitter bounds expressions and compile into regexp object          # add whitespace to splitter bounds expressions and compile into regexp object
         bounds = self.splitter[indexName].bounds          bounds = self.splitter[indexName].bounds
         wordsplit = re.compile("(%s|\s)"%bounds)          wordsplit = re.compile("(%s|\s)"%bounds)
           # clean word expression 
           # TODO: this should use QueryParser itself
           word = word.replace('"','') # take out double quotes
         # split search terms by blanks          # split search terms by blanks
         words = word.split(' ')          words = word.split(' ')
                           
Line 2248  class CDLIRoot(Folder): Line 2260  class CDLIRoot(Folder):
   
   
     def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):      def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
         """get texts with word from all ids in list FileIds"""          """
           get texts with highlighted word from all ids in list FileIds.
           returns dict with id:text pairs.
           """
         logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))           logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) 
         return [self.tagWordInFile(id, word, indexName, regExp) for id in fileIds]          return dict([(id,self.tagWordInFile(id, word, indexName, regExp)) for id in fileIds])
           
   
     def URLquote(self,str):      def URLquote(self,str):

Legend: lines removed from v.1.80.2.8 | changed lines | lines added in v.1.80.2.10


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>