--- cdli/cdli_files.py	2007/11/19 15:14:44	1.80.2.7
+++ cdli/cdli_files.py	2007/12/13 19:20:45	1.80.2.11
@@ -2137,6 +2137,7 @@ class CDLIRoot(Folder):
         resultset = idx.search(query_request=idxQuery,sort_index='textid')
         # put only the P-Number in the result 
         results = [res.getId[:7] for res in resultset]
+        logging.debug("searchtext: found %d texts"%len(results))
         return results
 
 
@@ -2167,6 +2168,7 @@ class CDLIRoot(Folder):
 
     def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,):
         """get lines with word from FileId"""
+        logging.debug("showwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) 
         
         file = formatAtfFullLineNum(self.getFile(fileId))
         ret=[]
@@ -2174,22 +2176,40 @@ class CDLIRoot(Folder):
         # add whitespace before and whitespace and line-end to splitter bounds expressions
         bounds = self.splitter[indexName].bounds
         splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds)
-        # compile into regexp objects
-        wordlist = [re.compile(splitexp%w) for w in word.split(' ')]
+        # clean word expression 
+        # TODO: this should use QueryParser itself
+        # take out double quotes
+        word = word.replace('"','')
+        # take out ignorable signs
+        ignorable = self.splitter[indexName].ignorex
+        word = ignorable.sub('', word)
+        # escape regexp metacharacters in each term, then compile into regexp objects
+        wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')]
             
         for line in file.split("\n"):
             for word in wordlist:
-                #logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,line))
-                if word.search(line):
+                #logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignorable.sub('',line)))
+                if word.search(ignorable.sub('',line)):
                     line = formatAtfLineHtml(line)
                     ret.append(line)
                     break
                     
         return ret
+
+    
+    def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
+        """
+        Get the lines containing word for every file id in the list fileIds.
+        Returns a dict with id:lines pairs (lines as returned by showWordInFile).
+        """
+        logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))
+        
+        return dict([(id,self.showWordInFile(id, word, indexName, regExp)) for id in fileIds])
     
 
     def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False):
         """get text with word highlighted from FileId"""
+        logging.debug("tagwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) 
         
         file=self.getFile(fileId)
         tagStart=u'<span class="found">'
@@ -2200,8 +2220,16 @@ class CDLIRoot(Folder):
         # add whitespace to splitter bounds expressions and compile into regexp object
         bounds = self.splitter[indexName].bounds
         wordsplit = re.compile("(%s|\s)"%bounds)
+        # clean word expression 
+        # TODO: this should use QueryParser itself
+        word = word.replace('"','') # take out double quotes
+        # take out ignorable signs
+        ignorable = self.splitter[indexName].ignorex
+        word = ignorable.sub('', word)
         # split search terms by blanks
         words = word.split(' ')
+        # split search terms again (for grapheme search with words)
+        splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words))
             
         for line in file.split("\n"):
             line = unicodify(line)
@@ -2212,9 +2240,11 @@ class CDLIRoot(Folder):
             # first scan
             hitwords = []
             for w in words:
-                if line.find(w) > -1:
+                if ignorable.sub('',line).find(w) > -1:
                     # word is in line
-                    hitwords.append(w)
+                    # append split word for grapheme search with words
+                    hitwords.extend(splitwords[w])
+                    #hitwords.extend(wordsplit.split(w))
                    
             # examine hits closer
             if hitwords:
@@ -2222,8 +2252,10 @@ class CDLIRoot(Folder):
                 parts = wordsplit.split(line)
                 line = ""
                 for p in parts:
+                    #logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords))
                     # reassemble line
-                    if p in hitwords:
+                    if ignorable.sub('', p) in hitwords:
+                        #logging.debug("tagwordinfile: found %s in %s"%(p,hitwords))
                         # this part was found
                         line += tagStart + formatAtfHtml(p) + tagEnd
                     else:
@@ -2236,6 +2268,16 @@ class CDLIRoot(Folder):
             ret.append(line)
                         
         return u'<br>\n'.join(ret)
+
+
+
+    def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
+        """
+        Get the text with word highlighted for every file id in the list fileIds.
+        Returns a dict with id:text pairs (text as returned by tagWordInFile).
+        """
+        logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds)) 
+        return dict([(id,self.tagWordInFile(id, word, indexName, regExp)) for id in fileIds])
     
 
     def URLquote(self,str):