--- cdli/cdli_files.py 2007/10/24 20:36:07 1.80.2.4 +++ cdli/cdli_files.py 2007/10/26 22:45:12 1.80.2.5 @@ -2125,36 +2125,34 @@ class CDLIRoot(Folder): return f[0].getObject().getLastVersionFormattedData() - def showWordInFile(self,fileId,word,lineList=None,regExp=False,indexName=""): + def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): """get lines with word from FileId""" file=self.showFile(fileId) - logging.debug("show word regEXP %s"%regExp) ret=[] + # search using lowercase + word = word.lower() if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen wordlist=self.findWordRegExp(indexName,word) else: - wordlist=[word] + # split the search term into words according to the corresponding splitter + #try: + wordlist = self.splitter[indexName].process([word]) + #except: + # wordlist=[word] for line in file.split("\n"): line = formatAtfLineHtml(unicodify(line)) - found=False + if not line: + # formatAtf can produce empty lines + continue for word in wordlist: - try: # just a hack because of possible unicode errors in line - if line.find(word)>-1: - if lineList: #liste of moeglichen Zeilennummern - num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile - if num in lineList: - ret.append(line) - else: # nimm alles ohne line check - ret.append(line) - break; - except: - pass + if line.lower().find(word)>-1: + ret.append(line) return ret - def tagWordInFile(self,fileId,word,indexName='words',regExp=False): + def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): """get text with word highlighted from FileId""" file=self.showFile(fileId)