version 1.80.2.10, 2007/12/03 21:30:19
|
version 1.80.2.11, 2007/12/13 19:20:45
|
Line 2178 class CDLIRoot(Folder):
|
Line 2178 class CDLIRoot(Folder):
|
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
# clean word expression |
# clean word expression |
# TODO: this should use QueryParser itself |
# TODO: this should use QueryParser itself |
word = word.replace('"','') # take out double quotes |
# take out double quotes |
# escape parens for regexp too |
word = word.replace('"','') |
# compile into regexp objects |
# take out ignorable signs |
|
ignorable = self.splitter[indexName].ignorex |
|
word = ignorable.sub('', word) |
|
# compile into regexp objects and escape parens |
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] |
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
for word in wordlist: |
for word in wordlist: |
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,line)) |
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignorable.sub('',line))) |
if word.search(line): |
if word.search(ignorable.sub('',line)): |
line = formatAtfLineHtml(line) |
line = formatAtfLineHtml(line) |
ret.append(line) |
ret.append(line) |
break |
break |
Line 2220 class CDLIRoot(Folder):
|
Line 2223 class CDLIRoot(Folder):
|
# clean word expression |
# clean word expression |
# TODO: this should use QueryParser itself |
# TODO: this should use QueryParser itself |
word = word.replace('"','') # take out double quotes |
word = word.replace('"','') # take out double quotes |
|
# take out ignorable signs |
|
ignorable = self.splitter[indexName].ignorex |
|
word = ignorable.sub('', word) |
# split search terms by blanks |
# split search terms by blanks |
words = word.split(' ') |
words = word.split(' ') |
|
# split search terms again (for grapheme search with words) |
|
splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words)) |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
line = unicodify(line) |
line = unicodify(line) |
Line 2232 class CDLIRoot(Folder):
|
Line 2240 class CDLIRoot(Folder):
|
# first scan |
# first scan |
hitwords = [] |
hitwords = [] |
for w in words: |
for w in words: |
if line.find(w) > -1: |
if ignorable.sub('',line).find(w) > -1: |
# word is in line |
# word is in line |
hitwords.append(w) |
# append split word for grapheme search with words |
|
hitwords.extend(splitwords[w]) |
|
#hitwords.extend(wordsplit.split(w)) |
|
|
# examine hits closer |
# examine hits closer |
if hitwords: |
if hitwords: |
Line 2242 class CDLIRoot(Folder):
|
Line 2252 class CDLIRoot(Folder):
|
parts = wordsplit.split(line) |
parts = wordsplit.split(line) |
line = "" |
line = "" |
for p in parts: |
for p in parts: |
|
#logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords)) |
# reassemble line |
# reassemble line |
if p in hitwords: |
if ignorable.sub('', p) in hitwords: |
|
#logging.debug("tagwordinfile: found %s in %s"%(p,hitwords)) |
# this part was found |
# this part was found |
line += tagStart + formatAtfHtml(p) + tagEnd |
line += tagStart + formatAtfHtml(p) + tagEnd |
else: |
else: |