cdli/cdli_files.py - diff

Return to cdli_files.py CVS log

Up to [Repository] / cdli

Diff for /cdli/cdli_files.py between versions 1.80.2.10 and 1.80.2.11

version 1.80.2.10, 2007/12/03 21:30:19	version 1.80.2.11, 2007/12/13 19:20:45
Line 2178 class CDLIRoot(Folder):	Line 2178 class CDLIRoot(Folder):
splitexp = "(%s\|\s)(%%s)(%s\|\s\|\Z)"%(bounds,bounds)	splitexp = "(%s\|\s)(%%s)(%s\|\s\|\Z)"%(bounds,bounds)
# clean word expression	# clean word expression
# TODO: this should use QueryParser itself	# TODO: this should use QueryParser itself
word = word.replace('"','') # take out double quotes	# take out double quotes
# escape parens for regexp too	word = word.replace('"','')
# compile into regexp objects	# take out ignorable signs
	ignorable = self.splitter[indexName].ignorex
	word = ignorable.sub('', word)
	# compile into regexp objects and escape parens
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')]	wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')]

for line in file.split("\n"):	for line in file.split("\n"):
for word in wordlist:	for word in wordlist:
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,line))	#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignorable.sub('',line)))
if word.search(line):	if word.search(ignorable.sub('',line)):
line = formatAtfLineHtml(line)	line = formatAtfLineHtml(line)
ret.append(line)	ret.append(line)
break	break
Line 2220 class CDLIRoot(Folder):	Line 2223 class CDLIRoot(Folder):
# clean word expression	# clean word expression
# TODO: this should use QueryParser itself	# TODO: this should use QueryParser itself
word = word.replace('"','') # take out double quotes	word = word.replace('"','') # take out double quotes
	# take out ignorable signs
	ignorable = self.splitter[indexName].ignorex
	word = ignorable.sub('', word)
# split search terms by blanks	# split search terms by blanks
words = word.split(' ')	words = word.split(' ')
	# split search terms again (for grapheme search with words)
	splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words))

for line in file.split("\n"):	for line in file.split("\n"):
line = unicodify(line)	line = unicodify(line)
Line 2232 class CDLIRoot(Folder):	Line 2240 class CDLIRoot(Folder):
# first scan	# first scan
hitwords = []	hitwords = []
for w in words:	for w in words:
if line.find(w) > -1:	if ignorable.sub('',line).find(w) > -1:
# word is in line	# word is in line
hitwords.append(w)	# append split word for grapheme search with words
	hitwords.extend(splitwords[w])
	#hitwords.extend(wordsplit.split(w))

# examine hits closer	# examine hits closer
if hitwords:	if hitwords:
Line 2242 class CDLIRoot(Folder):	Line 2252 class CDLIRoot(Folder):
parts = wordsplit.split(line)	parts = wordsplit.split(line)
line = ""	line = ""
for p in parts:	for p in parts:
	#logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords))
# reassemble line	# reassemble line
if p in hitwords:	if ignorable.sub('', p) in hitwords:
	#logging.debug("tagwordinfile: found %s in %s"%(p,hitwords))
# this part was found	# this part was found
line += tagStart + formatAtfHtml(p) + tagEnd	line += tagStart + formatAtfHtml(p) + tagEnd
else:	else:

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.80.2.10
changed lines
	Added in v.1.80.2.11