cdli/cdli_files.py - diff

Return to cdli_files.py CVS log

Up to [Repository] / cdli

Diff for /cdli/cdli_files.py between versions 1.80.2.9 and 1.80.2.11

version 1.80.2.9, 2007/11/27 10:27:39	version 1.80.2.11, 2007/12/13 19:20:45
Line 2137 class CDLIRoot(Folder):	Line 2137 class CDLIRoot(Folder):
resultset = idx.search(query_request=idxQuery,sort_index='textid')	resultset = idx.search(query_request=idxQuery,sort_index='textid')
# put only the P-Number in the result	# put only the P-Number in the result
results = [res.getId[:7] for res in resultset]	results = [res.getId[:7] for res in resultset]
	logging.debug("searchtext: found %d texts"%len(results))
return results	return results


Line 2175 class CDLIRoot(Folder):	Line 2176 class CDLIRoot(Folder):
# add whitespace before and whitespace and line-end to splitter bounds expressions	# add whitespace before and whitespace and line-end to splitter bounds expressions
bounds = self.splitter[indexName].bounds	bounds = self.splitter[indexName].bounds
splitexp = "(%s\|\s)(%%s)(%s\|\s\|\Z)"%(bounds,bounds)	splitexp = "(%s\|\s)(%%s)(%s\|\s\|\Z)"%(bounds,bounds)
# compile into regexp objects	# clean word expression
wordlist = [re.compile(splitexp%w) for w in word.split(' ')]	# TODO: this should use QueryParser itself
	# take out double quotes
	word = word.replace('"','')
	# take out ignorable signs
	ignorable = self.splitter[indexName].ignorex
	word = ignorable.sub('', word)
	# compile into regexp objects and escape parens
	wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')]

for line in file.split("\n"):	for line in file.split("\n"):
for word in wordlist:	for word in wordlist:
#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,line))	#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignorable.sub('',line)))
if word.search(line):	if word.search(ignorable.sub('',line)):
line = formatAtfLineHtml(line)	line = formatAtfLineHtml(line)
ret.append(line)	ret.append(line)
break	break
Line 2212 class CDLIRoot(Folder):	Line 2220 class CDLIRoot(Folder):
# add whitespace to splitter bounds expressions and compile into regexp object	# add whitespace to splitter bounds expressions and compile into regexp object
bounds = self.splitter[indexName].bounds	bounds = self.splitter[indexName].bounds
wordsplit = re.compile("(%s\|\s)"%bounds)	wordsplit = re.compile("(%s\|\s)"%bounds)
	# clean word expression
	# TODO: this should use QueryParser itself
	word = word.replace('"','') # take out double quotes
	# take out ignorable signs
	ignorable = self.splitter[indexName].ignorex
	word = ignorable.sub('', word)
# split search terms by blanks	# split search terms by blanks
words = word.split(' ')	words = word.split(' ')
	# split search terms again (for grapheme search with words)
	splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words))

for line in file.split("\n"):	for line in file.split("\n"):
line = unicodify(line)	line = unicodify(line)
Line 2224 class CDLIRoot(Folder):	Line 2240 class CDLIRoot(Folder):
# first scan	# first scan
hitwords = []	hitwords = []
for w in words:	for w in words:
if line.find(w) > -1:	if ignorable.sub('',line).find(w) > -1:
# word is in line	# word is in line
hitwords.append(w)	# append split word for grapheme search with words
	hitwords.extend(splitwords[w])
	#hitwords.extend(wordsplit.split(w))

# examine hits closer	# examine hits closer
if hitwords:	if hitwords:
Line 2234 class CDLIRoot(Folder):	Line 2252 class CDLIRoot(Folder):
parts = wordsplit.split(line)	parts = wordsplit.split(line)
line = ""	line = ""
for p in parts:	for p in parts:
	#logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords))
# reassemble line	# reassemble line
if p in hitwords:	if ignorable.sub('', p) in hitwords:
	#logging.debug("tagwordinfile: found %s in %s"%(p,hitwords))
# this part was found	# this part was found
line += tagStart + formatAtfHtml(p) + tagEnd	line += tagStart + formatAtfHtml(p) + tagEnd
else:	else:

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.80.2.9
changed lines
	Added in v.1.80.2.11