version 1.80.2.9, 2007/11/27 10:27:39
|
version 1.80.2.10, 2007/12/03 21:30:19
|
Line 2137 class CDLIRoot(Folder):
|
Line 2137 class CDLIRoot(Folder):
|
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
# put only the P-Number in the result |
# put only the P-Number in the result |
results = [res.getId[:7] for res in resultset] |
results = [res.getId[:7] for res in resultset] |
|
logging.debug("searchtext: found %d texts"%len(results)) |
return results |
return results |
|
|
|
|
Line 2175 class CDLIRoot(Folder):
|
Line 2176 class CDLIRoot(Folder):
|
# add whitespace before and whitespace and line-end to splitter bounds expressions |
# add whitespace before and whitespace and line-end to splitter bounds expressions |
bounds = self.splitter[indexName].bounds |
bounds = self.splitter[indexName].bounds |
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
|
# clean word expression |
|
# TODO: this should use QueryParser itself |
|
word = word.replace('"','') # take out double quotes |
|
# escape parens for regexp too |
# compile into regexp objects |
# compile into regexp objects |
wordlist = [re.compile(splitexp%w) for w in word.split(' ')] |
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
for word in wordlist: |
for word in wordlist: |
Line 2212 class CDLIRoot(Folder):
|
Line 2217 class CDLIRoot(Folder):
|
# add whitespace to splitter bounds expressions and compile into regexp object |
# add whitespace to splitter bounds expressions and compile into regexp object |
bounds = self.splitter[indexName].bounds |
bounds = self.splitter[indexName].bounds |
wordsplit = re.compile("(%s|\s)"%bounds) |
wordsplit = re.compile("(%s|\s)"%bounds) |
|
# clean word expression |
|
# TODO: this should use QueryParser itself |
|
word = word.replace('"','') # take out double quotes |
# split search terms by blanks |
# split search terms by blanks |
words = word.split(' ') |
words = word.split(' ') |
|
|