version 1.80.2.7, 2007/11/19 15:14:44
|
version 1.80.2.10, 2007/12/03 21:30:19
|
Line 2137 class CDLIRoot(Folder):
|
Line 2137 class CDLIRoot(Folder):
|
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
# put only the P-Number in the result |
# put only the P-Number in the result |
results = [res.getId[:7] for res in resultset] |
results = [res.getId[:7] for res in resultset] |
|
logging.debug("searchtext: found %d texts"%len(results)) |
return results |
return results |
|
|
|
|
Line 2167 class CDLIRoot(Folder):
|
Line 2168 class CDLIRoot(Folder):
|
|
|
def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): |
def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,): |
"""get lines with word from FileId""" |
"""get lines with word from FileId""" |
|
logging.debug("showwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) |
|
|
file = formatAtfFullLineNum(self.getFile(fileId)) |
file = formatAtfFullLineNum(self.getFile(fileId)) |
ret=[] |
ret=[] |
Line 2174 class CDLIRoot(Folder):
|
Line 2176 class CDLIRoot(Folder):
|
# add whitespace before and whitespace and line-end to splitter bounds expressions |
# add whitespace before and whitespace and line-end to splitter bounds expressions |
bounds = self.splitter[indexName].bounds |
bounds = self.splitter[indexName].bounds |
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
splitexp = "(%s|\s)(%%s)(%s|\s|\Z)"%(bounds,bounds) |
|
# clean word expression |
|
# TODO: this should use QueryParser itself |
|
word = word.replace('"','') # take out double quotes |
|
# escape parens for regexp too |
# compile into regexp objects |
# compile into regexp objects |
wordlist = [re.compile(splitexp%w) for w in word.split(' ')] |
wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')] |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
for word in wordlist: |
for word in wordlist: |
Line 2188 class CDLIRoot(Folder):
|
Line 2194 class CDLIRoot(Folder):
|
return ret |
return ret |
|
|
|
|
|
def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
    """
    Collect the lines containing word for every id in the list fileIds.

    Each id is handed to showWordInFile together with word, indexName
    and regExp, unchanged.

    Returns a dict mapping each id to the result of its showWordInFile call.
    """
    logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))
    linesById = {}
    for fileId in fileIds:
        # a repeated id simply overwrites its earlier entry
        linesById[fileId] = self.showWordInFile(fileId, word, indexName, regExp)
    return linesById
|
|
|
|
def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): |
def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False): |
"""get text with word highlighted from FileId""" |
"""get text with word highlighted from FileId""" |
|
logging.debug("tagwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId)) |
|
|
file=self.getFile(fileId) |
file=self.getFile(fileId) |
tagStart=u'<span class="found">' |
tagStart=u'<span class="found">' |
Line 2200 class CDLIRoot(Folder):
|
Line 2217 class CDLIRoot(Folder):
|
# add whitespace to splitter bounds expressions and compile into regexp object |
# add whitespace to splitter bounds expressions and compile into regexp object |
bounds = self.splitter[indexName].bounds |
bounds = self.splitter[indexName].bounds |
wordsplit = re.compile("(%s|\s)"%bounds) |
wordsplit = re.compile("(%s|\s)"%bounds) |
|
# clean word expression |
|
# TODO: this should use QueryParser itself |
|
word = word.replace('"','') # take out double quotes |
# split search terms by blanks |
# split search terms by blanks |
words = word.split(' ') |
words = word.split(' ') |
|
|
Line 2238 class CDLIRoot(Folder):
|
Line 2258 class CDLIRoot(Folder):
|
return u'<br>\n'.join(ret) |
return u'<br>\n'.join(ret) |
|
|
|
|
|
|
|
def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
    """
    Collect the texts with word highlighted for every id in the list fileIds.

    Each id is handed to tagWordInFile together with word, indexName
    and regExp, unchanged.

    Returns a dict mapping each id to the result of its tagWordInFile call.
    """
    logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))
    textById = {}
    for fileId in fileIds:
        # a repeated id simply overwrites its earlier entry
        textById[fileId] = self.tagWordInFile(fileId, word, indexName, regExp)
    return textById
|
|
|
|
def URLquote(self,str):
    """
    Return str percent-encoded for use in a URL.

    Thin wrapper around the standard library's quote function, called
    with its default arguments.
    """
    # quote lives in urllib on Python 2 and in urllib.parse on Python 3;
    # resolve it locally so the method works under either interpreter.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    return quote(str)