version 1.80.2.3, 2007/10/22 16:26:40
|
version 1.80.2.4, 2007/10/24 20:36:07
|
Line 28 import copy
|
Line 28 import copy
|
import codecs |
import codecs |
import sys |
import sys |
|
|
|
import cdliSplitter |
|
|
|
|
def unicodify(s): |
def unicodify(s): |
"""decode str (utf-8 or latin-1 representation) into unicode object""" |
"""decode str (utf-8 or latin-1 representation) into unicode object""" |
if not s: |
if not s: |
Line 50 def utf8ify(s):
|
Line 53 def utf8ify(s):
|
else: |
else: |
return s.encode('utf-8') |
return s.encode('utf-8') |
|
|
|
def formatAtfLineHtml(l, nolemma=True):
    """Escape the HTML-special characters of one ATF line.

    @param l: a single line of ATF text (str or unicode); may be empty or None
    @param nolemma: if true, lemma lines (lines whose first non-blank
        characters are '#lem:') are suppressed
    @return: the line with '&', '<' and '>' replaced by their HTML entities,
        or "" for an empty/None line or a suppressed lemma line
    """
    # function-scope import keeps this block self-contained
    from xml.sax.saxutils import escape

    if not l:
        return ""

    if nolemma and l.lstrip().startswith('#lem:'):
        # ignore lemma lines
        return ""

    # escape() replaces '&' first and then the angular brackets, so the
    # entities it produces are never escaped a second time
    return escape(l)
|
|
|
|
def generateXMLReturn(hash): |
def generateXMLReturn(hash): |
"""erzeugt das xml file als returnwert fuer uploadATFRPC""" |
"""erzeugt das xml file als returnwert fuer uploadATFRPC""" |
Line 1812 class CDLIFileFolder(extVersionedFileFol
|
Line 1831 class CDLIFileFolder(extVersionedFileFol
|
|
|
def getFile(self,fn): |
def getFile(self,fn): |
"""get the content of the file fn""" |
"""get the content of the file fn""" |
|
logging.debug("getFile: %s"%repr(fn)) |
if not self.hasObject(fn): |
if not self.hasObject(fn): |
# search deeper |
# search deeper |
founds=self.CDLICatalog.search({'title':fn}) |
founds=self.CDLICatalog.search({'title':fn}) |
Line 2030 class CDLIRoot(Folder):
|
Line 2050 class CDLIRoot(Folder):
|
meta_type="CDLIRoot" |
meta_type="CDLIRoot" |
downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible |
downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible |
|
|
|
file_catalog = 'CDLICatalog' |
|
|
|
# word splitter for search |
|
splitter = {'words':cdliSplitter.wordSplitter(), |
|
'graphemes':cdliSplitter.graphemeSplitter()} |
|
|
|
|
def deleteFiles(self,ids): |
def deleteFiles(self,ids): |
"""delete files""" |
"""delete files""" |
for id in ids: |
for id in ids: |
Line 2042 class CDLIRoot(Folder):
|
Line 2069 class CDLIRoot(Folder):
|
|
|
|
|
|
|
|
def searchText(self, query, index='words'):
    """Search the fulltext index and return the shortened ids of the hits.

    @param query: query passed to the index as {'query': query}
    @param index: name of the catalog index to search (default 'words')
    @return: list with the first seven characters of every result's id
        (presumably the P-number of the file -- confirm against the
        naming scheme of the catalogued files)
    """
    idxQuery = {index: {'query': query}}
    # the catalog is looked up by the name stored in self.file_catalog
    idx = getattr(self, self.file_catalog)

    results = []
    for res in idx.search(idxQuery):
        resId = res.getId
        # a result may expose getId as a stored value or as a method;
        # slicing a bound method would raise TypeError
        if callable(resId):
            resId = resId()
        # put only the leading seven characters of the id in the result
        results.append(resId[:7])

    return results
|
|
|
# from PluginIndexes/common/util.py (class parseIndexRequest): |
|
# |
|
# The class understands the following type of parameters: |
|
# |
|
# - old-style parameters where the query for an index as value inside |
|
# the request directory where the index name is the name of the key. |
|
# Additional parameters for an index could be passed as index+"_usage" ... |
|
# |
|
# |
|
# - dictionary-style parameters specify a query for an index as |
|
# an entry in the request dictionary where the key corresponds to the |
|
# name of the index and the value is a dictionary with the parameters |
|
# passed to the index. |
|
# |
|
# Allowed keys of the parameter dictionary: |
|
# |
|
# 'query' - contains the query (either string, list or tuple) (required) |
|
# |
|
# other parameters depend on the index |
|
# |
|
# |
|
# - record-style parameters specify a query for an index as instance of the |
|
# Record class. This happens usually when parameters from a web form use |
|
# the "record" type e.g. <input type="text" name="path.query:record:string">. |
|
# All restrictions of the dictionary-style parameters apply to the record-style |
|
# parameters |
|
|
|
|
|
|
def showFile(self,fileId,wholePage=False): |
def showFile(self,fileId,wholePage=False): |
"""show a file |
"""show a file |
@param fileId: P-Number of the document to be displayed |
@param fileId: P-Number of the document to be displayed |
Line 2069 class CDLIRoot(Folder):
|
Line 2137 class CDLIRoot(Folder):
|
wordlist=[word] |
wordlist=[word] |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
|
line = formatAtfLineHtml(unicodify(line)) |
found=False |
found=False |
for word in wordlist: |
for word in wordlist: |
try: # just a hack because of possible unicode errors in line |
try: # just a hack because of possible unicode errors in line |
if line.find(word)>-1: |
if line.find(word)>-1: |
if lineList: #liste of moeglichen Zeilennummern |
if lineList: #liste of moeglichen Zeilennummern |
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
|
|
if num in lineList: |
if num in lineList: |
|
|
ret.append(line) |
ret.append(line) |
else: # nimm alles ohne line check |
else: # nimm alles ohne line check |
ret.append(line) |
ret.append(line) |
|
|
break; |
break; |
except: |
except: |
pass |
pass |
return ret |
return ret |
|
|
def tagWordInFile(self,fileId,word,lineList=None,regExp=False,indexName=""): |
|
|
def tagWordInFile(self,fileId,word,indexName='words',regExp=False): |
"""get text with word highlighted from FileId""" |
"""get text with word highlighted from FileId""" |
|
|
file=self.showFile(fileId) |
file=self.showFile(fileId) |
tagStr=u'<span class="found">%s</span>' |
tagStr=u'<span class="found">%s</span>' |
ret=[] |
ret=[] |
|
# search using lowercase |
|
word = word.lower() |
|
|
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen |
wordlist=self.findWordRegExp(indexName,word) |
wordlist=self.findWordRegExp(indexName,word) |
else: |
else: |
wordlist=[word] |
# split the search term into words according to the corresponding splitter |
|
#try: |
|
wordlist = self.splitter[indexName].process([word]) |
|
#except: |
|
# wordlist=[word] |
|
|
for line in file.split("\n"): |
for line in file.split("\n"): |
line = unicodify(line) |
line = formatAtfLineHtml(unicodify(line)) |
found=False |
if not line: |
for word in wordlist: |
# formatAtf can produce empty lines |
if line.find(word)>-1: #word ist gefunden dann makiere und breche die Schleife ab |
continue |
if lineList: #liste of moeglichen Zeilennummern |
|
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile |
for w in wordlist: |
|
if line.lower().find(w)>-1: |
if num in lineList: |
#word ist gefunden dann makiere |
|
line = line.replace(w,tagStr%w) |
ret.append(line.replace(word,tagStr%word)) |
|
|
|
else: # nimm alles ohne line check |
|
ret.append(line.replace(word,tagStr%word)) |
|
found=True |
|
break |
|
if not found: #word wurde nicht gefunden keine makierung |
|
ret.append(line) |
ret.append(line) |
|
|
return u'<br>\n'.join(ret) |
return u'<br>\n'.join(ret) |