cdli/cdli_files.py - diff

Return to cdli_files.py CVS log

Up to [Repository] / cdli

Diff for /cdli/cdli_files.py between versions 1.80.2.3 and 1.80.2.4

version 1.80.2.3, 2007/10/22 16:26:40	version 1.80.2.4, 2007/10/24 20:36:07
Line 28 import copy	Line 28 import copy
import codecs	import codecs
import sys	import sys

	import cdliSplitter


def unicodify(s):	def unicodify(s):
"""decode str (utf-8 or latin-1 representation) into unicode object"""	"""decode str (utf-8 or latin-1 representation) into unicode object"""
if not s:	if not s:
Line 50 def utf8ify(s):	Line 53 def utf8ify(s):
else:	else:
return s.encode('utf-8')	return s.encode('utf-8')

	def formatAtfLineHtml(l, nolemma=True):
	"""escape special ATF characters for HTML"""
	if not l:
	return ""

	if nolemma:
	# ignore lemma lines
	if l.lstrip().startswith('#lem:'):
	return ""
	# replace &
	l = l.replace('&','&amp;')
	# replace angular brackets
	l = l.replace('<','&lt;')
	l = l.replace('>','&gt;')
	return l


def generateXMLReturn(hash):	def generateXMLReturn(hash):
"""erzeugt das xml file als returnwert fuer uploadATFRPC"""	"""erzeugt das xml file als returnwert fuer uploadATFRPC"""
Line 1812 class CDLIFileFolder(extVersionedFileFol	Line 1831 class CDLIFileFolder(extVersionedFileFol

def getFile(self,fn):	def getFile(self,fn):
"""get the content of the file fn"""	"""get the content of the file fn"""
	logging.debug("getFile: %s"%repr(fn))
if not self.hasObject(fn):	if not self.hasObject(fn):
# search deeper	# search deeper
founds=self.CDLICatalog.search({'title':fn})	founds=self.CDLICatalog.search({'title':fn})
Line 2030 class CDLIRoot(Folder):	Line 2050 class CDLIRoot(Folder):
meta_type="CDLIRoot"	meta_type="CDLIRoot"
downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible	downloadCounterBaskets=0 # counts the current basket downloads if counter > 10 no downloads are possible

	file_catalog = 'CDLICatalog'

	# word splitter for search
	splitter = {'words':cdliSplitter.wordSplitter(),
	'graphemes':cdliSplitter.graphemeSplitter()}


def deleteFiles(self,ids):	def deleteFiles(self,ids):
"""delete files"""	"""delete files"""
for id in ids:	for id in ids:
Line 2042 class CDLIRoot(Folder):	Line 2069 class CDLIRoot(Folder):



	def searchText(self, query, index='words'):
	"""searches query in the fulltext index and returns a list of file ids/P-numbers"""
	idxQuery = {index:{'query':query}}
	idx = getattr(self, self.file_catalog)
	results = []
	# do search
	resultset = idx.search(idxQuery)
	for res in resultset:
	# put only the P-Number in the result
	results.append(res.getId[:7])
	return results

	# from PluginIndexes.common.util.py:parseIndexRequest:
	#
	# The class understands the following type of parameters:
	#
	# - old-style parameters where the query for an index as value inside
	# the request directory where the index name is the name of the key.
	# Additional parameters for an index could be passed as index+"_usage" ...
	#
	#
	# - dictionary-style parameters specify a query for an index as
	# an entry in the request dictionary where the key corresponds to the
	# name of the index and the value is a dictionary with the parameters
	# passed to the index.
	#
	# Allowed keys of the parameter dictionary:
	#
	# 'query' - contains the query (either string, list or tuple) (required)
	#
	# other parameters depend on the index
	#
	#
	# - record-style parameters specify a query for an index as instance of the
	# Record class. This happens usually when parameters from a web form use
	# the "record" type e.g. <input type="text" name="path.query:record:string">.
	# All restrictions of the dictionary-style parameters apply to the record-style
	# parameters



def showFile(self,fileId,wholePage=False):	def showFile(self,fileId,wholePage=False):
"""show a file	"""show a file
@param fileId: P-Number of the document to be displayed	@param fileId: P-Number of the document to be displayed
Line 2069 class CDLIRoot(Folder):	Line 2137 class CDLIRoot(Folder):
wordlist=[word]	wordlist=[word]

for line in file.split("\n"):	for line in file.split("\n"):
	line = formatAtfLineHtml(unicodify(line))
found=False	found=False
for word in wordlist:	for word in wordlist:
try: # just a hack because of possible unicode errors in line	try: # just a hack because of possible unicode errors in line
if line.find(word)>-1:	if line.find(word)>-1:
if lineList: #liste of moeglichen Zeilennummern	if lineList: #liste of moeglichen Zeilennummern
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile	num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile

if num in lineList:	if num in lineList:

ret.append(line)	ret.append(line)
else: # nimm alles ohne line check	else: # nimm alles ohne line check
ret.append(line)	ret.append(line)

break;	break;
except:	except:
pass	pass
return ret	return ret

def tagWordInFile(self,fileId,word,lineList=None,regExp=False,indexName=""):
	def tagWordInFile(self,fileId,word,indexName='words',regExp=False):
"""get text with word highlighted from FileId"""	"""get text with word highlighted from FileId"""

file=self.showFile(fileId)	file=self.showFile(fileId)
tagStr=u'<span class="found">%s</span>'	tagStr=u'<span class="found">%s</span>'
ret=[]	ret=[]
	# search using lowercase
	word = word.lower()

if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen	if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen
wordlist=self.findWordRegExp(indexName,word)	wordlist=self.findWordRegExp(indexName,word)
else:	else:
wordlist=[word]	# split the search term into words according to the corresponding splitter
	#try:
	wordlist = self.splitter[indexName].process([word])
	#except:
	# wordlist=[word]

for line in file.split("\n"):	for line in file.split("\n"):
line = unicodify(line)	line = formatAtfLineHtml(unicodify(line))
found=False	if not line:
for word in wordlist:	# formatAtf can produce empty lines
if line.find(word)>-1: #word ist gefunden dann makiere und breche die Schleife ab	continue
if lineList: #liste of moeglichen Zeilennummern
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile	for w in wordlist:
	if line.lower().find(w)>-1:
if num in lineList:	#word ist gefunden dann makiere
	line = line.replace(w,tagStr%w)
ret.append(line.replace(word,tagStr%word))

else: # nimm alles ohne line check
ret.append(line.replace(word,tagStr%word))
found=True
break
if not found: #word wurde nicht gefunden keine makierung
ret.append(line)	ret.append(line)

return u'<br>\n'.join(ret)	return u'<br>\n'.join(ret)

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.80.2.3
changed lines
	Added in v.1.80.2.4