cdli/cdli_files.py - diff

Return to cdli_files.py CVS log

Up to [Repository] / cdli

Diff for /cdli/cdli_files.py between versions 1.80.2.4 and 1.81

version 1.80.2.4, 2007/10/24 20:36:07	version 1.81, 2008/01/21 17:19:01
Line 53 def utf8ify(s):	Line 53 def utf8ify(s):
else:	else:
return s.encode('utf-8')	return s.encode('utf-8')

def formatAtfLineHtml(l, nolemma=True):	def formatAtfHtml(l):
"""escape special ATF characters for HTML"""	"""escape special ATF characters for HTML"""
if not l:	if not l:
return ""	return ""

if nolemma:
# ignore lemma lines
if l.lstrip().startswith('#lem:'):
return ""
# replace &	# replace &
l = l.replace('&','&amp;')	l = l.replace('&','&amp;')
# replace angular brackets	# replace angular brackets
Line 69 def formatAtfLineHtml(l, nolemma=True):	Line 65 def formatAtfLineHtml(l, nolemma=True):
l = l.replace('>','&gt;')	l = l.replace('>','&gt;')
return l	return l

	def formatAtfLineHtml(l, nolemma=True):
	"""format ATF line for HTML"""
	if not l:
	return ""

	if nolemma:
	# ignore lemma lines
	if l.lstrip().startswith('#lem:'):
	return ""

	return formatAtfHtml(l)



	def formatAtfFullLineNum(txt, nolemma=True):
	"""format full line numbers in ATF text"""
	# surface codes
	surfaces = {'@obverse':'obv',
	'@reverse':'rev',
	'@surface':'surface',
	'@edge':'edge',
	'@left':'left',
	'@right':'right',
	'@top':'top',
	'@bottom':'bottom',
	'@face':'face',
	'@seal':'seal'}

	if not txt:
	return ""

	ret = []
	surf = ""
	col = ""
	for line in txt.splitlines():
	line = unicodify(line)
	if line and line[0] == '@':
	# surface or column
	words = line.split(' ')
	if words[0] in surfaces:
	surf = line.replace(words[0],surfaces[words[0]]).strip()

	elif words[0] == '@column':
	col = ' '.join(words[1:])

	elif line and line[0] in '123456789':
	# ordinary line -> add line number
	line = "%s:%s:%s"%(surf,col,line)

	ret.append(line)

	return '\n'.join(ret)


def generateXMLReturn(hash):	def generateXMLReturn(hash):
"""erzeugt das xml file als returnwert fuer uploadATFRPC"""	"""erzeugt das xml file als returnwert fuer uploadATFRPC"""
Line 95 def generateXMLReturn(hash):	Line 144 def generateXMLReturn(hash):
return ret	return ret










def unique(s):	def unique(s):
"""Return a list of the elements in s, but without duplicates.	"""Return a list of the elements in s, but without duplicates.

Line 272 class uploadATFfinallyThread(Thread):	Line 313 class uploadATFfinallyThread(Thread):
self.result+="<h2>Start processing</h2>"	self.result+="<h2>Start processing</h2>"

#shall I only upload the changed files?	#shall I only upload the changed files?
logging.info("uploadATFfinally procedure: %s"%procedure)	logging.debug("uploadATFfinally procedure: %s"%procedure)
if procedure=="uploadchanged":	if procedure=="uploadchanged":
changed=[x[0] for x in SESSION.get('changed',[])]	changed=[x[0] for x in SESSION.get('changed',[])]
uploadFns=changed+SESSION.get('newPs',[])	uploadFns=changed+SESSION.get('newPs',[])
Line 293 class uploadATFfinallyThread(Thread):	Line 334 class uploadATFfinallyThread(Thread):
#do first the changed files	#do first the changed files
i=0	i=0
for fn in uploadFns:	for fn in uploadFns:
	logging.debug("uploadATFfinally uploadFn=%s"%fn)
i+=1	i+=1
founds=ctx2.CDLICatalog.search({'title':fn})	founds=ctx2.CDLICatalog.search({'title':fn})
if len(founds)>0:	if len(founds)>0:
SESSION['author']=str(username)	SESSION['author']=str(username)
self.result="<p>Changing : %s"%fn+self.result	self.result="<p>Changing : %s"%fn+self.result
	logging.debug("uploadatffinallythread changing:%s"%fn+self.result)
founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True)	founds[0].getObject().manage_addCDLIFileObject('',comment,SESSION['author'],file=os.path.join(SESSION['tmpdir'],fn),from_tmp=True)
if i==200:	if i%200==0:
i=0
transaction.get().commit()	transaction.get().commit()
logging.info("changing: do commit")	logging.debug("uploadatffinallythread changing: do commit")

transaction.get().commit()	transaction.get().commit()
logging.info("changing: last commit")	logging.debug("uploadatffinallythread changing: last commit")

#now add the new files	#now add the new files
newPs=SESSION['newPs']	newPs=SESSION['newPs']
if len(newPs)>0:	if len(newPs)>0:
tmpDir=SESSION['tmpdir']	tmpDir=SESSION['tmpdir']
logging.info("adding start")	logging.debug("uploadatffinallythread adding start")
self.result="<p>Adding files</p>"+self.result	self.result="<p>Adding files</p>"+self.result
#TODO: make this configurable, at the moment base folder for the files has to be cdli_main	#TODO: make this configurable, at the moment base folder for the files has to be cdli_main

ctx2.importFiles(comment=comment,author=str(username) ,folderName=tmpDir, files=newPs,ext=self)	ctx2.importFiles(comment=comment,author=str(username) ,folderName=tmpDir, files=newPs,ext=self)
logging.info("adding finished")	logging.debug("uploadatffinallythread adding finished")


#unlock locked files?	#unlock locked files?
if unlock:	if unlock:
logging.info("unlocking start")	logging.debug("uploadatffinallythread unlocking start")
self.result="<p>Unlock files</p>"+self.result	self.result="<p>Unlock files</p>"+self.result
unlockFns=[]	unlockFns=[]
for x in os.listdir(SESSION['tmpdir']):	for x in os.listdir(SESSION['tmpdir']):
if not x in SESSION['errors']:	if not x in SESSION['errors']:
unlockFns.append(x)	unlockFns.append(x)
logging.info("unlocking have now what to unlock")
	logging.debug("unlocking have now what to unlock")

for fn in unlockFns:	for fn in unlockFns:
#logging.info("will unlock: %s"%fn)	#logging.info("will unlock: %s"%fn)
Line 336 class uploadATFfinallyThread(Thread):	Line 377 class uploadATFfinallyThread(Thread):
if len(founds)>0:	if len(founds)>0:
#logging.info("unlock: %s"%founds[0].getObject().getId())	#logging.info("unlock: %s"%founds[0].getObject().getId())
SESSION['author']=str(username)	SESSION['author']=str(username)

founds[0].getObject().lockedBy=""	founds[0].getObject().lockedBy=""
logging.info("unlocking done")
	logging.debug("uploadatffinallythread unlocking done")

#if a basketname is given, add files to the basket	#if a basketname is given, add files to the basket
if not (basketname ==''):	if not (basketname ==''):
logging.info("add to basket %s"%basketname)	logging.debug("uploadatffinallythread add to basket %s"%basketname)
self.result="<p>Add to basket</p>"+self.result	self.result="<p>Add to basket</p>"+self.result
basketId=ctx2.basketContainer.getBasketIdfromName(basketname)	basketId=ctx2.basketContainer.getBasketIdfromName(basketname)

if not basketId: # create new basket	if not basketId: # create new basket
logging.info("create basket %s"%basketname)	logging.debug("uploadatffinallythread create basket %s"%basketname)
self.result="<p>Create a new basket</p>"+self.result	self.result="<p>Create a new basket</p>"+self.result
ob=ctx2.basketContainer.addBasket(basketname)	ob=ctx2.basketContainer.addBasket(basketname)
basketId=ob.getId()	basketId=ob.getId()
Line 355 class uploadATFfinallyThread(Thread):	Line 396 class uploadATFfinallyThread(Thread):
ids=os.listdir(SESSION['tmpdir'])	ids=os.listdir(SESSION['tmpdir'])
basket.addObjects(ids,deleteOld=True,username=str(username))	basket.addObjects(ids,deleteOld=True,username=str(username))

	logging.debug("uploadatffinallythread uploadfinally done")

if RESPONSE is not None:	if RESPONSE is not None:
RESPONSE.redirect(self.aq_parent.absolute_url())	RESPONSE.redirect(self.aq_parent.absolute_url())


logging.info("uploadfinally done")
return True	return True

class tmpStore(SimpleItem):	class tmpStore(SimpleItem):
Line 591 class CDLIBasketContainer(OrderedFolder)	Line 632 class CDLIBasketContainer(OrderedFolder)
ret+=str(object[0].getData())+"\n"	ret+=str(object[0].getData())+"\n"
elif current=="yes":	elif current=="yes":
#search current object	#search current object
logging.info("crrent: %s"%object[1].getId().split(".")[0])	logging.debug("current: %s"%object[1].getId().split(".")[0])
founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]})	founds=self.CDLICatalog.search({'title':object[1].getId().split(".")[0]})
if len(founds)>0:	if len(founds)>0:
ret+=str(founds[0].getObject().getLastVersion().getData())+"\n"	ret+=str(founds[0].getObject().getLastVersion().getData())+"\n"
Line 1471 class CDLIFileObject(CatalogAware,extVer	Line 1512 class CDLIFileObject(CatalogAware,extVer

security.declareProtected('manage','index_html')	security.declareProtected('manage','index_html')

	security.declarePublic('view')
	view = PageTemplateFile('zpt/viewCDLIFile.zpt', globals())

	security.declarePublic('editATF')
	editATF = PageTemplateFile('zpt/editATFFile.zpt', globals())

def PrincipiaSearchSource(self):	def PrincipiaSearchSource(self):
"""Return cataloguable key for ourselves."""	"""Return cataloguable key for ourselves."""
return str(self)	return str(self)
Line 1485 class CDLIFileObject(CatalogAware,extVer	Line 1532 class CDLIFileObject(CatalogAware,extVer
def makeThisVersionCurrent(self,comment,author,RESPONSE=None):	def makeThisVersionCurrent(self,comment,author,RESPONSE=None):
"""copy this version to current"""	"""copy this version to current"""
parent=self.aq_parent	parent=self.aq_parent
newversion=parent.manage_addCDLIFileObject('',comment,author)	parent.manage_addVersionedFileObject(id=None,vC=comment,author=author,file=self.getData(),RESPONSE=RESPONSE)
newversion.manage_upload(self.getData())	#newversion=parent.manage_addCDLIFileObject('',comment,author)
	#newversion.manage_upload(self.getData())

if RESPONSE is not None:	#if RESPONSE is not None:
RESPONSE.redirect(self.aq_parent.absolute_url()+'/history')	# RESPONSE.redirect(self.aq_parent.absolute_url()+'/history')

return True	return True

Line 1499 class CDLIFileObject(CatalogAware,extVer	Line 1547 class CDLIFileObject(CatalogAware,extVer
# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem	# return re.sub("\s\#lem"," #lem",data) #remove return vor #lem
return re.sub("#lem"," #lem",data) #remove return vor #lem	return re.sub("#lem"," #lem",data) #remove return vor #lem

security.declarePublic('view')
def view(self):
"""view file"""
pt=PageTemplateFile(os.path.join(package_home(globals()),'zpt','viewCDLIFile.zpt')).__of__(self)
return pt()

security.declarePublic('getPNumber')	security.declarePublic('getPNumber')
def getPNumber(self):	def getPNumber(self):
Line 1533 class CDLIFileObject(CatalogAware,extVer	Line 1576 class CDLIFileObject(CatalogAware,extVer
except:	except:
return "ERROR"	return "ERROR"


manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1')	manage_addCDLIFileObjectForm=DTMLFile('dtml/fileAdd', globals(),Kind='CDLIFileObject',kind='CDLIFileObject', version='1')

def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0,	def manage_addCDLIFileObject(self,id,vC='',author='', file='',title='',versionNumber=0,
Line 1565 def manage_addCDLIFileObject(self,id,vC=	Line 1609 def manage_addCDLIFileObject(self,id,vC=
if content_type:	if content_type:
fob.content_type=content_type	fob.content_type=content_type

logging.debug("manage_add: lastversion=%s"%self.getData())	#logging.debug("manage_add: lastversion=%s"%self.getData())
logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog)))	logging.debug("reindex1: %s in %s"%(repr(self),repr(self.default_catalog)))
self.reindex_object()	self.reindex_object()
logging.debug("manage_add: fob_data=%s"%fob.getData())	#logging.debug("manage_add: fob_data=%s"%fob.getData())
logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog)))	logging.debug("reindex2: %s in %s"%(repr(fob), repr(fob.default_catalog)))
fob.index_object()	fob.index_object()

Line 1584 class CDLIFile(extVersionedFile,CatalogA	Line 1628 class CDLIFile(extVersionedFile,CatalogA
content_meta_type = ["CDLI File Object"]	content_meta_type = ["CDLI File Object"]

default_catalog='CDLICatalog'	default_catalog='CDLICatalog'

security.declareProtected('manage','index_html')	security.declareProtected('manage','index_html')

def getLastVersionData(self):	def getLastVersionData(self):
Line 1594 class CDLIFile(extVersionedFile,CatalogA	Line 1639 class CDLIFile(extVersionedFile,CatalogA
"""get last version data"""	"""get last version data"""
return self.getContentObject().getFormattedData()	return self.getContentObject().getFormattedData()

	def getTextId(self):
	"""returns P-number of text"""
	# assuming that its the beginning of the title
	return self.title[:7]

#security.declarePublic('history')	#security.declarePublic('history')
def history(self):	def history(self):
"""history"""	"""history"""
Line 1641 class CDLIFile(extVersionedFile,CatalogA	Line 1691 class CDLIFile(extVersionedFile,CatalogA

def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None):	def _newContentObject(self, id, title='', versionNumber=0, versionComment=None, time=None, author=None):
"""factory for content objects. to be overridden in derived classes."""	"""factory for content objects. to be overridden in derived classes."""
	logging.debug("_newContentObject(CDLI)")
return CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=versionComment,time=time,author=author)	return CDLIFileObject(id,title,versionNumber=versionNumber,versionComment=versionComment,time=time,author=author)


Line 1710 def manage_addCDLIFile(self,id,title,loc	Line 1761 def manage_addCDLIFile(self,id,title,loc
if RESPONSE is not None:	if RESPONSE is not None:
RESPONSE.redirect('manage_main')	RESPONSE.redirect('manage_main')


def checkUTF8(data):	def checkUTF8(data):
"""check utf 8"""	"""check utf 8"""
try:	try:
Line 1740 def splitatf(fh,dir=None,ext=None):	Line 1792 def splitatf(fh,dir=None,ext=None):
nf=None	nf=None
i=0	i=0

	#ROC: why split \n first and then \r???
if (type(fh) is StringType) or (type(fh) is UnicodeType):	if (type(fh) is StringType) or (type(fh) is UnicodeType):
iter=fh.split("\n")	iter=fh.split("\n")
else:	else:
Line 1796 class CDLIFileFolder(extVersionedFileFol	Line 1849 class CDLIFileFolder(extVersionedFileFol
file_meta_type=['CDLI file']	file_meta_type=['CDLI file']
folder_meta_type=['CDLI Folder']	folder_meta_type=['CDLI Folder']

default_catalog='CDLICatalog'	file_catalog='CDLICatalog'
defaultFileCatalog=default_catalog #wenn dieses definiert ist, wird beim hinzufuegen einer neuen version eines files dieser catalog neuindiziert
#downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert.	#downloadCounter=0 # counts how many download for all files currently run, be mehr als 5 wird verweigert.
tmpStore2={}	tmpStore2={}

	def _newVersionedFile(self, id, title='', lockedBy=None, author=None):
	"""factory for versioned files. to be overridden in derived classes."""
	logging.debug("_newVersionedFile(CDLI)")
	return CDLIFile(id, title, lockedBy=lockedBy, author=author)

def setTemp(self,name,value):	def setTemp(self,name,value):
"""set tmp"""	"""set tmp"""

setattr(self,name,value)	setattr(self,name,value)

	deleteFileForm = PageTemplateFile("zpt/doDeleteFile", globals())

def delete(self,ids):	def delete(self,ids,REQUEST=None):
"""delete these files"""	"""delete these files"""
if type(ids) is not ListType:	if type(ids) is not ListType:
ids=[ids]	ids=[ids]

self.manage_delObjects(ids)	self.manage_delObjects(ids)

	if REQUEST is not None:
	return self.index_html()


def getVersionNumbersFromIds(self,ids):	def getVersionNumbersFromIds(self,ids):
"""get the numbers of the current versions of documents described by their ids"""	"""get the numbers of the current versions of documents described by their ids"""
Line 1834 class CDLIFileFolder(extVersionedFileFol	Line 1896 class CDLIFileFolder(extVersionedFileFol
logging.debug("getFile: %s"%repr(fn))	logging.debug("getFile: %s"%repr(fn))
if not self.hasObject(fn):	if not self.hasObject(fn):
# search deeper	# search deeper
founds=self.CDLICatalog.search({'title':fn})	founds=getattr(self, self.file_catalog).search({'textid':fn})
if founds:	if founds:
obj=founds[0].getObject().getContentObject()	obj=founds[0].getObject().getContentObject()
else:	else:
Line 1937 class CDLIFileFolder(extVersionedFileFol	Line 1999 class CDLIFileFolder(extVersionedFileFol
def sortF(x,y):	def sortF(x,y):
return cmp(x[0],y[0])	return cmp(x[0],y[0])

catalog=getattr(self,self.default_catalog)	catalog=getattr(self,self.file_catalog)
#tf,tfilename=mkstemp()	#tf,tfilename=mkstemp()
if not hasattr(self.temp_folder,'downloadCounter'):	if not hasattr(self.temp_folder,'downloadCounter'):
self.temp_folder.downloadCounter=0	self.temp_folder.downloadCounter=0
Line 2069 class CDLIRoot(Folder):	Line 2131 class CDLIRoot(Folder):



def searchText(self, query, index='words'):	def searchText(self, query, index='graphemes'):
"""searches query in the fulltext index and returns a list of file ids/P-numbers"""	"""searches query in the fulltext index and returns a list of file ids/P-numbers"""
	# see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13
	logging.debug("searchtext for '%s' in index %s"%(query,index))
	#import Products.ZCTextIndex.QueryParser
	#qp = QueryParser.QueryParser()
	#logging.debug()
idxQuery = {index:{'query':query}}	idxQuery = {index:{'query':query}}
idx = getattr(self, self.file_catalog)	idx = getattr(self, self.file_catalog)
results = []
# do search	# do search
resultset = idx.search(idxQuery)	resultset = idx.search(query_request=idxQuery,sort_index='textid')
for res in resultset:
# put only the P-Number in the result	# put only the P-Number in the result
results.append(res.getId[:7])	results = [res.getId[:7] for res in resultset]
	logging.debug("searchtext: found %d texts"%len(results))
return results	return results

# from PluginINdexes.common.util.py:parseIndexRequest:
#	def getFile(self, pnum):
# The class understands the following type of parameters:	"""get the translit file with the given pnum"""
#	f = getattr(self, self.file_catalog).search({'textid':pnum})
# - old-style parameters where the query for an index as value inside	if not f:
# the request directory where the index name is the name of the key.	return ""
# Additional parameters for an index could be passed as index+"_usage" ...
#	return f[0].getObject().getData()
#
# - dictionary-style parameters specify a query for an index as
# an entry in the request dictionary where the key corresponds to the
# name of the index and the key is a dictionary with the parameters
# passed to the index.
#
# Allowed keys of the parameter dictionary:
#
# 'query' - contains the query (either string, list or tuple) (required)
#
# other parameters depend on the the index
#
#
# - record-style parameters specify a query for an index as instance of the
# Record class. This happens usually when parameters from a web form use
# the "record" type e.g. <input type="text" name="path.query:record:string">.
# All restrictions of the dictionary-style parameters apply to the record-style
# parameters



Line 2114 class CDLIRoot(Folder):	Line 2162 class CDLIRoot(Folder):
"""show a file	"""show a file
@param fileId: P-Number of the document to be displayed	@param fileId: P-Number of the document to be displayed
"""	"""
f=self.CDLICatalog({'title':fileId})	f=getattr(self, self.file_catalog).search({'textid':fileId})
if not f:	if not f:
return ""	return ""

Line 2125 class CDLIRoot(Folder):	Line 2173 class CDLIRoot(Folder):
return f[0].getObject().getLastVersionFormattedData()	return f[0].getObject().getLastVersionFormattedData()


def showWordInFile(self,fileId,word,lineList=None,regExp=False,indexName=""):	def showWordInFile(self,fileId,word,indexName='graphemes',regExp=False,):
"""get lines with word from FileId"""	"""get lines with word from FileId"""
	logging.debug("showwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId))

file=self.showFile(fileId)	file = formatAtfFullLineNum(self.getFile(fileId))
logging.debug("show word regEXP %s"%regExp)
ret=[]	ret=[]
if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen
wordlist=self.findWordRegExp(indexName,word)
else:
wordlist=[word]

for line in file.split("\n"):	# add whitespace before and whitespace and line-end to splitter bounds expressions
line = formatAtfLineHtml(unicodify(line))	bounds = self.splitter[indexName].bounds
found=False	splitexp = "(%s\|\s)(%%s)(%s\|\s\|\Z)"%(bounds,bounds)
	# clean word expression
	# TODO: this should use QueryParser itself
	# take out double quotes
	word = word.replace('"','')
	# take out ignorable signs
	ignorable = self.splitter[indexName].ignorex
	word = ignorable.sub('', word)
	# compile into regexp objects and escape parens
	wordlist = [re.compile(splitexp%re.escape(w)) for w in word.split(' ')]

	for line in file.splitlines():
for word in wordlist:	for word in wordlist:
try: # just a hack because of possible unicode errors in line	#logging.debug("showwordinfile: searching for %s in %s"%(word.pattern,ignoreable.sub('',line)))
if line.find(word)>-1:	if word.search(ignorable.sub('',line)):
if lineList: #liste of moeglichen Zeilennummern	line = formatAtfLineHtml(line)
num=line.split(".")[0] #Zeilenummer ist alles vor dem . in der Zeile
if num in lineList:
ret.append(line)
else: # nimm alles ohne line check
ret.append(line)	ret.append(line)
break;	break
except:
pass
return ret	return ret


def tagWordInFile(self,fileId,word,indexName='words',regExp=False):	def showWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
	"""
	get lines with word from all ids in list FileIds.
	returns dict with id:lines pairs.
	"""
	logging.debug("showwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))

	return dict([(id,self.showWordInFile(id, word, indexName, regExp)) for id in fileIds])


	def tagWordInFile(self,fileId,word,indexName='graphemes',regExp=False):
"""get text with word highlighted from FileId"""	"""get text with word highlighted from FileId"""
	logging.debug("tagwordinfile word='%s' index=%s file=%s"%(word,indexName,fileId))

file=self.showFile(fileId)	file=self.getFile(fileId)
tagStr=u'<span class="found">%s</span>'	tagStart=u'<span class="found">'
	tagEnd=u'</span>'
	tagStr=tagStart + u'%%s' + tagEnd
ret=[]	ret=[]
# search using lowercase
word = word.lower()

if regExp: # wenn regexp dann generiere alle worte aus der list die der regexp entsprechen	# add whitespace to splitter bounds expressions and compile into regexp object
wordlist=self.findWordRegExp(indexName,word)	bounds = self.splitter[indexName].bounds
else:	wordsplit = re.compile("(%s\|\s)"%bounds)
# split the search term into words according to the corresponding splitter	# clean word expression
#try:	# TODO: this should use QueryParser itself
wordlist = self.splitter[indexName].process([word])	word = word.replace('"','') # take out double quotes
#except:	# take out ignoreable signs
# wordlist=[word]	ignorable = self.splitter[indexName].ignorex
	word = ignorable.sub('', word)
for line in file.split("\n"):	# split search terms by blanks
line = formatAtfLineHtml(unicodify(line))	words = word.split(' ')
if not line:	# split search terms again (for grapheme search with words)
# formatAtf can produce empty lines	splitwords = dict(((w,self.splitter[indexName].process([w])) for w in words))

	for line in file.splitlines():
	line = unicodify(line)
	# ignore lemma and other lines
	if line.lstrip().startswith('#lem:'):
	continue
	# ignore p-num line
	if line.startswith('&P'):
	continue
	# ignore version lines
	if line.startswith('#version'):
continue	continue
	# ignore atf type lines
	if line.startswith('#atf:'):
	continue

	# first scan
	hitwords = []
	for w in words:
	if ignorable.sub('',line).find(w) > -1:
	# word is in line
	# append split word for grapheme search with words
	hitwords.extend(splitwords[w])
	#hitwords.extend(wordsplit.split(w))

	# examine hits closer
	if hitwords:
	# split line into words
	parts = wordsplit.split(line)
	line = ""
	for p in parts:
	#logging.debug("tagwordinfile: searching for %s in %s"%(p,hitwords))
	# reassemble line
	if ignorable.sub('', p) in hitwords:
	#logging.debug("tagwordinfile: found %s in %s"%(p,hitwords))
	# this part was found
	line += tagStart + formatAtfHtml(p) + tagEnd
	else:
	line += formatAtfHtml(p)

for w in wordlist:	else:
if line.lower().find(w)>-1:	# no hits
#word ist gefunden dann makiere	line = formatAtfHtml(line)
line = line.replace(w,tagStr%w)

ret.append(line)	ret.append(line)

return u'<br>\n'.join(ret)	return u'<br>\n'.join(ret)



	def tagWordInFiles(self,fileIds,word,indexName='graphemes',regExp=False):
	"""
	get texts with highlighted word from all ids in list FileIds.
	returns dict with id:text pairs.
	"""
	logging.debug("tagwordinfiles word='%s' index=%s file=%s"%(word,indexName,fileIds))
	return dict([(id,self.tagWordInFile(id, word, indexName, regExp)) for id in fileIds])


def URLquote(self,str):	def URLquote(self,str):
"""quote url"""	"""quote url"""
return urllib.quote(str)	return urllib.quote(str)
Line 2441 class CDLIRoot(Folder):	Line 2550 class CDLIRoot(Folder):

def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None):	def importFiles(self,comment="",author="" ,folderName="/Users/dwinter/atf", files=None,ext=None):
"""import files"""	"""import files"""
	logging.debug("importFiles folderName=%s files=%s ext=%s"%(folderName,files,ext))
root=self.cdli_main	root=self.cdli_main
count=0	count=0
if not files:	if not files:
Line 2450 class CDLIRoot(Folder):	Line 2560 class CDLIRoot(Folder):
folder=f[0:3]	folder=f[0:3]
f2=f[0:5]	f2=f[0:5]
obj=self.ZopeFind(root,obj_ids=[folder])	obj=self.ZopeFind(root,obj_ids=[folder])
	logging.debug("importFiles: folder=%s f2=%s obj=%s"%(folder,f2,obj))
if ext:	if ext:

ext.result="<p>adding: %s </p>"%f+ext.result	ext.result="<p>adding: %s </p>"%f+ext.result

if not obj:	if not obj:
manage_addCDLIFileFolder(root,folder,folder)	manage_addCDLIFileFolder(root,folder,folder)
fobj=getattr(root,folder)	fobj=getattr(root,folder)
#transaction.get().commit()	#transaction.get().commit()

else:	else:
fobj=obj[0][1]	fobj=obj[0][1]

obj2=fobj.ZopeFind(fobj,obj_ids=[f2])	obj2=fobj.ZopeFind(fobj,obj_ids=[f2])
	logging.debug("importFiles: fobj=%s obj2=%s"%(fobj,obj2))

if not obj2:	if not obj2:
manage_addCDLIFileFolder(fobj,f2,f2)	manage_addCDLIFileFolder(fobj,f2,f2)
Line 2471 class CDLIRoot(Folder):	Line 2584 class CDLIRoot(Folder):

file2=os.path.join(folderName,f)	file2=os.path.join(folderName,f)
id=f	id=f
manage_addCDLIFile(fobj2,f,'','')	logging.debug("importFiles: addCDLIFile fobj2=%s, f=%s file2=%s"%(fobj2,repr(f),repr(file2)))
id=f	fobj2.addFile(vC='',file=file(file2),author=author,newName=f)
ob=fobj2._getOb(f)
ob.title=id

manage_addCDLIFileObject(ob,id,comment,author,file2,content_type='',from_tmp=True)
self.CDLICatalog.catalog_object(ob)
#self.CDLICatalog.manage_catalogFoundItems(obj_ids=[id],search_sub=1)
#self.CDLICatalog.manage_catalogObject(self.REQUEST, self.REQUEST.RESPONSE, 'CDLICatalog', urlparse.urlparse(ob.absolute_url())[1])
count+=1	count+=1

if count > 1000:	if count%100==0:
print "committing"	logging.debug("importfiles: committing")
transaction.get().commit()	transaction.get().commit()
count=0
transaction.get().commit()	transaction.get().commit()
return "ok"	return "ok"

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.80.2.4
changed lines
	Added in v.1.81