OSAS/OSA_system/OSAS_search.py - diff

Return to OSAS_search.py CVS log

Up to [Repository] / OSAS / OSA_system

Diff for /OSAS/OSA_system/OSAS_search.py between versions 1.1 and 1.8

version 1.1, 2004/07/01 19:31:25	version 1.8, 2007/01/19 17:16:25
Line 8 from Globals import InitializeClass	Line 8 from Globals import InitializeClass
from Globals import Persistent, package_home	from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile	from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate	from Products.PageTemplates.PageTemplate import PageTemplate
	from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
	from OFS.Folder import Folder
from OFS.SimpleItem import SimpleItem	from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL	try:
	import psycopg2 as PgSQL
	except:
	try:
import psycopg as PgSQL	import psycopg as PgSQL
	except:
	from pyPgSQL import PgSQL

import re	import re
import os	import os

MAXHITS = 1000	MAXHITS = 1000

class OSAS_search(SimpleItem):	class OSAS_search(Folder):
"""Object for global metadata search"""	"""Object for global metadata search"""

meta_type="OSAS_search"	meta_type="OSAS_search"

	manage_options=Folder.manage_options+(
	{'label':'Main config','action':'manage_ChangeOSAS_searchForm'},
	)


def __init__(self,id,title,dsn=None):	def __init__(self,id,title,dsn=None):
Line 69 class OSAS_search(SimpleItem):	Line 79 class OSAS_search(SimpleItem):
metacache = self.REQUEST.SESSION['dbMeta']	metacache = self.REQUEST.SESSION['dbMeta']
if metacache.has_key(fileid):	if metacache.has_key(fileid):
res = metacache[fileid]	res = metacache[fileid]
print "meta from cache "	#print "meta from cache "
return res	return res

curs = self.dbCursor()	curs = self.dbCursor()
Line 97 class OSAS_search(SimpleItem):	Line 107 class OSAS_search(SimpleItem):
filecache = self.REQUEST.SESSION['dbFiles']	filecache = self.REQUEST.SESSION['dbFiles']
if filecache.has_key(fileid):	if filecache.has_key(fileid):
res = filecache[fileid]	res = filecache[fileid]
print "file from cache "	#print "file from cache "
return res	return res

curs = self.dbCursor()	curs = self.dbCursor()
Line 117 class OSAS_search(SimpleItem):	Line 127 class OSAS_search(SimpleItem):
return res	return res


def dbSearch(self, query):	def dbSearch(self, query, type):
"""search DB for query and return result set"""	"""search DB for query and return result set"""
	results = []
	restypes = {}
	if not query:
	# empty query
	return results

curs = self.dbCursor()	curs = self.dbCursor()
	if type == 'equals':
	qs = query
	elif type == 'startswith':
qs = query + "%"	qs = query + "%"
sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'	elif type == 'contains':
	qs = "%" + query + "%"

	sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
print sql, " -> ", qs	print sql, " -> ", qs
curs.execute(sql, {'qs':qs})	curs.execute(sql, {'qs':qs})
print "done"	print "done"
results = []
res = curs.fetchone()	res = curs.fetchone()
rescnt = 1	rescnt = 1
#print "res0:", res	#print "res0:", res
while res and rescnt < MAXHITS:	while res and rescnt < MAXHITS:
#print "res:", res	#print "res:", res
result = self.getResult(res)	result = self.getResult(res)
if (result):	if result:
results.append(result)	results.append(result)
	restypes[result.type] = result.type

res = curs.fetchone()	res = curs.fetchone()
rescnt += 1	rescnt += 1
Line 141 class OSAS_search(SimpleItem):	Line 163 class OSAS_search(SimpleItem):
curs.close()	curs.close()
#self.dbCon = None	#self.dbCon = None

print "SEARCH: ", rescnt, " results"	#print "SEARCH: ", rescnt, " results"
return results	restypelist = restypes.keys()
	return (results, restypelist)


def getResult(self, db_result, rank=0):	def getResult(self, db_result, rank=0):
"""factory for result objects"""	"""factory for result objects"""
print "NEW RESULT!"

(fileid, tagidx, tags, content) = db_result	(fileid, tagidx, tags, content) = db_result
res = None	res = None

print "tags: ", tags
if tags.find('/meta/bib/') > -1:	if tags.find('/meta/bib/') > -1:
res = BibResult(self, db_result, rank)	res = BibResult(self, db_result, rank)
elif tags.find('/meta/archimedes/') > -1:	elif tags.find('/meta/archimedes/') > -1:
Line 162 class OSAS_search(SimpleItem):	Line 183 class OSAS_search(SimpleItem):

return res	return res


def renderResult(self, result):	def renderResult(self, result):
"""returns HTML rendering of a search result"""	"""returns HTML rendering of a search result"""

print "renderresult!", result, " -- ", result.url
return result.render(self)	return result.render(self)


	def filterResults(self, results, start, end, restypefilter=None):
	"""returns list of results that match a filter"""
	# filter types first
	if restypefilter:
	res = []
	for r in results:
	if r.type == restypefilter:
	res.append(r)
	else:
	res = results
	# new total count (because of filter)
	rescnt = len(res)
	# filter on count
	resgroup = res[start:end]

	return (resgroup, rescnt)


#	#
Line 182 class OSAS_search(SimpleItem):	Line 218 class OSAS_search(SimpleItem):
return pt()	return pt()


def search(self, searchstring=None):	def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
"""search and result"""	"""search and create result"""
if searchstring:	sres = int(start) -1
print "SEARCH: ", searchstring	lres = sres + count
res = self.dbSearch(searchstring)	try:
	oldsearch = self.REQUEST.SESSION['searchstring']
	oldtype = self.REQUEST.SESSION['searchtype']
	except:
	oldsearch = ""
	oldtype = ""

	if not searchstring:
	searchstring = oldsearch
	searchtype = oldtype

	if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
	# new search
	(res, restypes) = self.dbSearch(searchstring, searchtype)
	# sort the result
res.sort(ranksort)	res.sort(ranksort)
	# store it
self.REQUEST.SESSION['results'] = res	self.REQUEST.SESSION['results'] = res
self.REQUEST.SESSION['searchstring'] = searchstring	self.REQUEST.SESSION['searchstring'] = searchstring
	self.REQUEST.SESSION['searchtype'] = searchtype
	self.REQUEST.SESSION['resulttypes'] = restypes

	(resgroup, nres) = self.filterResults(self.REQUEST.SESSION['results'], sres, lres, restypefilter)
	lres = min(lres, nres)
	sres = min(sres, nres)
	self.REQUEST.SESSION['resultgroup'] = resgroup
	self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, nres, int(count))
	self.REQUEST.SESSION['res_type_filter'] = restypefilter
	if nres > 0:
	zpt = "zpt/searchResult.zpt"
	else:
	zpt = "zpt/searchResult_none.zpt"

	pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
	return pt()


	def getSearchType(self):
	"""returns the last search type"""
	try:
	ret = self.REQUEST.SESSION['searchtype']
	except:
	ret = ""

	return ret

	def getSearchString(self):
	"""returns the last search string"""
	try:
	ret = self.REQUEST.SESSION['searchstring']
	except:
	ret = ""

	return ret


	def hasNextResults(self):
	"""returns if there are more results"""
	try:
	(first, last, total, count) = self.REQUEST.SESSION['res_indexes']
	return (first + count < total)
	except:
	return False

	def hasPrevResults(self):
	"""returns if there are previous results"""
	try:
	(first, last, total, count) = self.REQUEST.SESSION['res_indexes']
	return (first > 1)
	except:
	return False


print "SEARCH res:", res	def nextResults(self):
pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)	"""returns more results"""
	try:
	(first, last, total, count) = self.REQUEST.SESSION['res_indexes']
	first = first + count
	last = last + count
	if first > total:
	first = total
	if last > total:
	last = total
	except:
	print "OUCH: no next results!"
	return self.search()

	return self.search(start=first, count=count)


	def prevResults(self):
	"""returns previous results"""
	try:
	(first, last, total, count) = self.REQUEST.SESSION['res_indexes']
	first = first - count
	last = last - count
	if first < 1:
	first = 1
	if last < 1:
	last = 1
	except:
	print "OUCH: no prev results!"
	return self.search()

	return self.search(start=first, count=count)


	def manage_ChangeOSAS_searchForm(self):
	"""create Search form"""
	pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
return pt()	return pt()

	def manage_ChangeOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
	"""change id, title and dsn of this OSAS_search"""
	self.id = id
	self.title = title
	self.dsn = dsn
	if RESPONSE is not None:
	RESPONSE.redirect('manage_main')


def manage_AddOSAS_searchForm(self):	def manage_AddOSAS_searchForm(self):
Line 217 class SearchResult(SimpleItem):	Line 363 class SearchResult(SimpleItem):

def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):	def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
"""init"""	"""init"""
	# result type (e.g. "bib", "archim")
self.type = type	self.type = type
	# index file name
self.file = file	self.file = file
self.url = url	# url for result (list of pairs)
self.urlabel = url	if url:
	self.urls = url
	else:
	self.urls = []
	# actual content (list of tuples)
self.content = content	self.content = content
	# document status (e.g. "online", "archive")
	self.status = None
	# result rank for presentation
self.rank = rank	self.rank = rank

class AnyResult(SearchResult):	class AnyResult(SearchResult):
Line 229 class AnyResult(SearchResult):	Line 384 class AnyResult(SearchResult):

def __init__(self, zope, db_result, rank):	def __init__(self, zope, db_result, rank):
"""returns a catch-all type result"""	"""returns a catch-all type result"""
SearchResult.__init__(self, type='unknown')	SearchResult.__init__(self)
print "NEW ANY RESULT!"	#print "NEW ANY RESULT!"
	self.type='unknown'
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")	self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

(fileid, tagidx, tags, content) = db_result	(db_fileid, db_tagidx, db_tags, db_content) = db_result
self.hitTag = tags	self.hitTag = db_tags

# get full info from db	# get full info from db
self.fileinfo = zope.getDBFile(fileid)	self.fileinfo = zope.getDBFile(db_fileid)
assert self.fileinfo	assert self.fileinfo

items = {}	items = {}
items[tags] = content	items[db_tags] = db_content
self.content = items	self.content = items
self.file = self.fileinfo[0]	self.file = self.fileinfo[0]
self.url = ""	self.status = statusForFile(self.file)
self.urlabel = self.file
self.rank = rank	self.rank = rank

	def getContentList(self):
	"""returns content as list of tuples in preferred order"""
	l = []
	for k in self.content.keys():
	l.append((k, self.content[k]))

	return l

def render(self, zope):	def render(self, zope):
"""render this result object"""	"""render this result object"""
zope.REQUEST.SESSION['result'] = self	zope.REQUEST.SESSION['result'] = self
print "renderender...", self
pt=PageTemplateFile(self.zptFile).__of__(zope)	pt=PageTemplateFile(self.zptFile).__of__(zope)
return pt()	return pt()

Line 263 class MetaResult(AnyResult):	Line 424 class MetaResult(AnyResult):
def __init__(self, zope, db_result, rank):	def __init__(self, zope, db_result, rank):
"""constructor"""	"""constructor"""
AnyResult.__init__(self, zope, db_result, rank)	AnyResult.__init__(self, zope, db_result, rank)
print "NEW META RESULT!"	#print "NEW META RESULT!"

(fileid, tagidx, tags, content) = db_result	(fileid, tagidx, tags, content) = db_result

Line 271 class MetaResult(AnyResult):	Line 432 class MetaResult(AnyResult):
self.metainfo = zope.getDBFileMeta(fileid)	self.metainfo = zope.getDBFileMeta(fileid)
assert self.metainfo	assert self.metainfo

def checkContext(self, tags, content):	def checkContext(self, tags, content, ctxurl):
"""takes meta entry and sets url from context tags"""	"""takes meta entry and updates url from context tags"""
if tags.endswith('/context/link'):	if tags.endswith('/context/link'):
if content:	if content:
self.url = content	#print "CTXlink: ", content
	ctxurl[0] = content

elif tags.endswith('/context/name'):	elif tags.endswith('/context/name'):
if content:	if content:
self.urlabel = content	#print "CTXname: ", content
	ctxurl[1] = content
else:
return False

return True	return ctxurl


class BibResult(MetaResult):	class BibResult(MetaResult):
Line 293 class BibResult(MetaResult):	Line 453 class BibResult(MetaResult):
def __init__(self, zope, db_result, rank):	def __init__(self, zope, db_result, rank):
"""constructor"""	"""constructor"""
MetaResult.__init__(self, zope, db_result, rank)	MetaResult.__init__(self, zope, db_result, rank)
print "NEW BIB RESULT!"	#print "NEW BIB RESULT!", self
self.type = "bib"	self.type = "bib"
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")	self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
self.url = urlForFile(self.file)	url = storageURL(self.file)
self.urlabel = None	if url:
	self.urls.append(url)
(fileid, tagidx, tags, content) = db_result	(fileid, tagidx, tags, content) = db_result

btype = ""	btype = ""
bitems = {}	bitems = {}
	ctxurl = ['', '']

for me in self.metainfo:	for me in self.metainfo:
(m_idx, m_tags, m_content, m_attributes) = me	(m_idx, m_tags, m_content, m_attributes) = me
# context tag	# context tag
if self.checkContext(m_tags, m_content):	ctxurl = self.checkContext(m_tags, m_content, ctxurl)
continue
# first tag with bib type attribute	# first tag with bib type attribute
if m_tags.endswith('/meta/bib'):	if m_tags.endswith('/meta/bib'):
r = re.search('type="([^"]*)"', m_attributes)	r = re.search('type="([^"]*)"', m_attributes)
Line 329 class BibResult(MetaResult):	Line 490 class BibResult(MetaResult):
k = r.group(1)	k = r.group(1)
#print "CONTENT: ", m_content	#print "CONTENT: ", m_content
bitems[k] = m_content	bitems[k] = m_content
	# remember hit tag
	if m_tags == self.hitTag:
	self.hitTag = k
continue	continue

self.content = bitems	self.content = bitems
	# store context
	if not ctxurl[1]:
	ctxurl[1] = "View"
	# must have link
	if ctxurl[0]:
	self.urls.append(ctxurl)

self.rank += 100	self.rank += 100
if not self.urlabel and self.url:
self.urlabel = "view"	def getContentList(self):
	"""returns content as list of tuples in preferred order"""
	l = []
	c = self.content.copy()
	# preferred items first
	for k in ('author', 'title', 'journal', 'year'):
	if c.has_key(k):
	l.append((k, c[k]))
	del c[k]

	# no type
	del c['type']
	# copy the rest
	for k in c.keys():
	l.append((k, c[k]))

	return l


class ArchimResult(MetaResult):	class ArchimResult(MetaResult):
Line 343 class ArchimResult(MetaResult):	Line 530 class ArchimResult(MetaResult):
def __init__(self, zope, db_result, rank):	def __init__(self, zope, db_result, rank):
"""constructor"""	"""constructor"""
MetaResult.__init__(self, zope, db_result, rank)	MetaResult.__init__(self, zope, db_result, rank)
print "NEW ARCHIM RESULT!"	#print "NEW ARCHIM RESULT!", self
self.type = "archim"	self.type = "archim"
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")	self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
self.url = urlForFile(self.file)	url = storageURL(self.file)
self.urlabel = None	if url:
	self.urls.append(url)

(fileid, tagidx, tags, content) = db_result	(fileid, tagidx, tags, content) = db_result

# process info	# process info
bitems = {}	bitems = {}
	ctxurl = ['', '']
for me in self.metainfo:	for me in self.metainfo:
(m_idx, m_tags, m_content, m_attributes) = me	(m_idx, m_tags, m_content, m_attributes) = me
# context tag	# context tag
if self.checkContext(m_tags, m_content):	ctxurl = self.checkContext(m_tags, m_content, ctxurl)
continue
# collect archimedes/something	# collect archimedes/something
r = re.search('/meta/archimedes/(.*)', m_tags)	r = re.search('/meta/archimedes/(.*)', m_tags)
if r:	if r:
k = r.group(1)	k = r.group(1)
#print "CONTENT: ", m_content	#print "CONTENT: ", m_content
bitems[k] = m_content	bitems[k] = m_content
	# remember hit tag
	if m_tags == self.hitTag:
	self.hitTag = k
continue	continue

self.content = bitems	self.content = bitems
self.rank += 100	self.rank += 100
if not self.urlabel and self.url:	# store context
self.urlabel = "view"	if not ctxurl[1]:
	ctxurl[1] = "View"
	# must have link
	if ctxurl[0]:
	self.urls.append(ctxurl)


	def getContentList(self):
	"""returns content as list of tuples in preferred order"""
	l = []
	c = self.content.copy()
	# preferred items first
	for k in ('author', 'title', 'date', 'place'):
	if c.has_key(k):
	l.append((k, c[k]))
	del c[k]

	# copy the rest
	for k in c.keys():
	l.append((k, c[k]))

	return l



Line 378 def ranksort(res1, res2):	Line 591 def ranksort(res1, res2):
return cmp(res2.rank, res1.rank)	return cmp(res2.rank, res1.rank)


def urlForFile(filename):	def statusForFile(filename):
	"""heuristic... returns status for an index file name"""
	status = None
	if filename.startswith('/mpiwg/online/'):
	status = "online"
	elif filename.startswith('/mpiwg/archive/'):
	status = "archive"
	elif filename.startswith('http://'):
	status = "database"

	return status

	def storageURL(filename):
"""heuristic... returns a URL for an index file name"""	"""heuristic... returns a URL for an index file name"""
url = None	url = None
	name = None
if filename.startswith('/mpiwg/online/'):	if filename.startswith('/mpiwg/online/'):
print "URLFORFILE: online ", filename	#print "URLFORFILE: online ", filename
r = re.search('/mpiwg/online/(.*)/index.meta', filename)	r = re.search('^(.*)/index.meta', filename)
if r:	if r:
url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1)	url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1)
	name = "Storage System"

	elif filename.startswith('http://'):
	#print "URLFORFILE: url ", filename
	url = filename
	name = "Online Database"

	if name and url:
	return (url, name)

	return None

return url

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.1
changed lines
	Added in v.1.8