"""Metadata search interface
ROC 2004, itgroup
"""
import logging
import os
import re

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from OFS.Folder import Folder
from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL
import psycopg as PgSQL
# cap on the number of database rows dbSearch() processes for one query
MAXHITS = 1000
class OSAS_search(Folder):
    """Zope folder object providing a global search over the metadata database.

    Search results, the current result page and cached database rows are
    stored in ``REQUEST.SESSION`` so that paging does not repeat the query.
    """

    meta_type = "OSAS_search"

    manage_options = Folder.manage_options + (
        {'label': 'Main config', 'action': 'manage_ChangeOSAS_searchForm'},
    )

    # Connection string used when no dsn is configured.
    # NOTE(review): host and credentials are hard-coded in the source;
    # consider moving them into configuration.
    _default_dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"

    # number of failed attempts to obtain a cursor before dbCursor gives up
    _max_cursor_tries = 3

    def __init__(self, id, title, dsn=None):
        """Set id, title and database connection string.

        A false dsn (None or empty) selects the built-in default.
        """
        self.id = id
        self.title = title
        self.dsn = dsn or self._default_dsn
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0

    def _log(self):
        # logger shared by all debug/diagnostic output of this class
        return logging.getLogger('OSAS_search')

    def _sessionGet(self, key, default=None):
        # read a session value, falling back to default when there is no
        # session or the key has not been stored yet
        try:
            return self.REQUEST.SESSION[key]
        except (AttributeError, KeyError):
            return default

    def _dbFetch(self, sql, params, single=False):
        # run one parameterized query; return fetchone() if single else
        # fetchall(); the cursor is closed even when the query fails
        curs = self.dbCursor()
        try:
            self._log().debug("%s -> %r", sql, params)
            curs.execute(sql, params)
            if single:
                return curs.fetchone()
            return curs.fetchall()
        finally:
            curs.close()

    def dbCursor(self):
        """returns new SQL cursor object

        The connection is kept in the volatile attribute ``_v_dbCon`` and
        is (re)opened when it is missing or when creating a cursor fails.
        After three failed attempts an AssertionError is raised.  Errors
        from opening the connection itself are not caught.
        """
        con = getattr(self, '_v_dbCon', None)
        failures = 0
        while failures < self._max_cursor_tries:
            if con is None:
                con = PgSQL.connect(self.dsn, serialize=0)
            try:
                curs = con.cursor()
            except Exception:
                # in case of problems drop the connection and reconnect
                self._log().exception("could not create a db cursor")
                con = None
                failures += 1
                self._v_dbCon = None
                self._v_tryCon = failures
            else:
                self._v_dbCon = con
                self._v_tryCon = 0
                return curs
        # raised explicitly so the check also works under "python -O"
        raise AssertionError("AIIEE no db cursor!!")

    def getDBFileMeta(self, fileid):
        """returns an array with all meta entries of fileid

        Each entry is a row (idx, tags, content, attributes), ordered by
        idx.  Rows are cached per fileid in the session under 'dbMeta'.
        """
        session = self.REQUEST.SESSION
        metacache = session.get('dbMeta')
        if metacache is None:
            metacache = {}
        # try in cache
        if fileid in metacache:
            return metacache[fileid]
        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        res = self._dbFetch(sql, {'id': fileid})
        # store info in cache (re-assign so the session notices the change)
        metacache[fileid] = res
        session['dbMeta'] = metacache
        return res

    def getDBFile(self, fileid):
        """returns the file information of fileid

        The result is the row (filename, mtime), or whatever fetchone()
        yields when no row matches.  Rows are cached per fileid in the
        session under 'dbFiles'.
        """
        session = self.REQUEST.SESSION
        filecache = session.get('dbFiles')
        if filecache is None:
            filecache = {}
        # try in cache
        if fileid in filecache:
            return filecache[fileid]
        sql = 'select filename,mtime from files where id=%(id)s'
        res = self._dbFetch(sql, {'id': fileid}, single=True)
        # store info in cache (re-assign so the session notices the change)
        filecache[fileid] = res
        session['dbFiles'] = filecache
        return res

    def dbSearch(self, query, type):
        """search DB for query and return result set

        type is one of 'equals', 'startswith' or 'contains'; the match is
        case-insensitive.  Returns a tuple (results, restypelist) holding
        the result objects and the list of distinct result types.  An empty
        query yields ([], []).  At most MAXHITS rows are processed.
        Raises ValueError for an unknown type.
        """
        results = []
        restypes = {}
        if not query:
            # empty query: same shape as a regular (empty) result
            return (results, [])
        if type == 'equals':
            qs = query
        elif type == 'startswith':
            qs = query + "%"
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            raise ValueError("unknown search type: %r" % (type,))
        sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
        curs = self.dbCursor()
        try:
            self._log().debug("%s -> %r", sql, qs)
            curs.execute(sql, {'qs': qs})
            rowcnt = 0
            row = curs.fetchone()
            while row and rowcnt < MAXHITS:
                result = self.getResult(row)
                if result:
                    results.append(result)
                    restypes[result.type] = result.type
                rowcnt += 1
                row = curs.fetchone()
        finally:
            curs.close()
        return (results, list(restypes.keys()))

    def getResult(self, db_result, rank=0):
        """factory for result objects

        Picks the result class from the tag path of the hit:
        '/meta/bib/' gives a BibResult, '/meta/archimedes/' an
        ArchimResult, everything else an AnyResult.
        """
        tags = db_result[2]
        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """returns HTML rendering of a search result"""
        return result.render(self)

    def filterResults(self, results, start, end, restypefilter=None):
        """returns list of results that match a filter

        Results are first reduced to those whose type is in restypefilter
        (if given) and then sliced to [start:end].  Returns a tuple
        (resgroup, rescnt) where rescnt is the count after type filtering.
        """
        if restypefilter:
            matching = [r for r in results if r.type in restypefilter]
        else:
            matching = results
        return (matching[start:end], len(matching))

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
        """search and create result

        Runs a new database search when the search string or type differs
        from the one stored in the session; otherwise the stored results
        are reused.  start is the 1-based index of the first result to
        show and count the page size; both may be given as strings.
        Returns the rendered result page.
        """
        # request parameters arrive as strings
        count = int(count)
        sres = max(int(start) - 1, 0)
        lres = sres + count
        session = self.REQUEST.SESSION
        try:
            oldsearch = session['searchstring']
            oldtype = session['searchtype']
        except (AttributeError, KeyError):
            oldsearch = ""
            oldtype = ""
        if not searchstring:
            # no new query given: continue with the previous one
            searchstring = oldsearch
            searchtype = oldtype
        if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            (res, restypes) = self.dbSearch(searchstring, searchtype)
            # sort the result
            res.sort(ranksort)
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype
            session['resulttypes'] = restypes
        (resgroup, nres) = self.filterResults(session['results'], sres, lres, restypefilter)
        lres = min(lres, nres)
        sres = min(sres, nres)
        session['resultgroup'] = resgroup
        session['res_indexes'] = (sres + 1, lres, nres, count)
        session['res_type_filter'] = restypefilter
        if nres > 0:
            zpt = "zpt/searchResult.zpt"
        else:
            zpt = "zpt/searchResult_none.zpt"
        pt = PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
        return pt()

    def getSearchType(self):
        """returns the last search type"""
        return self._sessionGet('searchtype', "")

    def getSearchString(self):
        """returns the last search string"""
        return self._sessionGet('searchstring', "")

    def hasNextResults(self):
        """returns if there are more results"""
        indexes = self._sessionGet('res_indexes')
        if indexes is None:
            return False
        (first, last, total, count) = indexes
        # more results exist exactly when the page ends before the total
        return last < total

    def hasPrevResults(self):
        """returns if there are previous results"""
        indexes = self._sessionGet('res_indexes')
        if indexes is None:
            return False
        (first, last, total, count) = indexes
        return first > 1

    def nextResults(self):
        """returns more results"""
        # NOTE(review): the result type filter is not passed on, so paging
        # drops it -- confirm whether that is intended.
        indexes = self._sessionGet('res_indexes')
        if indexes is None:
            self._log().warning("OUCH: no next results!")
            return self.search()
        (first, last, total, count) = indexes
        first = min(first + count, total)
        return self.search(start=first, count=count)

    def prevResults(self):
        """returns previous results"""
        # NOTE(review): the result type filter is not passed on, so paging
        # drops it -- confirm whether that is intended.
        indexes = self._sessionGet('res_indexes')
        if indexes is None:
            self._log().warning("OUCH: no prev results!")
            return self.search()
        (first, last, total, count) = indexes
        first = max(first - count, 1)
        return self.search(start=first, count=count)

    def manage_ChangeOSAS_searchForm(self):
        """show the configuration form"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
        return pt()

    def manage_ChangeOSAS_search(self, id, title=None, dsn=None, RESPONSE=None):
        """change id, title and dsn of this object

        A false dsn selects the built-in default, as in the constructor,
        so that the object never ends up without a connection string.
        Redirects to 'manage_main' when RESPONSE is given.
        """
        self.id = id
        self.title = title
        self.dsn = dsn or self._default_dsn
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
def manage_AddOSAS_searchForm(self):
    """show the form for adding an OSAS_search object"""
    template_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    form = PageTemplateFile(template_path).__of__(self)
    return form()
def manage_AddOSAS_search(self, id, title=None, dsn=None, RESPONSE=None):
    """create an OSAS_search object and add it to this container

    Redirects to 'manage_main' when RESPONSE is given.
    """
    self._setObject(id, OSAS_search(id, title, dsn))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
class SearchResult(SimpleItem):
    """Common base of all search result objects."""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Store the basic fields of a search result.

        type    -- result type (e.g. "bib", "archim")
        file    -- index file name
        url     -- list of url entries for the result; a false value
                   gives a fresh empty list
        content -- actual content of the result
        rank    -- rank used for ordering in the presentation
        """
        self.type = type
        self.file = file
        self.urls = url or []
        self.content = content
        self.rank = rank
        # document status (e.g. "online", "archive"); not known yet
        self.status = None
class AnyResult(SearchResult):
    """Fallback result type used when no specialised class applies."""

    def __init__(self, zope, db_result, rank):
        """Build the result from one database hit.

        zope      -- object providing getDBFile() for the file lookup
        db_result -- row (fileid, tagidx, tags, content) of the hit
        rank      -- initial rank of the result
        """
        SearchResult.__init__(self)
        hit_fileid, hit_tagidx, hit_tags, hit_content = db_result
        self.type = 'unknown'
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")
        self.hitTag = hit_tags
        # look up the file record that belongs to the hit
        self.fileinfo = zope.getDBFile(hit_fileid)
        assert self.fileinfo
        # the content is just the single tag that matched
        self.content = {hit_tags: hit_content}
        self.file = self.fileinfo[0]
        self.status = statusForFile(self.file)
        self.rank = rank

    def getContentList(self):
        """Return the content as a list of (key, value) tuples."""
        return [(key, self.content[key]) for key in self.content.keys()]

    def render(self, zope):
        """Render this result with its page template.

        The result is stored in the session under 'result' before the
        template named by zptFile is rendered in the context of zope.
        """
        zope.REQUEST.SESSION['result'] = self
        template = PageTemplateFile(self.zptFile).__of__(zope)
        return template()
class MetaResult(AnyResult):
    """Result that additionally loads all metadata entries of its file."""

    def __init__(self, zope, db_result, rank):
        """Build the result and fetch the file's metadata.

        The metadata rows come from zope.getDBFileMeta() and are kept in
        self.metainfo; an empty answer trips the assertion.
        """
        AnyResult.__init__(self, zope, db_result, rank)
        hit_fileid = db_result[0]
        self.metainfo = zope.getDBFileMeta(hit_fileid)
        assert self.metainfo

    def checkContext(self, tags, content, ctxurl):
        """Update the [link, name] pair ctxurl from a context meta entry.

        A tag ending in '/context/link' fills slot 0, one ending in
        '/context/name' fills slot 1; empty content changes nothing.
        The list is modified in place and also returned.
        """
        if tags.endswith('/context/link'):
            slot = 0
        elif tags.endswith('/context/name'):
            slot = 1
        else:
            return ctxurl
        if content:
            ctxurl[slot] = content
        return ctxurl
class BibResult(MetaResult):
    """bib type result object

    Collects the '/meta/bib/...' entries of the file the hit belongs to.
    """

    def __init__(self, zope, db_result, rank):
        """Build the result from one database hit.

        zope      -- object providing getDBFile() and getDBFileMeta()
        db_result -- row (fileid, tagidx, tags, content) of the hit
        rank      -- initial rank; bib results get a bonus of 100
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        url = storageURL(self.file)
        if url:
            self.urls.append(url)
        btype = ""
        bitems = {}
        ctxurl = ['', '']
        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            ctxurl = self.checkContext(m_tags, m_content, ctxurl)
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # the attributes column may be empty (None) in the database
                r = re.search('type="([^"]*)"', m_attributes or '')
                if r:
                    btype = r.group(1)
                if not btype:
                    btype = "*unknown*"
                bitems['type'] = btype
                continue
            # skip everything before the bib tag has been seen
            if not btype:
                continue
            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                k = r.group(1)
                bitems[k] = m_content
                # remember hit tag (as short key instead of full path)
                if m_tags == self.hitTag:
                    self.hitTag = k
        self.content = bitems
        # store context
        if not ctxurl[1]:
            ctxurl[1] = "View"
        # must have link
        if ctxurl[0]:
            self.urls.append(ctxurl)
        self.rank += 100

    def getContentList(self):
        """returns content as list of tuples in preferred order

        'author', 'title', 'journal' and 'year' come first (when present),
        the 'type' entry is left out, and the remaining items follow.
        """
        l = []
        c = self.content.copy()
        # preferred items first
        for k in ('author', 'title', 'journal', 'year'):
            if k in c:
                l.append((k, c.pop(k)))
        # no type; the entry is missing when no '/meta/bib' tag was found
        c.pop('type', None)
        # copy the rest
        for k in c.keys():
            l.append((k, c[k]))
        return l
class ArchimResult(MetaResult):
    """Result for a file carrying '/meta/archimedes/...' metadata."""

    def __init__(self, zope, db_result, rank):
        """Build the result from one database hit.

        zope      -- object providing getDBFile() and getDBFileMeta()
        db_result -- row (fileid, tagidx, tags, content) of the hit
        rank      -- initial rank; archimedes results get a bonus of 100
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        storage_link = storageURL(self.file)
        if storage_link:
            self.urls.append(storage_link)
        collected = {}
        context = ['', '']
        for entry in self.metainfo:
            (entry_idx, entry_tags, entry_content, entry_attributes) = entry
            # context tags update the [link, name] pair
            context = self.checkContext(entry_tags, entry_content, context)
            # only archimedes/something entries are collected
            match = re.search('/meta/archimedes/(.*)', entry_tags)
            if not match:
                continue
            key = match.group(1)
            collected[key] = entry_content
            # remember the hit tag by its short key
            if entry_tags == self.hitTag:
                self.hitTag = key
        self.content = collected
        self.rank += 100
        # a context without a name gets a default label
        if not context[1]:
            context[1] = "View"
        # only contexts with a link are stored
        if context[0]:
            self.urls.append(context)

    def getContentList(self):
        """Return the content as (key, value) tuples, preferred keys first.

        'author', 'title', 'date' and 'place' come first (when present),
        followed by the remaining items.
        """
        remaining = self.content.copy()
        ordered = []
        for key in ('author', 'title', 'date', 'place'):
            if key in remaining:
                ordered.append((key, remaining[key]))
                del remaining[key]
        for key in remaining.keys():
            ordered.append((key, remaining[key]))
        return ordered
def ranksort(res1, res2):
    """sort results on rank

    Comparison function for list.sort(): returns a negative number when
    res1 has the higher rank, so that results end up in descending rank
    order, 0 for equal ranks and a positive number otherwise.
    """
    # written without the cmp() builtin, which newer Python versions lack;
    # the value is the same as cmp(res2.rank, res1.rank)
    return (res2.rank > res1.rank) - (res2.rank < res1.rank)
def statusForFile(filename):
    """Guess the document status from the prefix of an index file name.

    Returns "online", "archive" or "database", or None when no known
    prefix matches.
    """
    known_prefixes = (
        ('/mpiwg/online/', "online"),
        ('/mpiwg/archive/', "archive"),
        ('http://', "database"),
    )
    for prefix, label in known_prefixes:
        if filename.startswith(prefix):
            return label
    return None
def storageURL(filename):
    """heuristic... returns an URL for a index file name

    Returns a tuple (url, name), or None when no URL can be derived:
    - names below '/mpiwg/online/' that contain '/index.meta' map to the
      storage viewer, with the directory of the index file as path;
    - names starting with 'http://' are used as URL unchanged.
    """
    if filename.startswith('/mpiwg/online/'):
        # the dot is escaped so that only a literal "index.meta" matches
        r = re.search(r'^(.*)/index\.meta', filename)
        if r:
            url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s" % r.group(1)
            return (url, "Storage System")
        return None
    if filename.startswith('http://'):
        return (filename, "Online Database")
    return None
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>