"""Metadata search interface
ROC 2004, itgroup
"""
import logging
import os
import re

#from pyPgSQL import PgSQL
import psycopg as PgSQL
from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from OFS.SimpleItem import SimpleItem
from Products.PageTemplates.PageTemplate import PageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
# Upper bound used by OSAS_search.dbSearch to limit how many rows of a single
# query result are turned into result objects.
MAXHITS = 1000
class OSAS_search(SimpleItem):
    """Object for global metadata search.

    Runs LIKE queries against the ``meta`` table of the configured database,
    wraps the hits in result objects and keeps the search state (results,
    paging indexes and per-file lookup caches) in ``REQUEST.SESSION``.
    """

    meta_type="OSAS_search"

    # how often dbCursor may fail to get a cursor before it gives up
    _maxCursorTries = 3

    # diagnostics go to the logging framework instead of stdout
    _log = logging.getLogger("OSAS_search")

    def __init__(self,id,title,dsn=None):
        """Set id and title and remember the database connection string.

        dsn -- connection string handed to PgSQL.connect; when empty the
               built-in default below is used.
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): the default DSN embeds host and credentials in
            # the source -- consider moving it into configuration.
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object (opened lazily by dbCursor)
        self._v_dbCon = None

    def _sessionValue(self, key, default=None):
        """Return SESSION[key], or default if the key or the session is missing."""
        try:
            return self.REQUEST.SESSION[key]
        except (KeyError, AttributeError):
            return default

    def dbCursor(self):
        """Return a new SQL cursor, (re)connecting to the database if needed.

        A connection that fails to hand out a cursor is dropped and replaced
        by a fresh one; after _maxCursorTries failed attempts an
        AssertionError is raised.  Errors raised by PgSQL.connect itself
        propagate unchanged.
        """
        curs = None
        failures = 0
        while curs is None:
            con = getattr(self, '_v_dbCon', None)
            if con is None:
                if failures >= self._maxCursorTries:
                    break
                con = PgSQL.connect(self.dsn, serialize=0)
                self._v_dbCon = con
            try:
                curs = con.cursor()
            except Exception:
                # in case of problems reset the connection and try again
                self._log.warning("could not get cursor, resetting connection",
                                  exc_info=True)
                self._v_dbCon = None
                failures += 1
                curs = None
        if curs is None:
            # raised explicitly (same type and message as the former assert)
            # so that the check also works when asserts are optimized away
            raise AssertionError("AIIEE no db cursor!!")
        return curs

    def _dbFetch(self, sql, params, single=False):
        """Execute sql with params and return fetchone() or fetchall().

        The cursor is closed even when executing or fetching fails.
        """
        self._log.debug("%s -> %r", sql, params)
        curs = self.dbCursor()
        try:
            curs.execute(sql, params)
            if single:
                return curs.fetchone()
            return curs.fetchall()
        finally:
            curs.close()

    def getDBFileMeta(self, fileid):
        """Return all meta entries of fileid as a list of rows.

        Each row is (idx, tags, content, attributes), ordered by idx.  Rows
        are cached per fileid in SESSION['dbMeta'].
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
            if fileid in metacache:
                return metacache[fileid]

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        res = self._dbFetch(sql, {'id':fileid})
        # store info in cache (re-assign so the session sees the change)
        metacache[fileid] = res
        session['dbMeta'] = metacache
        return res

    def getDBFile(self, fileid):
        """Return the (filename, mtime) row of fileid, or None if not found.

        Rows are cached per fileid in SESSION['dbFiles'].
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
            if fileid in filecache:
                return filecache[fileid]

        sql = 'select filename,mtime from files where id=%(id)s'
        res = self._dbFetch(sql, {'id':fileid}, single=True)
        # store info in cache (re-assign so the session sees the change)
        filecache[fileid] = res
        session['dbFiles'] = filecache
        return res

    def dbSearch(self, query, type):
        """Search the meta table for query and return a list of result objects.

        type -- 'equals', 'startswith' or 'contains'; selects how query is
                turned into the LIKE pattern.  Any other value raises
                ValueError.

        At most MAXHITS rows of the query result are processed.
        """
        if type == 'equals':
            qs = query
        elif type == 'startswith':
            qs = query + "%"
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            # formerly this fell through and failed on the unbound pattern
            raise ValueError("unknown search type: %r" % (type,))

        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        self._log.debug("%s -> %r", sql, qs)
        results = []
        curs = self.dbCursor()
        try:
            curs.execute(sql, {'qs':qs})
            rowcount = 0
            row = curs.fetchone()
            while row and rowcount < MAXHITS:
                rowcount += 1
                result = self.getResult(row)
                if result:
                    results.append(result)
                row = curs.fetchone()
        finally:
            curs.close()
        return results

    def getResult(self, db_result, rank=0):
        """Factory: return the result object matching the tags of db_result.

        db_result -- (fileid, idx, tags, content) row from the meta table.
        """
        (fileid, tagidx, tags, content) = db_result
        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """returns HTML rendering of a search result"""
        return result.render(self)

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10):
        """Run (or page through) a search and render the result page.

        searchstring -- text to search for; when empty the search stored in
                        the session is reused.
        searchtype   -- 'equals', 'startswith' or 'contains'.
        start        -- 1-based index of the first result to show.
        count        -- number of results per page.

        start and count may be strings (as delivered by a web request).
        """
        # convert before doing arithmetic: request values arrive as strings
        count = max(int(count), 1)
        sres = max(int(start) - 1, 0)
        lres = sres + count
        session = self.REQUEST.SESSION
        oldsearch = self._sessionValue('searchstring', "")
        oldtype = self._sessionValue('searchtype', "")

        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype

        if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            if searchstring:
                res = self.dbSearch(searchstring, searchtype)
                # sort the result
                res.sort(ranksort)
            else:
                # nothing to search for (and no previous search): no results
                res = []
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype

        allresults = session['results']
        session['resultgroup'] = allresults[sres:lres]
        session['res_indexes'] = (sres+1, lres, len(allresults), count)
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
        return pt()

    def getSearchType(self):
        """returns the last search type"""
        return self._sessionValue('searchtype', "")

    def getSearchString(self):
        """returns the last search string"""
        return self._sessionValue('searchstring', "")

    def hasNextResults(self):
        """returns if there are results after the current page"""
        indexes = self._sessionValue('res_indexes')
        if not indexes:
            return False
        (first, last, total, count) = indexes
        # compare the end of the current page, not its start, to the total
        return (last < total)

    def hasPrevResults(self):
        """returns if there are results before the current page"""
        indexes = self._sessionValue('res_indexes')
        if not indexes:
            return False
        (first, last, total, count) = indexes
        return (first > 1)

    def nextResults(self):
        """renders the next page of results"""
        indexes = self._sessionValue('res_indexes')
        if not indexes:
            self._log.warning("no next results: no result indexes in session")
            return self.search()
        (first, last, total, count) = indexes
        # stay within 1..total
        first = max(min(first + count, total), 1)
        return self.search(start=first, count=count)

    def prevResults(self):
        """renders the previous page of results"""
        indexes = self._sessionValue('res_indexes')
        if not indexes:
            self._log.warning("no prev results: no result indexes in session")
            return self.search()
        (first, last, total, count) = indexes
        first = max(first - count, 1)
        return self.search(start=first, count=count)
def manage_AddOSAS_searchForm(self):
    """Render the form for adding a new OSAS_search object."""
    template_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    template = PageTemplateFile(template_path)
    return template.__of__(self)()
def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """Create an OSAS_search object and store it in this container.

    When RESPONSE is given, redirect to 'manage_main' afterwards.
    """
    search_obj = OSAS_search(id, title, dsn)
    self._setObject(id, search_obj)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
class SearchResult(SimpleItem):
    """Base class of all search result objects.

    Holds the result type, the file, a URL with its label, the content and
    the rank of the hit.
    """

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Store the given values; the URL label starts out as the URL."""
        self.type, self.file = type, file
        # url first, then its label
        self.url = self.urlabel = url
        self.content, self.rank = content, rank
class AnyResult(SearchResult):
    """Catch-all result object for hits without a more specific type."""

    def __init__(self, zope, db_result, rank):
        """Build the result from a (fileid, idx, tags, content) row.

        zope -- object providing getDBFile(); used to look up the file
                information belonging to the hit.
        """
        SearchResult.__init__(self, type='unknown')
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")
        fileid, _tagidx, hit_tags, hit_content = db_result
        self.hitTag = hit_tags
        # look up the file the hit belongs to
        self.fileinfo = zope.getDBFile(fileid)
        assert self.fileinfo
        # content maps the tag path of the hit to the matching text
        self.content = {hit_tags: hit_content}
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """Put this result into the session and render its page template."""
        zope.REQUEST.SESSION['result'] = self
        template = PageTemplateFile(self.zptFile)
        return template.__of__(zope)()
class MetaResult(AnyResult):
    """Result object that additionally collects the metadata of its file."""

    def __init__(self, zope, db_result, rank):
        """Constructor: load all meta entries of the hit's file."""
        AnyResult.__init__(self, zope, db_result, rank)
        fileid = db_result[0]
        # all meta rows of the file, as returned by zope.getDBFileMeta()
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo

    def checkContext(self, tags, content):
        """Take a meta entry and set url / urlabel from context tags.

        Returns True if tags is a context link or context name tag (whether
        or not content was non-empty), False otherwise.
        """
        if tags.endswith('/context/link'):
            if content:
                self.url = content
            return True
        if tags.endswith('/context/name'):
            if content:
                self.urlabel = content
            return True
        return False
class BibResult(MetaResult):
    """Result object for hits inside bibliographic (/meta/bib/) metadata."""

    def __init__(self, zope, db_result, rank):
        """Constructor: collect the bib entries of the hit's file.

        After construction ``content`` maps the names below /meta/bib/ to
        their content, plus 'type' for the bib type attribute ("*unknown*"
        if none was found).  The rank is raised by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None
        btype = ""
        bitems = {}
        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tags set url/urlabel and are not collected
            if self.checkContext(m_tags, m_content):
                continue
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # guard against a missing attributes value (None), which
                # re.search would reject -- presumably NULL in the database
                # for elements without attributes; TODO confirm
                r = re.search('type="([^"]*)"', m_attributes or "")
                if r:
                    btype = r.group(1)
                if not btype:
                    btype = "*unknown*"
                bitems['type'] = btype
                continue
            # skip everything before the bib tag
            if not btype:
                continue
            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                bitems[r.group(1)] = m_content
        self.content = bitems
        self.rank += 100
        if not self.urlabel and self.url:
            self.urlabel = "view"
class ArchimResult(MetaResult):
    """Result object for hits inside archimedes (/meta/archimedes/) metadata."""

    def __init__(self, zope, db_result, rank):
        """Constructor: collect the archimedes entries of the hit's file.

        After construction ``content`` maps the names below
        /meta/archimedes/ to their content.  The rank is raised by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None
        collected = {}
        for entry in self.metainfo:
            entry_tags, entry_content = entry[1], entry[2]
            # context tags only set url/urlabel
            if self.checkContext(entry_tags, entry_content):
                continue
            # collect archimedes/something
            match = re.search('/meta/archimedes/(.*)', entry_tags)
            if match:
                collected[match.group(1)] = entry_content
        self.content = collected
        self.rank += 100
        if self.url and not self.urlabel:
            self.urlabel = "view"
def ranksort(res1, res2):
    """Comparison function that sorts results by descending rank.

    Returns a positive number if res2 ranks higher than res1, a negative
    number if it ranks lower and 0 if both ranks are equal.  Written
    without the ``cmp`` builtin so it works on Python 2 and Python 3.
    """
    rank1, rank2 = res1.rank, res2.rank
    return (rank2 > rank1) - (rank2 < rank1)
def urlForFile(filename):
    """Heuristic: return a viewer URL for an index file name, or None.

    Only names of the form /mpiwg/online/<path>/index.meta are mapped; the
    <path> part becomes the ``fn`` parameter of the digilib URL.
    """
    url = None
    if filename.startswith('/mpiwg/online/'):
        logging.getLogger("OSAS_search").debug("URLFORFILE: online %s", filename)
        # the dot is escaped so only a literal "index.meta" matches
        r = re.search(r'/mpiwg/online/(.*)/index\.meta', filename)
        if r:
            url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1)
    return url
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>