# File:  [Repository] / OSAS / OSA_system /
# Revision 1.6: download - view: text, annotated - select for diffs - revision graph
# Thu Jul 29 16:14:21 2004 UTC (20 years, 2 months ago) by casties
# Branches: MAIN
# CVS tags: HEAD
# added change form

"""Metadata search interface
ROC 2004, itgroup


from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from OFS.Folder import Folder
from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL
import psycopg as PgSQL

import re
import os

# bound used by the row-fetching loop in OSAS_search.dbSearch: fetching
# stops once the row counter reaches this value
MAXHITS = 1000

class OSAS_search(Folder):
    """Object for global metadata search.

    Queries the ``meta`` and ``files`` tables of the database named by
    ``self.dsn``, wraps the hits in result objects and keeps the current
    result set and the paging state in the SESSION of the request.
    """

    # NOTE(review): the start of this assignment was lost in this copy of
    # the file, only the tab entry survived.  It is rebuilt with the usual
    # Zope idiom of extending the tabs of the base class -- confirm against
    # the repository.
    manage_options = Folder.manage_options + (
        {'label':'Main config','action':'manage_ChangeOSAS_searchForm'},
        )

    def __init__(self,id,title,dsn=None):
        """create the search object

        id, title -- stored on the object
        dsn -- database connection string; a built-in default is used
               when none is given
        """
        self.id = id
        self.title = title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): default credentials live in the source code
            self.dsn = " dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0

    def _sessionGet(self, key, default=None):
        """returns SESSION[key] or default if there is no such entry"""
        try:
            return self.REQUEST.SESSION[key]
        except (KeyError, AttributeError):
            return default

    def dbCursor(self):
        """returns new SQL cursor object

        Uses the cached connection when there is one, otherwise connects
        (at most three attempts in a row).  Raises AssertionError when no
        cursor could be obtained.
        """
        curs = None
        if getattr(self, '_v_dbCon', None) is not None:
            try:
                curs = self._v_dbCon.cursor()
                self._v_tryCon = 0
            except Exception:
                # in case of problems reset dbCon
                self._v_dbCon = None
                self._v_tryCon = getattr(self, '_v_tryCon', 0) + 1
        else:
            self._v_dbCon = None
            self._v_tryCon = 0
        if not curs and self._v_tryCon < 3:
            self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            # call ourself with the new connection
            curs = self.dbCursor()

        # explicit raise (same exception type as the former assert) so the
        # check is not removed when Python runs with -O
        if not curs:
            raise AssertionError("AIIEE no db cursor!!")
        return curs

    def getDBFileMeta(self, fileid):
        """returns an array with all meta entries of fileid

        Each entry is a row (idx, tags, content, attributes), ordered by
        idx.  Rows are cached in SESSION['dbMeta'] keyed by fileid.
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
            if fileid in metacache:
                return metacache[fileid]

        curs = self.dbCursor()

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        print("%s  ->  %s" % (sql, fileid))
        curs.execute(sql, {'id':fileid})
        print("done")

        res = curs.fetchall()
        # store info in cache
        metacache[fileid] = res
        session['dbMeta'] = metacache

        return res

    def getDBFile(self, fileid):
        """returns the file information of fileid

        The result is the row (filename, mtime) from the files table.  It
        is cached in SESSION['dbFiles'] keyed by fileid.
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
            if fileid in filecache:
                return filecache[fileid]

        curs = self.dbCursor()

        sql = 'select filename,mtime from files where id=%(id)s'
        print("DBFILE:  %s  ->  %s" % (sql, fileid))
        curs.execute(sql, {'id':fileid})
        print("DBFILE: done")

        res = curs.fetchone()
        # store info in cache
        filecache[fileid] = res
        session['dbFiles'] = filecache

        return res

    def dbSearch(self, query, type):
        """search DB for query and return result set

        query -- text to look for in the content column (case-insensitive)
        type -- 'equals', 'startswith' or 'contains'

        Returns a tuple (results, restypelist): the result objects and the
        list of result types that occur among them.  Raises ValueError for
        an unknown type.
        """
        results = []
        restypes = {}
        if not query:
            # empty query: answer with the same shape as a real search so
            # that callers can always unpack the return value
            return (results, [])
        if type == 'equals':
            qs = query
        elif type == 'startswith':
            qs = query + "%"
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            raise ValueError("unknown search type: %r" % (type,))
        curs = self.dbCursor()
        sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
        print("%s  ->  %s" % (sql, qs))
        curs.execute(sql, {'qs':qs})
        print("done")
        res = curs.fetchone()
        rescnt = 1
        while res and rescnt < MAXHITS:
            result = self.getResult(res)
            if result:
                results.append(result)
                restypes[result.type] = result.type
            res = curs.fetchone()
            rescnt += 1

        return (results, list(restypes.keys()))

    def getResult(self, db_result, rank=0):
        """factory for result objects

        Picks the result class from the tag path of the hit row
        (fileid, idx, tags, content).
        """
        tags = db_result[2]

        if tags.find('/meta/bib/') > -1:
            res = BibResult(self, db_result, rank)
        elif tags.find('/meta/archimedes/') > -1:
            res = ArchimResult(self, db_result, rank)
        else:
            res = AnyResult(self, db_result, rank)

        return res

    def renderResult(self, result):
        """returns HTML rendering of a search result"""

        return result.render(self)

    def filterResults(self, results, start, end, restypefilter=None):
        """returns list of results that match a filter

        Returns a tuple (resgroup, rescnt): the slice [start:end] of the
        results whose type is in restypefilter (all results when no filter
        is given) and the number of results left after type filtering.
        """
        # filter types first
        if restypefilter:
            res = [r for r in results if r.type in restypefilter]
        else:
            res = results
        # new total count (because of filter)
        rescnt = len(res)
        # filter on count
        resgroup = res[start:end]

        return (resgroup, rescnt)

    # Web page stuff

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
        """search and create result

        Runs a new database search when the search string or type differs
        from the one stored in the session, otherwise reuses the stored
        results.  start is the 1-based index of the first result to show,
        count the page size.  Returns the rendered result page.
        """
        # request parameters may arrive as strings
        count = int(count)
        sres = max(int(start) - 1, 0)
        lres = sres + count
        session = self.REQUEST.SESSION
        try:
            oldsearch = session['searchstring']
            oldtype = session['searchtype']
        except (KeyError, AttributeError):
            oldsearch = ""
            oldtype = ""
        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype
        if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            (res, restypes) = self.dbSearch(searchstring, searchtype)
            # sort the result
            # NOTE(review): the sort statement itself is missing in this
            # copy of the file; highest rank first is assumed -- confirm
            res.sort(key=lambda r: r.rank, reverse=True)
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype
            session['resulttypes'] = restypes

        (resgroup, nres) = self.filterResults(session['results'], sres, lres, restypefilter)
        lres = min(lres, nres)
        sres = min(sres, nres)
        session['resultgroup'] = resgroup
        session['res_indexes'] = (sres+1, lres, nres, count)
        session['res_type_filter'] = restypefilter
        if nres > 0:
            zpt = "zpt/searchResult.zpt"
        else:
            zpt = "zpt/searchResult_none.zpt"
        pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
        return pt()

    def getSearchType(self):
        """returns the last search type"""
        return self._sessionGet('searchtype', "")

    def getSearchString(self):
        """returns the last search string"""
        return self._sessionGet('searchstring', "")

    def hasNextResults(self):
        """returns if there are more results"""
        indexes = self._sessionGet('res_indexes')
        if not indexes:
            return False
        (first, last, total, count) = indexes
        # more to show as long as the last shown index is below the total
        return (last < total)

    def hasPrevResults(self):
        """returns if there are previous results"""
        indexes = self._sessionGet('res_indexes')
        if not indexes:
            return False
        (first, last, total, count) = indexes
        return (first > 1)

    def nextResults(self):
        """returns more results"""
        indexes = self._sessionGet('res_indexes')
        if not indexes:
            print("OUCH: no next results!")
            # nothing to page through: show the stored search from the start
            return self.search()
        (first, last, total, count) = indexes
        first = min(first + count, total)

        return self.search(start=first, count=count)

    def prevResults(self):
        """returns the previous results"""
        indexes = self._sessionGet('res_indexes')
        if not indexes:
            print("OUCH: no prev results!")
            # nothing to page through: show the stored search from the start
            return self.search()
        (first, last, total, count) = indexes
        first = max(first - count, 1)

        return self.search(start=first, count=count)

    def manage_ChangeOSAS_searchForm(self):
        """create Search form"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
        return pt()

    def manage_ChangeOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
        """change id, title and dsn of this search object"""
        self.id = id
        self.title = title
        self.dsn = dsn
        # drop the cached connection so that the new dsn is actually used
        self._v_dbCon = None
        self._v_tryCon = 0
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')

def manage_AddOSAS_searchForm(self):
    """render the page template with the form for adding a search object"""
    zpt_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    template = PageTemplateFile(zpt_path)
    # wrap the template in the context of the caller before rendering
    return template.__of__(self)()

def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """add a new OSAS_search object to this container

    id, title, dsn -- passed on to the OSAS_search constructor
    RESPONSE -- if given, the browser is redirected to 'manage_main'
    """
    # NOTE(review): the statements creating and storing the object are
    # missing in this copy of the file; they are rebuilt with the usual Zope
    # add idiom -- confirm against the repository.
    newObj = OSAS_search(id, title, dsn)
    self._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')

class SearchResult(SimpleItem):
    """base search result object"""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """store the common result attributes

        type -- result type (e.g. "bib", "archim")
        file -- index file name
        url -- urls for the result (list of pairs); empty list if not given
        content -- actual content (list of tuples)
        rank -- result rank for presentation
        """
        # result type (e.g. "bib", "archim")
        self.type = type
        # index file name
        self.file = file
        # url for result (list of pairs)
        if url:
            self.urls = url
        else:
            self.urls = []
        # actual content (list of tuples)
        self.content = content
        # document status (e.g. "online", "archive")
        self.status = None
        # result rank for presentation
        self.rank = rank

class AnyResult(SearchResult):
    """catch-all type result object"""

    def __init__(self, zope, db_result, rank):
        """returns a catch-all type result

        zope -- object providing getDBFile() and REQUEST
        db_result -- hit row (fileid, tagidx, tags, content)
        rank -- result rank for presentation
        """
        # run the base initializer so that type and urls always exist
        SearchResult.__init__(self, rank=rank)
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")
        (db_fileid, db_tagidx, db_tags, db_content) = db_result
        self.hitTag = db_tags

        # get full info from db
        self.fileinfo = zope.getDBFile(db_fileid)
        assert self.fileinfo

        items = {}
        items[db_tags] = db_content
        self.content = items
        self.file = self.fileinfo[0]
        self.status = statusForFile(self.file)
        self.rank = rank

    def getContentList(self):
        """returns content as list of tuples in preferred order"""
        return [(k, self.content[k]) for k in self.content.keys()]

    def render(self, zope):
        """render this result object

        The result is put into SESSION['result'] and the page template in
        self.zptFile is rendered in the context of zope.
        """
        zope.REQUEST.SESSION['result'] = self
        # NOTE(review): the statement creating the template is missing in
        # this copy of the file; rebuilt after the other template calls in
        # this module -- confirm against the repository.
        pt = PageTemplateFile(self.zptFile).__of__(zope)
        return pt()

class MetaResult(AnyResult):
    """result object that also loads all meta entries of its file"""

    def __init__(self, zope, db_result, rank):
        """set up the catch-all attributes and fetch the meta entries"""
        AnyResult.__init__(self, zope, db_result, rank)

        # the hit row is (fileid, tagidx, tags, content)
        file_id, _tag_idx, _tag_path, _text = db_result

        # get full info from db
        self.metainfo = zope.getDBFileMeta(file_id)
        assert self.metainfo

    def checkContext(self, tags, content, ctxurl):
        """takes meta entry and updates url from context tags

        ctxurl is a two element list: slot 0 receives the content of a
        .../context/link tag, slot 1 that of a .../context/name tag.  The
        list is changed in place and returned.
        """
        for slot, suffix in enumerate(('/context/link', '/context/name')):
            if tags.endswith(suffix):
                # empty content leaves the slot untouched
                if content:
                    ctxurl[slot] = content
                break

        return ctxurl

class BibResult(MetaResult):
    """bib type result object"""

    def __init__(self, zope, db_result, rank):
        """collect the /meta/bib/... entries of the file as content

        zope -- object providing the database access methods
        db_result -- hit row (fileid, tagidx, tags, content)
        rank -- base rank; bib results are raised by 100
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        # make sure the url list exists before it is appended to
        if getattr(self, 'urls', None) is None:
            self.urls = []
        url = storageURL(self.file)
        if url:
            self.urls.append(url)

        btype = ""
        bitems = {}
        ctxurl = ['', '']

        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            ctxurl = self.checkContext(m_tags, m_content, ctxurl)
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # attributes may be empty in the database
                r = re.search('type="([^"]*)"', m_attributes or "")
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype

            # skip other tags
            if not btype: continue

            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                k = r.group(1)
                bitems[k] = m_content
                # remember hit tag
                if m_tags == self.hitTag:
                    self.hitTag = k

        self.content = bitems
        # store context
        if not ctxurl[1]:
            ctxurl[1] = "View"
        # must have link
        if ctxurl[0]:
            self.urls.append(ctxurl)
        self.rank += 100

    def getContentList(self):
        """returns content as list of tuples in preferred order"""
        l = []
        c = self.content.copy()
        # preferred items first
        for k in ('author', 'title', 'journal', 'year'):
            if k in c:
                l.append((k, c[k]))
                del c[k]

        # no type (it is absent when the file had no /meta/bib tag)
        c.pop('type', None)
        # copy the rest
        for k in c.keys():
            l.append((k, c[k]))

        return l

class ArchimResult(MetaResult):
    """archimedes type result object"""

    def __init__(self, zope, db_result, rank):
        """collect the /meta/archimedes/... entries of the file as content

        zope -- object providing the database access methods
        db_result -- hit row (fileid, tagidx, tags, content)
        rank -- base rank; archimedes results are raised by 100
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        # make sure the url list exists before it is appended to
        if getattr(self, 'urls', None) is None:
            self.urls = []
        url = storageURL(self.file)
        if url:
            self.urls.append(url)

        # process info
        bitems = {}
        ctxurl = ['', '']
        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            ctxurl = self.checkContext(m_tags, m_content, ctxurl)
            # collect archimedes/something
            r = re.search('/meta/archimedes/(.*)', m_tags)
            if r:
                k = r.group(1)
                bitems[k] = m_content
                # remember hit tag
                if m_tags == self.hitTag:
                    self.hitTag = k

        self.content = bitems
        self.rank += 100
        # store context
        if not ctxurl[1]:
            ctxurl[1] = "View"
        # must have link
        if ctxurl[0]:
            self.urls.append(ctxurl)

    def getContentList(self):
        """returns content as list of tuples in preferred order"""
        l = []
        c = self.content.copy()
        # preferred items first
        for k in ('author', 'title', 'date', 'place'):
            if k in c:
                l.append((k, c[k]))
                del c[k]

        # copy the rest
        for k in c.keys():
            l.append((k, c[k]))

        return l

def ranksort(res1, res2):
    """sort results on rank

    Comparison function for two result objects: returns a negative number
    if res1 has the higher rank, a positive number if res2 has the higher
    rank and 0 if the ranks are equal (so higher ranks sort first).
    """
    rank1 = res1.rank
    rank2 = res2.rank
    # same value as cmp(rank2, rank1), without needing the cmp builtin
    # (which does not exist in Python 3)
    return (rank2 > rank1) - (rank2 < rank1)

def statusForFile(filename):
    """heuristic... maps the prefix of an index file name to a status

    Returns "online", "archive" or "database", or None if no known prefix
    matches.
    """
    known_prefixes = (
        ('/mpiwg/online/', "online"),
        ('/mpiwg/archive/', "archive"),
        ('http://', "database"),
    )
    for prefix, label in known_prefixes:
        if filename.startswith(prefix):
            return label
    return None

def storageURL(filename):
    """heuristic... returns an URL for a index file name

    Returns a tuple (url, name), or None if no URL is known for the file.
    """
    url = None
    name = None
    if filename.startswith('/mpiwg/online/'):
        r = re.search('^(.*)/index.meta', filename)
        if r:
            # NOTE(review): the URL is an empty string in this copy of the
            # file, so this branch ends up returning None below.  The
            # original presumably built a link from the matched directory
            # -- confirm against the repository.
            url = ""
            name = "Storage System"
    elif filename.startswith('http://'):
        url = filename
        name = "Online Database"

    if name and url:
        return (url, name)
    return None

# FreeBSD-CVSweb <>