# File:  [Repository] / OSAS / OSA_system / OSAS_search.py
# Revision 1.2: download - view: text, annotated - select for diffs - revision graph
# Mon Jul 5 21:08:55 2004 UTC (19 years, 11 months ago) by casties
# Branches: MAIN
# CVS tags: HEAD
# Log message: improved everything, really

"""Metadata search interface
ROC 2004, itgroup

"""

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL
import psycopg as PgSQL

import re
import os

# upper bound used by OSAS_search.dbSearch when reading rows from a search query
MAXHITS = 1000

class OSAS_search(SimpleItem):
    """Object for global metadata search

    Runs LIKE queries against the ``meta`` table, wraps the rows in result
    objects and keeps the result list, the paging state and two small
    lookup caches in ``REQUEST.SESSION`` (keys: results, resultgroup,
    res_indexes, searchstring, searchtype, dbMeta, dbFiles).
    """

    meta_type="OSAS_search"

    def __init__(self,id,title,dsn=None):
        """init

        id, title -- stored as the object's id and title
        dsn -- connection string handed to PgSQL.connect(); when empty the
               built-in default below is used
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): default credentials are hard-coded in the source;
            # consider always passing a dsn when adding the object.
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0


    def dbCursor(self):
        """returns new SQL cursor object

        Reuses the volatile connection when it still hands out cursors,
        otherwise (re)connects using self.dsn.  After three consecutive
        failed attempts no further connect is tried and AssertionError
        is raised.
        """
        curs = None
        if getattr(self, '_v_dbCon', None) is not None:
            try:
                curs = self._v_dbCon.cursor()
                self._v_tryCon = 0
            except Exception:
                # in case of problems reset dbCon
                self._v_dbCon = None
                self._v_tryCon = getattr(self, '_v_tryCon', 0) + 1
        else:
            self._v_dbCon = None
            self._v_tryCon = 0

        if not curs and self._v_tryCon < 3:
            self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            # call ourself with the new connection
            curs = self.dbCursor()

        # raised explicitly (instead of an assert statement) so the check
        # is not stripped when Python runs with -O
        if not curs:
            raise AssertionError("AIIEE no db cursor!!")
        return curs

    def getDBFileMeta(self, fileid):
        """returns an array with all meta entries of fileid

        Each entry is an (idx, tags, content, attributes) row, ordered by
        idx.  Rows are cached per session in SESSION['dbMeta'].
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
            if fileid in metacache:
                return metacache[fileid]

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("done")
            res = curs.fetchall()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        metacache[fileid] = res
        session['dbMeta'] = metacache

        return res

    def getDBFile(self, fileid):
        """returns the file information of fileid

        The result is the (filename, mtime) row from the files table, or
        whatever fetchone() returns when there is no such row.  Results
        are cached per session in SESSION['dbFiles'].
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
            if fileid in filecache:
                return filecache[fileid]

        sql = 'select filename,mtime from files where id=%(id)s'
        curs = self.dbCursor()
        try:
            print("DBFILE:  %s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("DBFILE: done")
            res = curs.fetchone()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        filecache[fileid] = res
        session['dbFiles'] = filecache

        return res


    def dbSearch(self, query, type):
        """search DB for query and return result set

        query -- search text; it is passed as a bound parameter, but '%'
                 and '_' inside it still act as LIKE wildcards
        type -- 'equals', 'startswith' or 'contains'

        Returns a list of result objects built from at most MAXHITS rows.
        Raises ValueError for an unknown search type.
        """
        if type == 'equals':
            qs = query
        elif type == 'startswith':
            qs = query + "%"
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            # previously fell through and failed later on the unbound name qs
            raise ValueError("unknown search type: %r" % (type,))

        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        results = []
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, qs))
            curs.execute(sql, {'qs':qs})
            print("done")
            # count the rows already processed, so that exactly MAXHITS
            # rows (not MAXHITS-1) can make it into the result list
            rescnt = 0
            res = curs.fetchone()
            while res and rescnt < MAXHITS:
                result = self.getResult(res)
                if result:
                    results.append(result)
                rescnt += 1
                res = curs.fetchone()
        finally:
            # release the cursor even when a query or a result object fails
            curs.close()

        return results


    def getResult(self, db_result, rank=0):
        """factory for result objects

        db_result -- (fileid, idx, tags, content) row of the meta table
        rank -- initial rank handed to the result object

        The result class is chosen from the tag path of the hit.
        """
        (fileid, tagidx, tags, content) = db_result

        if tags.find('/meta/bib/') > -1:
            res = BibResult(self, db_result, rank)
        elif tags.find('/meta/archimedes/') > -1:
            res = ArchimResult(self, db_result, rank)
        else:
            res = AnyResult(self, db_result, rank)

        return res

    def renderResult(self, result):
        """returns HTML rendering of a search result"""

        return result.render(self)


    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()


    def search(self, searchstring=None, searchtype='startswith', start=1, count=10):
        """search and create result

        searchstring -- text to search for; when empty the last search of
                        this session (string and type) is reused
        searchtype -- 'equals', 'startswith' or 'contains'
        start -- 1-based index of the first result to show
        count -- number of results per page

        Stores the full result list, the current page and the paging
        indexes (first, last, total, count) in the session and returns the
        rendered result page.
        """
        # both values may arrive as strings from the request
        count = int(count)
        sres = int(start) - 1
        if sres < 0:
            # a negative offset would slice from the end of the result list
            sres = 0
        lres = sres + count

        try:
            oldsearch = self.REQUEST.SESSION['searchstring']
            oldtype = self.REQUEST.SESSION['searchtype']
        except (KeyError, AttributeError):
            oldsearch = ""
            oldtype = ""

        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype

        session = self.REQUEST.SESSION
        if not searchstring:
            # nothing to search for and no earlier search in this session:
            # show an empty result page instead of querying with an empty type
            session['results'] = []
        elif not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            res = self.dbSearch(searchstring, searchtype)
            # sort the result
            res.sort(ranksort)
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype

        allresults = session['results']
        session['resultgroup'] = allresults[sres:lres]
        session['res_indexes'] = (sres+1, lres, len(allresults), count)

        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
        return pt()


    def getSearchType(self):
        """returns the last search type (empty string if there is none)"""
        try:
            return self.REQUEST.SESSION['searchtype']
        except (KeyError, AttributeError):
            return ""

    def getSearchString(self):
        """returns the last search string (empty string if there is none)"""
        try:
            return self.REQUEST.SESSION['searchstring']
        except (KeyError, AttributeError):
            return ""


    def hasNextResults(self):
        """returns if there are more results"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError):
            return False
        # there is a next page only if the current page ends before the
        # last result (comparing 'first' reported a next page even when
        # the current page already showed everything)
        return (last < total)

    def hasPrevResults(self):
        """returns if there are previous results"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError):
            return False
        return (first > 1)


    def nextResults(self):
        """returns more results"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError):
            # no paging state in this session: show the first page
            print("OUCH: no next results")
            return self.search()

        first = first + count
        if first > total:
            first = total
        if first < 1:
            # total may be 0
            first = 1

        return self.search(start=first, count=count)


    def prevResults(self):
        """returns previous results"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError):
            # no paging state in this session: show the first page
            print("OUCH: no prev results")
            return self.search()

        first = first - count
        if first < 1:
            first = 1

        return self.search(start=first, count=count)
        


def manage_AddOSAS_searchForm(self):
    """render the form for adding an OSAS_search object"""
    zpt_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    # bind the template to the calling object before rendering it
    form = PageTemplateFile(zpt_path).__of__(self)
    return form()

def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """add a new OSAS_search object

    id, title, dsn -- handed to the OSAS_search constructor
    RESPONSE -- when given, it is redirected to 'manage_main' afterwards
    """
    self._setObject(id, OSAS_search(id, title, dsn))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')




class SearchResult(SimpleItem):
    """base class of all search result objects"""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """store the given values; the URL label starts out as the URL"""
        self.type, self.file = type, file
        self.url = self.urlabel = url
        self.content, self.rank = content, rank

class AnyResult(SearchResult):
    """result object for hits that match no special type"""

    def __init__(self, zope, db_result, rank):
        """build a catch-all result

        zope -- object providing getDBFile()
        db_result -- (fileid, idx, tags, content) row of the hit
        rank -- rank of the result
        """
        SearchResult.__init__(self, type='unknown')

        template_dir = package_home(globals())
        self.zptFile = os.path.join(template_dir, "zpt/searchResult_any.zpt")

        fileid, _tagidx, hit_tags, hit_content = db_result
        self.hitTag = hit_tags

        # the file row is needed for the file name
        self.fileinfo = zope.getDBFile(fileid)
        assert self.fileinfo

        # content maps the tag of the hit to the matching text
        self.content = {hit_tags: hit_content}
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """store this result in the session and render its template"""
        zope.REQUEST.SESSION['result'] = self
        template = PageTemplateFile(self.zptFile)
        return template.__of__(zope)()


class MetaResult(AnyResult):
    """result object that also loads all meta entries of its file"""

    def __init__(self, zope, db_result, rank):
        """constructor

        In addition to the AnyResult setup, all meta rows of the file are
        fetched through zope.getDBFileMeta() and kept in self.metainfo.
        """
        AnyResult.__init__(self, zope, db_result, rank)

        fileid, _tagidx, _tags, _content = db_result

        # all meta entries of the file the hit belongs to
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo

    def checkContext(self, tags, content):
        """use a context tag for url or url label

        Returns True if tags names a context link or context name entry
        (non-empty content then replaces url resp. urlabel), else False.
        """
        if tags.endswith('/context/link'):
            if content:
                self.url = content
            return True

        if tags.endswith('/context/name'):
            if content:
                self.urlabel = content
            return True

        return False


class BibResult(MetaResult):
    """bib type result object

    Collects the /meta/bib/* entries of the file into self.content,
    keyed by the tag path below /meta/bib/, plus the key 'type' holding
    the bib type.
    """

    def __init__(self, zope, db_result, rank):
        """constructor

        zope -- object providing getDBFile() and getDBFileMeta()
        db_result -- (fileid, idx, tags, content) row of the hit
        rank -- base rank; bib results add 100 to it
        """
        MetaResult.__init__(self, zope, db_result, rank)
        #print "NEW BIB RESULT!"
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        btype = ""
        bitems = {}

        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            if self.checkContext(m_tags, m_content):
                continue
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # the attributes value may be empty or None; searching
                # None would raise a TypeError
                r = re.search('type="([^"]*)"', m_attributes or "")
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype
                continue

            # skip other tags (everything before the bib tag itself)
            if not btype: continue

            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                bitems[r.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        # fall back to a generic label when no context name was found
        if not self.urlabel and self.url:
            self.urlabel = "view"


class ArchimResult(MetaResult):
    """result object for archimedes type metadata"""

    def __init__(self, zope, db_result, rank):
        """constructor

        Gathers the /meta/archimedes/* entries of the file in
        self.content and raises the rank by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        template_dir = package_home(globals())
        self.zptFile = os.path.join(template_dir, "zpt/searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        # walk all meta entries of the file
        collected = {}
        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context entries only update url / urlabel
            if self.checkContext(m_tags, m_content):
                continue
            # keep everything below archimedes/, keyed by the rest of the path
            hit = re.search('/meta/archimedes/(.*)', m_tags)
            if hit:
                collected[hit.group(1)] = m_content

        self.content = collected
        self.rank += 100
        if self.url and not self.urlabel:
            self.urlabel = "view"
	



def ranksort(res1, res2):
    """sort results on rank

    cmp-style comparison function for list.sort(): returns a negative
    number if res1 has the higher rank, a positive number if res2 has the
    higher rank and 0 if both ranks are equal, so that results end up in
    descending rank order.
    """
    # written without the cmp() builtin so it also works on interpreters
    # that do not provide it; the result is the same -1 / 0 / 1
    return (res2.rank > res1.rank) - (res2.rank < res1.rank)


def urlForFile(filename):
    """heuristic... returns an URL for a index file name

    filename -- path of an index.meta file; may be None or empty

    Returns the digilib URL for index files below /mpiwg/online/ and
    None if no URL can be derived from the name.
    """
    # no file name, or a file outside the online tree: nothing to link to
    if not filename or not filename.startswith('/mpiwg/online/'):
        return None

    print("URLFORFILE: online  %s" % (filename,))
    # the dot is escaped so that only a literal "index.meta" matches
    r = re.search(r'/mpiwg/online/(.*)/index\.meta', filename)
    if r is None:
        return None

    return "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1)

# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>