File:  [Repository] / OSAS / OSA_system / OSAS_search.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Thu Jul 1 19:31:25 2004 UTC (20 years ago) by casties
Branches: MAIN
CVS tags: HEAD
first version of metadata search interface

"""Metadata search interface
ROC 2004, itgroup

"""

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL
import psycopg as PgSQL

import re
import os

MAXHITS = 1000

class OSAS_search(SimpleItem):
    """Object for global metadata search"""

    meta_type="OSAS_search"

    

    def __init__(self,id,title,dsn=None):
        """init"""
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0


    def dbCursor(self):
        """returns new SQL cursor object"""
        curs = None
        if hasattr(self, '_v_dbCon') and self._v_dbCon is not None:
            try:
                curs = self._v_dbCon.cursor()
                self._v_tryCon = 0
            except:
                # in case of problems reset dbCon
                self._v_dbCon = None
                self._v_tryCon += 1
        else:
            self._v_dbCon = None
            self._v_tryCon = 0
                
        if not curs and self._v_tryCon < 3:
            self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            # call ourself with the new connection
            curs = self.dbCursor()

        assert curs, "AIIEE no db cursor!!"
        return curs

    def getDBFileMeta(self, fileid):
        """returns an array with all meta entries of fileid"""

        metacache = {}
        # try in cache
        if self.REQUEST.SESSION.has_key('dbMeta'):
            metacache = self.REQUEST.SESSION['dbMeta']
            if metacache.has_key(fileid):
                res = metacache[fileid]
                print "meta from cache "
                return res

        curs = self.dbCursor()

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        print sql, " -> ", fileid
        curs.execute(sql, {'id':fileid})
        print "done"

        res = curs.fetchall()
        #print "res:", res
        curs.close()
        # store info in cache
        metacache[fileid] = res
        self.REQUEST.SESSION['dbMeta'] = metacache

        return res

    def getDBFile(self, fileid):
        """returns the file information of fileid"""

        filecache = {}
        # try in cache
        if self.REQUEST.SESSION.has_key('dbFiles'):
            filecache = self.REQUEST.SESSION['dbFiles']
            if filecache.has_key(fileid):
                res = filecache[fileid]
                print "file from cache "
                return res

        curs = self.dbCursor()

        sql = 'select filename,mtime from files where id=%(id)s'
        print 'DBFILE: ', sql, " -> ", fileid
        curs.execute(sql, {'id':fileid})
        print "DBFILE: done"

        res = curs.fetchone()
        #print "DBFILE: res:", res
        curs.close()
        # store info in cache
        filecache[fileid] = res
        self.REQUEST.SESSION['dbFiles'] = filecache

        return res
	
	
    def dbSearch(self, query):
        """search DB for query and return result set"""
        curs = self.dbCursor()
        qs = query + "%"
        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        print sql, " -> ", qs
        curs.execute(sql, {'qs':qs})
        print "done"
        results = []
        res = curs.fetchone()
        rescnt = 1
        #print "res0:", res
        while res and rescnt < MAXHITS:
            #print "res:", res
            result = self.getResult(res)
            if (result):
                results.append(result)
                
            res = curs.fetchone()
            rescnt += 1

        curs.close()
        #self.dbCon = None

        print "SEARCH: ", rescnt, " results"
        return results

        
    def getResult(self, db_result, rank=0):
        """factory for result objects"""
        print "NEW RESULT!"

        (fileid, tagidx, tags, content) = db_result
        res = None

        print "tags: ", tags
        if tags.find('/meta/bib/') > -1:
            res = BibResult(self, db_result, rank)
        elif tags.find('/meta/archimedes/') > -1:
            res = ArchimResult(self, db_result, rank)
        else:
            res = AnyResult(self, db_result, rank)

        return res
	
    def renderResult(self, result):
        """returns HTML rendering of a search result"""

        print "renderresult!", result, " -- ", result.url
        return result.render(self)
	
	
	
	

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()


    def search(self, searchstring=None):
        """search and result"""
        if searchstring:
            print "SEARCH: ", searchstring
            res = self.dbSearch(searchstring)
            res.sort(ranksort)
            self.REQUEST.SESSION['results'] = res
            self.REQUEST.SESSION['searchstring'] = searchstring

        print "SEARCH res:", res
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
        return pt()

	
        
def manage_AddOSAS_searchForm(self):
    """Render the form for adding an OSAS_search object (zpt/AddOSAS_search.zpt)."""
    zptPath = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    template = PageTemplateFile(zptPath)
    # bind the template to this container before rendering it
    return template.__of__(self)()

def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """Create an OSAS_search object and store it under id in this container.

    When RESPONSE is given, the browser is redirected to 'manage_main'
    afterwards.
    """
    searchObj = OSAS_search(id, title, dsn)
    self._setObject(id, searchObj)
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')




class SearchResult(SimpleItem):
    """Base class of all search result objects."""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Store the given values; the link label (urlabel) starts out as the url."""
        self.rank = rank
        self.content = content
        # url and its label are initially the same value
        self.url = self.urlabel = url
        self.file = file
        self.type = type

class AnyResult(SearchResult):
    """catch-all type result object"""

    def __init__(self, zope, db_result, rank):
        """returns a catch-all type result"""
        SearchResult.__init__(self, type='unknown')
        print "NEW ANY RESULT!"

        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")
        
        (fileid, tagidx, tags, content) = db_result
        self.hitTag = tags

        # get full info from db
        self.fileinfo = zope.getDBFile(fileid)
        assert self.fileinfo

        items = {}
        items[tags] = content
        self.content = items
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """render this result object"""
        zope.REQUEST.SESSION['result'] = self
        print "renderender...", self
        pt=PageTemplateFile(self.zptFile).__of__(zope)
        return pt()


class MetaResult(AnyResult):
    """result object that collects metadata"""

    def __init__(self, zope, db_result, rank):
        """contructor"""
        AnyResult.__init__(self, zope, db_result, rank)
        print "NEW META RESULT!"

        (fileid, tagidx, tags, content) = db_result

        # get full info from db
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo
        
    def checkContext(self, tags, content):
        """takes meta entry and sets url from context tags"""
        if tags.endswith('/context/link'):
            if content:
                self.url = content            
            
        elif tags.endswith('/context/name'):
            if content:
                self.urlabel = content

        else:
            return False

        return True


class BibResult(MetaResult):
    """bib type result object"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        MetaResult.__init__(self, zope, db_result, rank)
        print "NEW BIB RESULT!"
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None
        (fileid, tagidx, tags, content) = db_result

        btype = ""
        bitems = {}

        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            if self.checkContext(m_tags, m_content):
                continue
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                r = re.search('type="([^"]*)"', m_attributes)
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype
                continue

            # skip other tags
            if not btype: continue

            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                k = r.group(1)
                #print "CONTENT: ", m_content
                bitems[k] = m_content
                continue

        self.content = bitems
        self.rank += 100
        if not self.urlabel and self.url:
            self.urlabel = "view"


class ArchimResult(MetaResult):
    """archimedes type result object"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        MetaResult.__init__(self, zope, db_result, rank)
        print "NEW ARCHIM RESULT!"
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None
        (fileid, tagidx, tags, content) = db_result

        # process info
        bitems = {}
        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            if self.checkContext(m_tags, m_content):
                continue
            # collect archimedes/something
            r = re.search('/meta/archimedes/(.*)', m_tags)
            if r:
                k = r.group(1)
                #print "CONTENT: ", m_content
                bitems[k] = m_content
                continue

        self.content = bitems
        self.rank += 100
        if not self.urlabel and self.url:
            self.urlabel = "view"
	



def ranksort(res1, res2):
    """sort results on rank"""
    return cmp(res2.rank, res1.rank)


def urlForFile(filename):
    """heuristic... returns an URL for a index file name"""
    url = None
    if filename.startswith('/mpiwg/online/'):
        print "URLFORFILE: online ", filename
        r = re.search('/mpiwg/online/(.*)/index.meta', filename)
        if r:
            url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1)

    return url

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>