File:  [Repository] / OSAS / OSA_system / OSAS_search.py
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Mon Jul 5 21:08:55 2004 UTC (19 years, 11 months ago) by casties
Branches: MAIN
CVS tags: HEAD
improved everything, really

    1: """Metadata search interface
    2: ROC 2004, itgroup
    3: 
    4: """
    5: 
    6: from AccessControl import ClassSecurityInfo
    7: from Globals import InitializeClass
    8: from Globals import Persistent, package_home
    9: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
   10: from Products.PageTemplates.PageTemplate import PageTemplate
   11: from OFS.SimpleItem import SimpleItem
   12: #from pyPgSQL import PgSQL
   13: import psycopg as PgSQL
   14: 
   15: import re
   16: import os
   17: 
# cap on the number of rows dbSearch() pulls from the database per query
MAXHITS = 1000
   19: 
   20: class OSAS_search(SimpleItem):
   21:     """Object for global metadata search"""
   22: 
   23:     meta_type="OSAS_search"
   24: 
   25:     
   26: 
   27:     def __init__(self,id,title,dsn=None):
   28:         """init"""
   29:         self.id=id
   30:         self.title=title
   31:         if dsn:
   32:             self.dsn = dsn
   33:         else:
   34:             self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
   35:         # volatile database connection object
   36:         self._v_dbCon = None
   37:         self._v_tryCon = 0
   38: 
   39: 
   40:     def dbCursor(self):
   41:         """returns new SQL cursor object"""
   42:         curs = None
   43:         if hasattr(self, '_v_dbCon') and self._v_dbCon is not None:
   44:             try:
   45:                 curs = self._v_dbCon.cursor()
   46:                 self._v_tryCon = 0
   47:             except:
   48:                 # in case of problems reset dbCon
   49:                 self._v_dbCon = None
   50:                 self._v_tryCon += 1
   51:         else:
   52:             self._v_dbCon = None
   53:             self._v_tryCon = 0
   54:                 
   55:         if not curs and self._v_tryCon < 3:
   56:             self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
   57:             # call ourself with the new connection
   58:             curs = self.dbCursor()
   59: 
   60:         assert curs, "AIIEE no db cursor!!"
   61:         return curs
   62: 
   63:     def getDBFileMeta(self, fileid):
   64:         """returns an array with all meta entries of fileid"""
   65: 
   66:         metacache = {}
   67:         # try in cache
   68:         if self.REQUEST.SESSION.has_key('dbMeta'):
   69:             metacache = self.REQUEST.SESSION['dbMeta']
   70:             if metacache.has_key(fileid):
   71:                 res = metacache[fileid]
   72:                 #print "meta from cache "
   73:                 return res
   74: 
   75:         curs = self.dbCursor()
   76: 
   77:         sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
   78:         print sql, " -> ", fileid
   79:         curs.execute(sql, {'id':fileid})
   80:         print "done"
   81: 
   82:         res = curs.fetchall()
   83:         #print "res:", res
   84:         curs.close()
   85:         # store info in cache
   86:         metacache[fileid] = res
   87:         self.REQUEST.SESSION['dbMeta'] = metacache
   88: 
   89:         return res
   90: 
   91:     def getDBFile(self, fileid):
   92:         """returns the file information of fileid"""
   93: 
   94:         filecache = {}
   95:         # try in cache
   96:         if self.REQUEST.SESSION.has_key('dbFiles'):
   97:             filecache = self.REQUEST.SESSION['dbFiles']
   98:             if filecache.has_key(fileid):
   99:                 res = filecache[fileid]
  100:                 #print "file from cache "
  101:                 return res
  102: 
  103:         curs = self.dbCursor()
  104: 
  105:         sql = 'select filename,mtime from files where id=%(id)s'
  106:         print 'DBFILE: ', sql, " -> ", fileid
  107:         curs.execute(sql, {'id':fileid})
  108:         print "DBFILE: done"
  109: 
  110:         res = curs.fetchone()
  111:         #print "DBFILE: res:", res
  112:         curs.close()
  113:         # store info in cache
  114:         filecache[fileid] = res
  115:         self.REQUEST.SESSION['dbFiles'] = filecache
  116: 
  117:         return res
  118: 	
  119: 	
  120:     def dbSearch(self, query, type):
  121:         """search DB for query and return result set"""
  122:         curs = self.dbCursor()
  123:         if type == 'equals':
  124:             qs = query
  125:         elif type == 'startswith':
  126:             qs = query + "%"
  127:         elif type == 'contains':
  128:             qs = "%" + query + "%"
  129:             
  130:         sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
  131:         print sql, " -> ", qs
  132:         curs.execute(sql, {'qs':qs})
  133:         print "done"
  134:         results = []
  135:         res = curs.fetchone()
  136:         rescnt = 1
  137:         #print "res0:", res
  138:         while res and rescnt < MAXHITS:
  139:             #print "res:", res
  140:             result = self.getResult(res)
  141:             if (result):
  142:                 results.append(result)
  143:                 
  144:             res = curs.fetchone()
  145:             rescnt += 1
  146: 
  147:         curs.close()
  148:         #self.dbCon = None
  149: 
  150:         #print "SEARCH: ", rescnt, " results"
  151:         return results
  152: 
  153:         
  154:     def getResult(self, db_result, rank=0):
  155:         """factory for result objects"""
  156: 
  157:         (fileid, tagidx, tags, content) = db_result
  158:         res = None
  159: 
  160:         if tags.find('/meta/bib/') > -1:
  161:             res = BibResult(self, db_result, rank)
  162:         elif tags.find('/meta/archimedes/') > -1:
  163:             res = ArchimResult(self, db_result, rank)
  164:         else:
  165:             res = AnyResult(self, db_result, rank)
  166: 
  167:         return res
  168: 	
  169:     def renderResult(self, result):
  170:         """returns HTML rendering of a search result"""
  171: 
  172:         return result.render(self)
  173: 	
  174: 	
  175: 	
  176: 	
  177: 
  178:     #
  179:     # Web page stuff
  180:     #
  181: 
  182:     def index_html(self):
  183:         """metadata search"""
  184:         pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
  185:         return pt()
  186: 
  187: 
  188:     def search(self, searchstring=None, searchtype='startswith', start=1, count=10):
  189:         """search and create result"""
  190:         sres = int(start) -1
  191:         lres = sres + count
  192:         try:
  193:             oldsearch = self.REQUEST.SESSION['searchstring']
  194:             oldtype = self.REQUEST.SESSION['searchtype']
  195:         except:
  196:             oldsearch = ""
  197:             oldtype = ""
  198:             
  199:         if not searchstring:
  200:             searchstring = oldsearch
  201:             searchtype = oldtype
  202:             
  203:         if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
  204:             # new search
  205:             res = self.dbSearch(searchstring, searchtype)
  206:             # sort the result
  207:             res.sort(ranksort)
  208:             # store it
  209:             self.REQUEST.SESSION['results'] = res
  210:             self.REQUEST.SESSION['searchstring'] = searchstring
  211:             self.REQUEST.SESSION['searchtype'] = searchtype
  212: 
  213:         self.REQUEST.SESSION['resultgroup'] = self.REQUEST.SESSION['results'][sres:lres]
  214:         self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, len(self.REQUEST.SESSION['results']), int(count))
  215:             
  216:         pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
  217:         return pt()
  218: 
  219: 
  220:     def getSearchType(self):
  221:         """returns the last search type"""
  222:         try:
  223:             ret = self.REQUEST.SESSION['searchtype']
  224:         except:
  225:             ret = ""
  226: 
  227:         return ret
  228:     
  229:     def getSearchString(self):
  230:         """returns the last search string"""
  231:         try:
  232:             ret = self.REQUEST.SESSION['searchstring']
  233:         except:
  234:             ret = ""
  235: 
  236:         return ret
  237:     
  238: 
  239:     def hasNextResults(self):
  240:         """returns if there are more results"""
  241:         try:
  242:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  243:             return (first < total)
  244:         except:
  245:             return False
  246: 
  247:     def hasPrevResults(self):
  248:         """returns if there are previous results"""
  249:         try:
  250:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  251:             return (first > 1)
  252:         except:
  253:             return False
  254: 
  255: 
  256:     def nextResults(self):
  257:         """returns more results"""
  258:         try:
  259:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  260:             first = first + count
  261:             last = last + count
  262:             if first > total:
  263:                 first = total
  264:             if last > total:
  265:                 last = total
  266:         except:
  267:             print "OUCH: no next results: ", first, last, total, count
  268: 
  269:         return self.search(start=first, count=count)
  270: 
  271:         
  272:     def prevResults(self):
  273:         """returns more results"""
  274:         try:
  275:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  276:             first = first - count
  277:             last = last - count
  278:             if first < 1:
  279:                 first = 1
  280:             if last < 1:
  281:                 last = 1
  282:         except:
  283:             print "OUCH: no prev results: ", first, last, total, count
  284:            
  285: 
  286:         return self.search(start=first, count=count)
  287:         
  288: 
  289: 
  290: def manage_AddOSAS_searchForm(self):
  291:     """create Search form"""
  292:     pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")).__of__(self)
  293:     return pt()
  294: 
  295: def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
  296:     """add the OSAS_root"""
  297:     newObj=OSAS_search(id,title,dsn)
  298:     self._setObject(id,newObj)
  299:     if RESPONSE is not None:
  300:         RESPONSE.redirect('manage_main')
  301: 
  302: 
  303: 
  304: 
  305: class SearchResult(SimpleItem):
  306:     """base search result object"""
  307: 
  308:     def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
  309:         """init"""
  310:         self.type = type
  311:         self.file = file
  312:         self.url = url
  313:         self.urlabel = url
  314:         self.content = content
  315:         self.rank = rank
  316: 
  317: class AnyResult(SearchResult):
  318:     """catch-all type result object"""
  319: 
  320:     def __init__(self, zope, db_result, rank):
  321:         """returns a catch-all type result"""
  322:         SearchResult.__init__(self, type='unknown')
  323:         #print "NEW ANY RESULT!"
  324: 
  325:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")
  326:         
  327:         (fileid, tagidx, tags, content) = db_result
  328:         self.hitTag = tags
  329: 
  330:         # get full info from db
  331:         self.fileinfo = zope.getDBFile(fileid)
  332:         assert self.fileinfo
  333: 
  334:         items = {}
  335:         items[tags] = content
  336:         self.content = items
  337:         self.file = self.fileinfo[0]
  338:         self.url = ""
  339:         self.urlabel = self.file
  340:         self.rank = rank
  341: 
  342:     def render(self, zope):
  343:         """render this result object"""
  344:         zope.REQUEST.SESSION['result'] = self
  345:         pt=PageTemplateFile(self.zptFile).__of__(zope)
  346:         return pt()
  347: 
  348: 
  349: class MetaResult(AnyResult):
  350:     """result object that collects metadata"""
  351: 
  352:     def __init__(self, zope, db_result, rank):
  353:         """contructor"""
  354:         AnyResult.__init__(self, zope, db_result, rank)
  355:         #print "NEW META RESULT!"
  356: 
  357:         (fileid, tagidx, tags, content) = db_result
  358: 
  359:         # get full info from db
  360:         self.metainfo = zope.getDBFileMeta(fileid)
  361:         assert self.metainfo
  362:         
  363:     def checkContext(self, tags, content):
  364:         """takes meta entry and sets url from context tags"""
  365:         if tags.endswith('/context/link'):
  366:             if content:
  367:                 self.url = content            
  368:             
  369:         elif tags.endswith('/context/name'):
  370:             if content:
  371:                 self.urlabel = content
  372: 
  373:         else:
  374:             return False
  375: 
  376:         return True
  377: 
  378: 
  379: class BibResult(MetaResult):
  380:     """bib type result object"""
  381: 
  382:     def __init__(self, zope, db_result, rank):
  383:         """constructor"""
  384:         MetaResult.__init__(self, zope, db_result, rank)
  385:         #print "NEW BIB RESULT!"
  386:         self.type = "bib"
  387:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
  388:         self.url = urlForFile(self.file)
  389:         self.urlabel = None
  390:         (fileid, tagidx, tags, content) = db_result
  391: 
  392:         btype = ""
  393:         bitems = {}
  394: 
  395:         for me in self.metainfo:
  396:             (m_idx, m_tags, m_content, m_attributes) = me
  397:             # context tag
  398:             if self.checkContext(m_tags, m_content):
  399:                 continue
  400:             # first tag with bib type attribute
  401:             if m_tags.endswith('/meta/bib'):
  402:                 r = re.search('type="([^"]*)"', m_attributes)
  403:                 if r:
  404:                     btype = r.group(1)
  405: 
  406:                 if not btype:
  407:                     btype = "*unknown*"
  408: 
  409:                 bitems['type'] = btype
  410:                 continue
  411: 
  412:             # skip other tags
  413:             if not btype: continue
  414: 
  415:             # collect bib/something
  416:             r = re.search('/meta/bib/(.*)', m_tags)
  417:             if r:
  418:                 k = r.group(1)
  419:                 #print "CONTENT: ", m_content
  420:                 bitems[k] = m_content
  421:                 continue
  422: 
  423:         self.content = bitems
  424:         self.rank += 100
  425:         if not self.urlabel and self.url:
  426:             self.urlabel = "view"
  427: 
  428: 
  429: class ArchimResult(MetaResult):
  430:     """archimedes type result object"""
  431: 
  432:     def __init__(self, zope, db_result, rank):
  433:         """constructor"""
  434:         MetaResult.__init__(self, zope, db_result, rank)
  435:         #print "NEW ARCHIM RESULT!"
  436:         self.type = "archim"
  437:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
  438:         self.url = urlForFile(self.file)
  439:         self.urlabel = None
  440:         (fileid, tagidx, tags, content) = db_result
  441: 
  442:         # process info
  443:         bitems = {}
  444:         for me in self.metainfo:
  445:             (m_idx, m_tags, m_content, m_attributes) = me
  446:             # context tag
  447:             if self.checkContext(m_tags, m_content):
  448:                 continue
  449:             # collect archimedes/something
  450:             r = re.search('/meta/archimedes/(.*)', m_tags)
  451:             if r:
  452:                 k = r.group(1)
  453:                 #print "CONTENT: ", m_content
  454:                 bitems[k] = m_content
  455:                 continue
  456: 
  457:         self.content = bitems
  458:         self.rank += 100
  459:         if not self.urlabel and self.url:
  460:             self.urlabel = "view"
  461: 	
  462: 
  463: 
  464: 
  465: def ranksort(res1, res2):
  466:     """sort results on rank"""
  467:     return cmp(res2.rank, res1.rank)
  468: 
  469: 
  470: def urlForFile(filename):
  471:     """heuristic... returns an URL for a index file name"""
  472:     url = None
  473:     if filename.startswith('/mpiwg/online/'):
  474:         print "URLFORFILE: online ", filename
  475:         r = re.search('/mpiwg/online/(.*)/index.meta', filename)
  476:         if r:
  477:             url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1)
  478: 
  479:     return url

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>