File:  [Repository] / OSAS / OSA_system / OSAS_search.py
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Thu Jul 29 16:14:21 2004 UTC (19 years, 11 months ago) by casties
Branches: MAIN
CVS tags: HEAD
added change form

    1: """Metadata search interface
    2: ROC 2004, itgroup
    3: 
    4: """
    5: 
import logging
import os
import re

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from OFS.Folder import Folder
from OFS.SimpleItem import SimpleItem

#from pyPgSQL import PgSQL
import psycopg as PgSQL
   19: 
# cap on the number of database rows OSAS_search.dbSearch walks through
# for a single query
MAXHITS = 1000
   21: 
   22: class OSAS_search(Folder):
   23:     """Object for global metadata search"""
   24: 
   25:     meta_type="OSAS_search"
   26: 
   27:     manage_options=Folder.manage_options+(
   28:         {'label':'Main config','action':'manage_ChangeOSAS_searchForm'},
   29:        )
   30:     
   31: 
   32:     def __init__(self,id,title,dsn=None):
   33:         """init"""
   34:         self.id=id
   35:         self.title=title
   36:         if dsn:
   37:             self.dsn = dsn
   38:         else:
   39:             self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
   40:         # volatile database connection object
   41:         self._v_dbCon = None
   42:         self._v_tryCon = 0
   43: 
   44: 
   45:     def dbCursor(self):
   46:         """returns new SQL cursor object"""
   47:         curs = None
   48:         if hasattr(self, '_v_dbCon') and self._v_dbCon is not None:
   49:             try:
   50:                 curs = self._v_dbCon.cursor()
   51:                 self._v_tryCon = 0
   52:             except:
   53:                 # in case of problems reset dbCon
   54:                 self._v_dbCon = None
   55:                 self._v_tryCon += 1
   56:         else:
   57:             self._v_dbCon = None
   58:             self._v_tryCon = 0
   59:                 
   60:         if not curs and self._v_tryCon < 3:
   61:             self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
   62:             # call ourself with the new connection
   63:             curs = self.dbCursor()
   64: 
   65:         assert curs, "AIIEE no db cursor!!"
   66:         return curs
   67: 
   68:     def getDBFileMeta(self, fileid):
   69:         """returns an array with all meta entries of fileid"""
   70: 
   71:         metacache = {}
   72:         # try in cache
   73:         if self.REQUEST.SESSION.has_key('dbMeta'):
   74:             metacache = self.REQUEST.SESSION['dbMeta']
   75:             if metacache.has_key(fileid):
   76:                 res = metacache[fileid]
   77:                 #print "meta from cache "
   78:                 return res
   79: 
   80:         curs = self.dbCursor()
   81: 
   82:         sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
   83:         print sql, " -> ", fileid
   84:         curs.execute(sql, {'id':fileid})
   85:         print "done"
   86: 
   87:         res = curs.fetchall()
   88:         #print "res:", res
   89:         curs.close()
   90:         # store info in cache
   91:         metacache[fileid] = res
   92:         self.REQUEST.SESSION['dbMeta'] = metacache
   93: 
   94:         return res
   95: 
   96:     def getDBFile(self, fileid):
   97:         """returns the file information of fileid"""
   98: 
   99:         filecache = {}
  100:         # try in cache
  101:         if self.REQUEST.SESSION.has_key('dbFiles'):
  102:             filecache = self.REQUEST.SESSION['dbFiles']
  103:             if filecache.has_key(fileid):
  104:                 res = filecache[fileid]
  105:                 #print "file from cache "
  106:                 return res
  107: 
  108:         curs = self.dbCursor()
  109: 
  110:         sql = 'select filename,mtime from files where id=%(id)s'
  111:         print 'DBFILE: ', sql, " -> ", fileid
  112:         curs.execute(sql, {'id':fileid})
  113:         print "DBFILE: done"
  114: 
  115:         res = curs.fetchone()
  116:         #print "DBFILE: res:", res
  117:         curs.close()
  118:         # store info in cache
  119:         filecache[fileid] = res
  120:         self.REQUEST.SESSION['dbFiles'] = filecache
  121: 
  122:         return res
  123: 	
  124: 	
  125:     def dbSearch(self, query, type):
  126:         """search DB for query and return result set"""
  127:         results = []
  128:         restypes = {}
  129:         if not query:
  130:             # empty query
  131:             return results
  132:         
  133:         curs = self.dbCursor()
  134:         if type == 'equals':
  135:             qs = query
  136:         elif type == 'startswith':
  137:             qs = query + "%"
  138:         elif type == 'contains':
  139:             qs = "%" + query + "%"
  140:             
  141:         sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
  142:         print sql, " -> ", qs
  143:         curs.execute(sql, {'qs':qs})
  144:         print "done"
  145:         res = curs.fetchone()
  146:         rescnt = 1
  147:         #print "res0:", res
  148:         while res and rescnt < MAXHITS:
  149:             #print "res:", res
  150:             result = self.getResult(res)
  151:             if result:
  152:                 results.append(result)
  153:                 restypes[result.type] = result.type
  154:                 
  155:             res = curs.fetchone()
  156:             rescnt += 1
  157: 
  158:         curs.close()
  159:         #self.dbCon = None
  160: 
  161:         #print "SEARCH: ", rescnt, " results"
  162:         restypelist = restypes.keys()
  163:         return (results, restypelist)
  164: 
  165:         
  166:     def getResult(self, db_result, rank=0):
  167:         """factory for result objects"""
  168: 
  169:         (fileid, tagidx, tags, content) = db_result
  170:         res = None
  171: 
  172:         if tags.find('/meta/bib/') > -1:
  173:             res = BibResult(self, db_result, rank)
  174:         elif tags.find('/meta/archimedes/') > -1:
  175:             res = ArchimResult(self, db_result, rank)
  176:         else:
  177:             res = AnyResult(self, db_result, rank)
  178: 
  179:         return res
  180: 
  181: 	
  182:     def renderResult(self, result):
  183:         """returns HTML rendering of a search result"""
  184: 
  185:         return result.render(self)
  186: 	
  187: 
  188:     def filterResults(self, results, start, end, restypefilter=None):
  189:         """returns list of results that match a filter"""
  190:         # filter types first
  191:         if restypefilter:
  192:             res = []
  193:             for r in results:
  194:                 if r.type in restypefilter:
  195:                     res.append(r)
  196:         else:
  197:             res = results
  198: 	# new total count (because of filter)
  199:         rescnt = len(res)
  200:         # filter on count
  201:         resgroup = res[start:end]
  202: 
  203:         return (resgroup, rescnt)
  204:     
  205: 
  206:     #
  207:     # Web page stuff
  208:     #
  209: 
  210:     def index_html(self):
  211:         """metadata search"""
  212:         pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
  213:         return pt()
  214: 
  215: 
  216:     def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
  217:         """search and create result"""
  218:         sres = int(start) -1
  219:         lres = sres + count
  220:         try:
  221:             oldsearch = self.REQUEST.SESSION['searchstring']
  222:             oldtype = self.REQUEST.SESSION['searchtype']
  223:         except:
  224:             oldsearch = ""
  225:             oldtype = ""
  226:             
  227:         if not searchstring:
  228:             searchstring = oldsearch
  229:             searchtype = oldtype
  230:             
  231:         if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
  232:             # new search
  233:             (res, restypes) = self.dbSearch(searchstring, searchtype)
  234:             # sort the result
  235:             res.sort(ranksort)
  236:             # store it
  237:             self.REQUEST.SESSION['results'] = res
  238:             self.REQUEST.SESSION['searchstring'] = searchstring
  239:             self.REQUEST.SESSION['searchtype'] = searchtype
  240:             self.REQUEST.SESSION['resulttypes'] = restypes
  241: 
  242:         (resgroup, nres) = self.filterResults(self.REQUEST.SESSION['results'], sres, lres, restypefilter)
  243:         lres = min(lres, nres)
  244:         sres = min(sres, nres)
  245:         self.REQUEST.SESSION['resultgroup'] = resgroup
  246:         self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, nres, int(count))
  247:         self.REQUEST.SESSION['res_type_filter'] = restypefilter
  248:         if nres > 0:
  249:             zpt = "zpt/searchResult.zpt"
  250:         else:
  251:             zpt = "zpt/searchResult_none.zpt"
  252:             
  253:         pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
  254:         return pt()
  255: 
  256: 
  257:     def getSearchType(self):
  258:         """returns the last search type"""
  259:         try:
  260:             ret = self.REQUEST.SESSION['searchtype']
  261:         except:
  262:             ret = ""
  263: 
  264:         return ret
  265:     
  266:     def getSearchString(self):
  267:         """returns the last search string"""
  268:         try:
  269:             ret = self.REQUEST.SESSION['searchstring']
  270:         except:
  271:             ret = ""
  272: 
  273:         return ret
  274:     
  275: 
  276:     def hasNextResults(self):
  277:         """returns if there are more results"""
  278:         try:
  279:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  280:             return (first + count < total)
  281:         except:
  282:             return False
  283: 
  284:     def hasPrevResults(self):
  285:         """returns if there are previous results"""
  286:         try:
  287:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  288:             return (first > 1)
  289:         except:
  290:             return False
  291: 
  292: 
  293:     def nextResults(self):
  294:         """returns more results"""
  295:         try:
  296:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  297:             first = first + count
  298:             last = last + count
  299:             if first > total:
  300:                 first = total
  301:             if last > total:
  302:                 last = total
  303:         except:
  304:             print "OUCH: no next results!"
  305:             return self.search()
  306: 
  307:         return self.search(start=first, count=count)
  308: 
  309:         
  310:     def prevResults(self):
  311:         """returns more results"""
  312:         try:
  313:             (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
  314:             first = first - count
  315:             last = last - count
  316:             if first < 1:
  317:                 first = 1
  318:             if last < 1:
  319:                 last = 1
  320:         except:
  321:             print "OUCH: no prev results!"
  322:             return self.search()           
  323: 
  324:         return self.search(start=first, count=count)
  325:         
  326: 
  327:     def manage_ChangeOSAS_searchForm(self):
  328:         """create Search form"""
  329:         pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
  330:         return pt()
  331: 
  332:     def manage_ChangeOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
  333:         """add the OSAS_root"""
  334:         self.id = id
  335:         self.title = title
  336:         self.dsn = dsn
  337:         if RESPONSE is not None:
  338:             RESPONSE.redirect('manage_main')
  339: 
  340: 
  341: def manage_AddOSAS_searchForm(self):
  342:     """create Search form"""
  343:     pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")).__of__(self)
  344:     return pt()
  345: 
  346: def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
  347:     """add the OSAS_root"""
  348:     newObj=OSAS_search(id,title,dsn)
  349:     self._setObject(id,newObj)
  350:     if RESPONSE is not None:
  351:         RESPONSE.redirect('manage_main')
  352: 
  353: 
  354: 
  355: 
  356: class SearchResult(SimpleItem):
  357:     """base search result object"""
  358: 
  359:     def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
  360:         """init"""
  361:         # result type (e.g. "bib", "archim")
  362:         self.type = type
  363:         # index file name
  364:         self.file = file
  365:         # url for result (list of pairs)
  366:         if url:
  367:             self.urls = url
  368:         else:
  369:             self.urls = []
  370:         # actual content (list of tuples)
  371:         self.content = content
  372:         # document status (e.g. "online", "archive")
  373:         self.status = None
  374:         # result rank for presentation
  375:         self.rank = rank
  376: 
  377: class AnyResult(SearchResult):
  378:     """catch-all type result object"""
  379: 
  380:     def __init__(self, zope, db_result, rank):
  381:         """returns a catch-all type result"""
  382:         SearchResult.__init__(self)
  383:         #print "NEW ANY RESULT!"
  384:         self.type='unknown'
  385:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")
  386:         
  387:         (db_fileid, db_tagidx, db_tags, db_content) = db_result
  388:         self.hitTag = db_tags
  389: 
  390:         # get full info from db
  391:         self.fileinfo = zope.getDBFile(db_fileid)
  392:         assert self.fileinfo
  393: 
  394:         items = {}
  395:         items[db_tags] = db_content
  396:         self.content = items
  397:         self.file = self.fileinfo[0]
  398:         self.status = statusForFile(self.file)
  399:         self.rank = rank
  400: 
  401:     def getContentList(self):
  402:         """returns content as list of tuples in preferred order"""
  403:         l = []
  404:         for k in self.content.keys():
  405:             l.append((k, self.content[k]))
  406: 
  407:         return l
  408: 
  409:     def render(self, zope):
  410:         """render this result object"""
  411:         zope.REQUEST.SESSION['result'] = self
  412:         pt=PageTemplateFile(self.zptFile).__of__(zope)
  413:         return pt()
  414: 
  415: 
  416: class MetaResult(AnyResult):
  417:     """result object that collects metadata"""
  418: 
  419:     def __init__(self, zope, db_result, rank):
  420:         """contructor"""
  421:         AnyResult.__init__(self, zope, db_result, rank)
  422:         #print "NEW META RESULT!"
  423: 
  424:         (fileid, tagidx, tags, content) = db_result
  425: 
  426:         # get full info from db
  427:         self.metainfo = zope.getDBFileMeta(fileid)
  428:         assert self.metainfo
  429:         
  430:     def checkContext(self, tags, content, ctxurl):
  431:         """takes meta entry and updates url from context tags"""
  432:         if tags.endswith('/context/link'):
  433:             if content:
  434:                 #print "CTXlink: ", content
  435:                 ctxurl[0] = content
  436:             
  437:         elif tags.endswith('/context/name'):
  438:             if content:
  439:                 #print "CTXname: ", content
  440:                 ctxurl[1] = content
  441: 
  442:         return ctxurl
  443: 
  444: 
  445: class BibResult(MetaResult):
  446:     """bib type result object"""
  447: 
  448:     def __init__(self, zope, db_result, rank):
  449:         """constructor"""
  450:         MetaResult.__init__(self, zope, db_result, rank)
  451:         #print "NEW BIB RESULT!", self
  452:         self.type = "bib"
  453:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
  454:         url = storageURL(self.file)
  455:         if url:
  456:             self.urls.append(url)
  457:         (fileid, tagidx, tags, content) = db_result
  458: 
  459:         btype = ""
  460:         bitems = {}
  461:         ctxurl = ['', '']
  462: 
  463:         for me in self.metainfo:
  464:             (m_idx, m_tags, m_content, m_attributes) = me
  465:             # context tag
  466:             ctxurl = self.checkContext(m_tags, m_content, ctxurl)
  467:             # first tag with bib type attribute
  468:             if m_tags.endswith('/meta/bib'):
  469:                 r = re.search('type="([^"]*)"', m_attributes)
  470:                 if r:
  471:                     btype = r.group(1)
  472: 
  473:                 if not btype:
  474:                     btype = "*unknown*"
  475: 
  476:                 bitems['type'] = btype
  477:                 continue
  478: 
  479:             # skip other tags
  480:             if not btype: continue
  481: 
  482:             # collect bib/something
  483:             r = re.search('/meta/bib/(.*)', m_tags)
  484:             if r:
  485:                 k = r.group(1)
  486:                 #print "CONTENT: ", m_content
  487:                 bitems[k] = m_content
  488:                 # remember hit tag
  489:                 if m_tags == self.hitTag:
  490:                     self.hitTag = k
  491:                 continue
  492: 
  493:         self.content = bitems
  494:         # store context
  495:         if not ctxurl[1]:
  496:             ctxurl[1] = "View"
  497:         # must have link
  498:         if ctxurl[0]:
  499:             self.urls.append(ctxurl)
  500:                 
  501:         self.rank += 100
  502: 
  503:     def getContentList(self):
  504:         """returns content as list of tuples in preferred order"""
  505:         l = []
  506:         c = self.content.copy()
  507:         # preferred items first
  508:         for k in ('author', 'title', 'journal', 'year'):
  509:             if c.has_key(k):
  510:                 l.append((k, c[k]))
  511:                 del c[k]
  512: 
  513:         # no type
  514:         del c['type']
  515:         # copy the rest
  516:         for k in c.keys():
  517:             l.append((k, c[k]))
  518: 
  519:         return l
  520: 
  521: 
  522: class ArchimResult(MetaResult):
  523:     """archimedes type result object"""
  524: 
  525:     def __init__(self, zope, db_result, rank):
  526:         """constructor"""
  527:         MetaResult.__init__(self, zope, db_result, rank)
  528:         #print "NEW ARCHIM RESULT!", self
  529:         self.type = "archim"
  530:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
  531:         url = storageURL(self.file)
  532:         if url:
  533:             self.urls.append(url)
  534:             
  535:         (fileid, tagidx, tags, content) = db_result
  536: 
  537:         # process info
  538:         bitems = {}
  539:         ctxurl = ['', '']
  540:         for me in self.metainfo:
  541:             (m_idx, m_tags, m_content, m_attributes) = me
  542:             # context tag
  543:             ctxurl = self.checkContext(m_tags, m_content, ctxurl)
  544:             # collect archimedes/something
  545:             r = re.search('/meta/archimedes/(.*)', m_tags)
  546:             if r:
  547:                 k = r.group(1)
  548:                 #print "CONTENT: ", m_content
  549:                 bitems[k] = m_content
  550:                 # remember hit tag
  551:                 if m_tags == self.hitTag:
  552:                     self.hitTag = k
  553:                 continue
  554: 
  555:         self.content = bitems
  556:         self.rank += 100
  557:         # store context
  558:         if not ctxurl[1]:
  559:             ctxurl[1] = "View"
  560:         # must have link
  561:         if ctxurl[0]:
  562:             self.urls.append(ctxurl)
  563: 
  564: 
  565:     def getContentList(self):
  566:         """returns content as list of tuples in preferred order"""
  567:         l = []
  568:         c = self.content.copy()
  569:         # preferred items first
  570:         for k in ('author', 'title', 'date', 'place'):
  571:             if c.has_key(k):
  572:                 l.append((k, c[k]))
  573:                 del c[k]
  574: 
  575:         # copy the rest
  576:         for k in c.keys():
  577:             l.append((k, c[k]))
  578: 
  579:         return l
  580: 	
  581: 
  582: 
  583: 
  584: def ranksort(res1, res2):
  585:     """sort results on rank"""
  586:     return cmp(res2.rank, res1.rank)
  587: 
  588: 
  589: def statusForFile(filename):
  590:     """heuristic... returns status for a index file name"""
  591:     status = None
  592:     if filename.startswith('/mpiwg/online/'):
  593:         status = "online"
  594:     elif filename.startswith('/mpiwg/archive/'):
  595:         status = "archive"
  596:     elif filename.startswith('http://'):
  597:         status = "database"
  598:         
  599:     return status
  600: 
  601: def storageURL(filename):
  602:     """heuristic... returns an URL for a index file name"""
  603:     url = None
  604:     name = None
  605:     if filename.startswith('/mpiwg/online/'):
  606:         #print "URLFORFILE: online ", filename
  607:         r = re.search('^(.*)/index.meta', filename)
  608:         if r:
  609:             url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1)
  610:             name = "Storage System"
  611:             
  612:     elif filename.startswith('http://'):
  613:         #print "URLFORFILE: url ", filename
  614:         url = filename
  615:         name = "Online Database"
  616: 
  617:     if name and url:
  618:         return (url, name)
  619:     
  620:     return None
  621: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>