OSAS/OSA_system/OSAS_search.py - annotate

Return to OSAS_search.py CVS log
Up to [Repository] / OSAS / OSA_system
Annotation of OSAS/OSA_system/OSAS_search.py, revision 1.7

1.1       casties     1: """Metadata search interface
                      2: ROC 2004, itgroup
                      3: 
                      4: """
                      5: 
                      6: from AccessControl import ClassSecurityInfo
                      7: from Globals import InitializeClass
                      8: from Globals import Persistent, package_home
                      9: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
                     10: from Products.PageTemplates.PageTemplate import PageTemplate
1.6       casties    11: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
                     12: from OFS.Folder import Folder
1.1       casties    13: from OFS.SimpleItem import SimpleItem
                     14: #from pyPgSQL import PgSQL
                     15: import psycopg as PgSQL
                     16: 
                     17: import re
                     18: import os
                     19: 
                     20: MAXHITS = 1000
                     21: 
class OSAS_search(Folder):
    """Object for global metadata search.

    Runs LIKE queries against the ``meta`` table of the database named by
    ``self.dsn``, wraps every hit in a result object (see ``getResult``)
    and keeps the current result list and the paging state in
    ``REQUEST.SESSION``.
    """

    meta_type="OSAS_search"

    manage_options=Folder.manage_options+(
        {'label':'Main config','action':'manage_ChangeOSAS_searchForm'},
       )

    def __init__(self,id,title,dsn=None):
        """Store id, title and the database connection string.

        A false ``dsn`` selects the built-in default connection string.
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): credentials are hard-coded in this default DSN;
            # consider moving them into configuration.
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        # number of consecutive failed attempts to get a cursor
        self._v_tryCon = 0

    def dbCursor(self):
        """Return a new SQL cursor, (re)connecting when necessary.

        The cached connection ``_v_dbCon`` is used when it can hand out a
        cursor.  Otherwise a new connection is opened with ``self.dsn`` and
        the method calls itself again.  After three consecutive failures no
        further connection is attempted.

        Raises AssertionError when no cursor could be obtained.
        """
        curs = None
        if getattr(self, '_v_dbCon', None) is not None:
            try:
                curs = self._v_dbCon.cursor()
                self._v_tryCon = 0
            except Exception:
                # in case of problems reset dbCon and count the failure
                self._v_dbCon = None
                self._v_tryCon = getattr(self, '_v_tryCon', 0) + 1
        else:
            self._v_dbCon = None
            self._v_tryCon = 0

        if curs is None and self._v_tryCon < 3:
            self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            # call ourself with the new connection
            curs = self.dbCursor()

        if curs is None:
            # raised explicitly (not via assert) so the check is never
            # stripped; the exception type is the same as before
            raise AssertionError("AIIEE no db cursor!!")
        return curs

    def getDBFileMeta(self, fileid):
        """Return all meta entries of ``fileid`` ordered by idx.

        Every entry is a row (idx, tags, content, attributes).  Rows are
        cached per session in ``SESSION['dbMeta']`` keyed by ``fileid``.
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
            if fileid in metacache:
                return metacache[fileid]

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("done")
            res = curs.fetchall()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        metacache[fileid] = res
        session['dbMeta'] = metacache
        return res

    def getDBFile(self, fileid):
        """Return the file information (filename, mtime) of ``fileid``.

        The row is cached per session in ``SESSION['dbFiles']`` keyed by
        ``fileid``.  The result of ``fetchone()`` is returned unchanged.
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
            if fileid in filecache:
                return filecache[fileid]

        sql = 'select filename,mtime from files where id=%(id)s'
        curs = self.dbCursor()
        try:
            print("DBFILE:  %s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("DBFILE: done")
            res = curs.fetchone()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        filecache[fileid] = res
        session['dbFiles'] = filecache
        return res

    def dbSearch(self, query, type):
        """Search the meta table for ``query`` and return the hits.

        ``type`` selects the match mode: 'equals', 'startswith' or
        'contains'.  Any other value is treated like 'startswith' (the
        default mode of ``search``).  The comparison is case-insensitive.

        Returns a tuple (results, restypes): the list of result objects
        and the list of distinct result types found.  At most MAXHITS rows
        are processed.  An empty query returns ([], []).
        """
        results = []
        restypes = {}
        if not query:
            # empty query: same return shape as a real search so that
            # callers can always unpack two values
            return (results, [])

        if type == 'equals':
            qs = query
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            # 'startswith' and fallback for unknown search types
            qs = query + "%"

        sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, qs))
            curs.execute(sql, {'qs':qs})
            print("done")
            res = curs.fetchone()
            rescnt = 0
            while res and rescnt < MAXHITS:
                result = self.getResult(res)
                if result:
                    results.append(result)
                    # dict used as a set of the result types seen
                    restypes[result.type] = result.type

                res = curs.fetchone()
                rescnt += 1
        finally:
            # release the cursor even when a row could not be processed
            curs.close()

        return (results, list(restypes.keys()))

    def getResult(self, db_result, rank=0):
        """Factory for result objects.

        ``db_result`` is a row (fileid, idx, tags, content).  The tag path
        decides the class: BibResult for '/meta/bib/', ArchimResult for
        '/meta/archimedes/', AnyResult for everything else.
        """
        tags = db_result[2]
        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """returns HTML rendering of a search result"""
        return result.render(self)

    def filterResults(self, results, start, end, restypefilter=None):
        """Return the slice [start:end] of the results matching a filter.

        When ``restypefilter`` is set only results of that type are kept.
        Returns a tuple (resgroup, rescnt) where rescnt is the total number
        of results after type filtering (before slicing).
        """
        # filter types first
        if restypefilter:
            res = [r for r in results if r.type == restypefilter]
        else:
            res = results
        # new total count (because of filter), then filter on count
        return (res[start:end], len(res))

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
        """Run a search (or reuse the last one) and render one result page.

        ``start`` is the 1-based index of the first result to show and
        ``count`` the page size; both may be given as strings.  Without a
        ``searchstring`` the search stored in the session is repeated.  The
        database is only queried when string or type differ from the
        stored search.  Paging state is written to the session keys
        'resultgroup', 'res_indexes' and 'res_type_filter'.
        """
        session = self.REQUEST.SESSION
        # request parameters may arrive as strings: convert before use
        count = int(count)
        # never slice from a negative index
        sres = max(int(start) - 1, 0)
        lres = sres + count
        try:
            oldsearch = session['searchstring']
            oldtype = session['searchtype']
        except KeyError:
            oldsearch = ""
            oldtype = ""

        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype

        if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            (res, restypes) = self.dbSearch(searchstring, searchtype)
            # sort the result
            res.sort(ranksort)
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype
            session['resulttypes'] = restypes

        (resgroup, nres) = self.filterResults(session['results'], sres, lres, restypefilter)
        lres = min(lres, nres)
        sres = min(sres, nres)
        session['resultgroup'] = resgroup
        # (first shown, last shown, total, page size); first is 1-based
        session['res_indexes'] = (sres+1, lres, nres, count)
        session['res_type_filter'] = restypefilter
        if nres > 0:
            zpt = "zpt/searchResult.zpt"
        else:
            zpt = "zpt/searchResult_none.zpt"

        pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
        return pt()

    def getSearchType(self):
        """returns the last search type ("" if there is none)"""
        try:
            return self.REQUEST.SESSION['searchtype']
        except KeyError:
            return ""

    def getSearchString(self):
        """returns the last search string ("" if there is none)"""
        try:
            return self.REQUEST.SESSION['searchstring']
        except KeyError:
            return ""

    def hasNextResults(self):
        """returns if there are more results after the current page"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, TypeError, ValueError):
            return False
        # compare the last shown index with the total: "first + count"
        # misses the case where exactly one result is left
        return (last < total)

    def hasPrevResults(self):
        """returns if there are results before the current page"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, TypeError, ValueError):
            return False
        return (first > 1)

    def nextResults(self):
        """renders the next page of results"""
        session = self.REQUEST.SESSION
        try:
            (first, last, total, count) = session['res_indexes']
            # keep the active type filter while paging
            restypefilter = session['res_type_filter']
        except (KeyError, TypeError, ValueError):
            print("OUCH: no next results!")
            return self.search()

        first = min(first + count, total)
        return self.search(start=first, count=count, restypefilter=restypefilter)

    def prevResults(self):
        """renders the previous page of results"""
        session = self.REQUEST.SESSION
        try:
            (first, last, total, count) = session['res_indexes']
            # keep the active type filter while paging
            restypefilter = session['res_type_filter']
        except (KeyError, TypeError, ValueError):
            print("OUCH: no prev results!")
            return self.search()

        first = max(first - count, 1)
        return self.search(start=first, count=count, restypefilter=restypefilter)

    def manage_ChangeOSAS_searchForm(self):
        """create Search form"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
        return pt()

    def manage_ChangeOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
        """Change id, title and DSN of this search object.

        A false ``dsn`` leaves the current connection string untouched.
        Redirects to 'manage_main' when a RESPONSE is given.
        """
        self.id = id
        self.title = title
        if dsn:
            self.dsn = dsn
            # drop the cached connection so the new DSN takes effect
            self._v_dbCon = None
            self._v_tryCon = 0
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
1.2       casties   339: 
                    340: 
def manage_AddOSAS_searchForm(self):
    """render the form for adding an OSAS_search object"""
    # template file lives in the zpt directory of this package
    zptPath = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    template = PageTemplateFile(zptPath).__of__(self)
    return template()
                    345: 
                    def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
                        """create a new OSAS_search object and add it to this container"""
                        self._setObject(id, OSAS_search(id, title, dsn))
                        if RESPONSE is None:
                            # called from code: nothing to redirect
                            return
                        RESPONSE.redirect('manage_main')
                    352: 
                    353: 
                    354: 
                    355: 
                    class SearchResult(SimpleItem):
                        """common base class of the search result objects"""

                        def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
                            """store the basic attributes of a result"""
                            # result type (e.g. "bib", "archim")
                            self.type = type
                            # index file name
                            self.file = file
                            # urls for the result (list of pairs); a false value gives a new list
                            self.urls = url or []
                            # actual content (list of tuples)
                            self.content = content
                            # document status (e.g. "online", "archive")
                            self.status = None
                            # result rank for presentation
                            self.rank = rank
                    376: 
                    class AnyResult(SearchResult):
                        """result object for hits without a more specific type"""

                        def __init__(self, zope, db_result, rank):
                            """build a catch-all result from a row (fileid, idx, tags, content)

                            zope is the search object; its getDBFile() supplies the file
                            information for the row's fileid.
                            """
                            SearchResult.__init__(self)
                            self.type='unknown'
                            self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

                            (db_fileid, db_tagidx, db_tags, db_content) = db_result
                            # tag path of the meta entry that matched the query
                            self.hitTag = db_tags

                            # full file info from the db; must not be empty
                            self.fileinfo = zope.getDBFile(db_fileid)
                            assert self.fileinfo

                            # the only content is the entry that matched
                            self.content = {db_tags: db_content}
                            # first column of the file info is used as the file name
                            self.file = self.fileinfo[0]
                            self.status = statusForFile(self.file)
                            self.rank = rank

                        def getContentList(self):
                            """returns content as list of (key, value) tuples"""
                            return list(self.content.items())

                        def render(self, zope):
                            """render this result object with its page template"""
                            # the template reads the result from the session
                            zope.REQUEST.SESSION['result'] = self
                            template = PageTemplateFile(self.zptFile).__of__(zope)
                            return template()
                    414: 
                    415: 
                    class MetaResult(AnyResult):
                        """result object that also loads all metadata of its file"""

                        def __init__(self, zope, db_result, rank):
                            """constructor: additionally fetches the meta entries of the file"""
                            AnyResult.__init__(self, zope, db_result, rank)

                            # first column of the row is the file id
                            fileid = db_result[0]

                            # full meta info from the db; must not be empty
                            self.metainfo = zope.getDBFileMeta(fileid)
                            assert self.metainfo

                        def checkContext(self, tags, content, ctxurl):
                            """update the [link, name] pair ctxurl from a context meta entry

                            Non-empty content of a '/context/link' entry goes to ctxurl[0],
                            that of a '/context/name' entry to ctxurl[1].  The (modified)
                            list is returned.
                            """
                            if tags.endswith('/context/link'):
                                slot = 0
                            elif tags.endswith('/context/name'):
                                slot = 1
                            else:
                                # not a context entry
                                return ctxurl

                            if content:
                                ctxurl[slot] = content

                            return ctxurl
1.1       casties   443: 
                    444: 
                    class BibResult(MetaResult):
                        """bib type result object

                        Collects the '/meta/bib/...' entries of the file into self.content
                        (keyed by the part of the tag path after '/meta/bib/') plus the
                        bib type under the key 'type'.
                        """

                        def __init__(self, zope, db_result, rank):
                            """constructor

                            db_result is a row (fileid, idx, tags, content); zope is the
                            search object used for the database lookups.
                            """
                            MetaResult.__init__(self, zope, db_result, rank)
                            self.type = "bib"
                            self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
                            url = storageURL(self.file)
                            if url:
                                self.urls.append(url)

                            btype = ""
                            bitems = {}
                            # [link, name] collected from context tags
                            ctxurl = ['', '']

                            for me in self.metainfo:
                                (m_idx, m_tags, m_content, m_attributes) = me
                                # context tag
                                ctxurl = self.checkContext(m_tags, m_content, ctxurl)
                                # first tag with bib type attribute
                                if m_tags.endswith('/meta/bib'):
                                    # guard against a missing (None) attribute value
                                    r = re.search('type="([^"]*)"', m_attributes or '')
                                    if r:
                                        btype = r.group(1)

                                    if not btype:
                                        btype = "*unknown*"

                                    bitems['type'] = btype
                                    continue

                                # skip other tags until the bib type is known
                                if not btype:
                                    continue

                                # collect bib/something
                                r = re.search('/meta/bib/(.*)', m_tags)
                                if r:
                                    k = r.group(1)
                                    bitems[k] = m_content
                                    # remember hit tag by its short name
                                    if m_tags == self.hitTag:
                                        self.hitTag = k

                            self.content = bitems
                            # store context: default link name
                            if not ctxurl[1]:
                                ctxurl[1] = "View"
                            # must have link
                            if ctxurl[0]:
                                self.urls.append(ctxurl)

                            self.rank += 100

                        def getContentList(self):
                            """returns content as list of tuples in preferred order

                            'author', 'title', 'journal' and 'year' come first (when
                            present), the 'type' entry is left out, the rest follows.
                            """
                            l = []
                            c = self.content.copy()
                            # preferred items first
                            for k in ('author', 'title', 'journal', 'year'):
                                if k in c:
                                    l.append((k, c[k]))
                                    del c[k]

                            # no type; the key is missing when no '/meta/bib' tag was found
                            if 'type' in c:
                                del c['type']
                            # copy the rest
                            for k in c.keys():
                                l.append((k, c[k]))

                            return l
1.1       casties   520: 
                    521: 
                    class ArchimResult(MetaResult):
                        """result object for archimedes type metadata"""

                        def __init__(self, zope, db_result, rank):
                            """constructor: collects the '/meta/archimedes/...' entries"""
                            MetaResult.__init__(self, zope, db_result, rank)
                            self.type = "archim"
                            self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
                            storage = storageURL(self.file)
                            if storage:
                                self.urls.append(storage)

                            # process info
                            collected = {}
                            # [link, name] collected from context tags
                            ctxurl = ['', '']
                            for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
                                # context tag
                                ctxurl = self.checkContext(m_tags, m_content, ctxurl)
                                # collect archimedes/something
                                found = re.search('/meta/archimedes/(.*)', m_tags)
                                if not found:
                                    continue
                                key = found.group(1)
                                collected[key] = m_content
                                # remember hit tag by its short name
                                if m_tags == self.hitTag:
                                    self.hitTag = key

                            self.content = collected
                            self.rank += 100
                            # store context: default link name
                            if not ctxurl[1]:
                                ctxurl[1] = "View"
                            # must have link
                            if ctxurl[0]:
                                self.urls.append(ctxurl)

                        def getContentList(self):
                            """returns content as list of tuples, preferred keys first"""
                            remaining = self.content.copy()
                            ordered = []
                            # preferred items first
                            for key in ('author', 'title', 'date', 'place'):
                                if key in remaining:
                                    ordered.append((key, remaining.pop(key)))

                            # then everything that is left
                            ordered.extend(remaining.items())
                            return ordered
1.1       casties   580:    
                    581: 
                    582: 
                    583: 
                    def ranksort(res1, res2):
                        """comparison function: orders results by descending rank"""
                        # arguments are swapped so that the higher rank sorts first
                        a = res2.rank
                        b = res1.rank
                        return (a > b) - (a < b)
                    587: 
                    588: 
def statusForFile(filename):
    """heuristic: derive a document status from the prefix of an index file name

    Returns "online", "archive" or "database", or None when no known
    prefix matches.
    """
    known = (
        ('/mpiwg/online/', "online"),
        ('/mpiwg/archive/', "archive"),
        ('http://', "database"),
    )
    for (prefix, status) in known:
        if filename.startswith(prefix):
            return status

    return None
                    600: 
                    def storageURL(filename):
                        """heuristic: returns a (url, name) pair for an index file name

                        File names below '/mpiwg/online/' that contain '/index.meta' give a
                        link to the storage system for the directory part; names starting
                        with 'http://' are returned as they are.  Returns None otherwise.
                        """
                        if filename.startswith('/mpiwg/online/'):
                            # the dot is escaped so that only a literal "index.meta" matches
                            r = re.search(r'^(.*)/index\.meta', filename)
                            if r:
                                url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1)
                                return (url, "Storage System")

                        elif filename.startswith('http://'):
                            return (filename, "Online Database")

                        return None
1.1       casties   621:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>