OSAS/OSA_system/OSAS_search.py - annotate

Return to OSAS_search.py CVS log
Up to [Repository] / OSAS / OSA_system
Annotation of OSAS/OSA_system/OSAS_search.py, revision 1.1

1.1     ! casties     1: """Metadata search interface
        !             2: ROC 2004, itgroup
        !             3: 
        !             4: """
        !             5: 
        !             6: from AccessControl import ClassSecurityInfo
        !             7: from Globals import InitializeClass
        !             8: from Globals import Persistent, package_home
        !             9: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
        !            10: from Products.PageTemplates.PageTemplate import PageTemplate
        !            11: from OFS.SimpleItem import SimpleItem
        !            12: #from pyPgSQL import PgSQL
        !            13: import psycopg as PgSQL
        !            14: 
        !            15: import re
        !            16: import os
        !            17: 
        !            18: MAXHITS = 1000
        !            19: 
        !            20: class OSAS_search(SimpleItem):
        !            21:     """Object for global metadata search"""
        !            22: 
        !            23:     meta_type="OSAS_search"
        !            24: 
        !            25:     
        !            26: 
        !            27:     def __init__(self,id,title,dsn=None):
        !            28:         """init"""
        !            29:         self.id=id
        !            30:         self.title=title
        !            31:         if dsn:
        !            32:             self.dsn = dsn
        !            33:         else:
        !            34:             self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        !            35:         # volatile database connection object
        !            36:         self._v_dbCon = None
        !            37:         self._v_tryCon = 0
        !            38: 
        !            39: 
        !            40:     def dbCursor(self):
        !            41:         """returns new SQL cursor object"""
        !            42:         curs = None
        !            43:         if hasattr(self, '_v_dbCon') and self._v_dbCon is not None:
        !            44:             try:
        !            45:                 curs = self._v_dbCon.cursor()
        !            46:                 self._v_tryCon = 0
        !            47:             except:
        !            48:                 # in case of problems reset dbCon
        !            49:                 self._v_dbCon = None
        !            50:                 self._v_tryCon += 1
        !            51:         else:
        !            52:             self._v_dbCon = None
        !            53:             self._v_tryCon = 0
        !            54:                 
        !            55:         if not curs and self._v_tryCon < 3:
        !            56:             self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
        !            57:             # call ourself with the new connection
        !            58:             curs = self.dbCursor()
        !            59: 
        !            60:         assert curs, "AIIEE no db cursor!!"
        !            61:         return curs
        !            62: 
        !            63:     def getDBFileMeta(self, fileid):
        !            64:         """returns an array with all meta entries of fileid"""
        !            65: 
        !            66:         metacache = {}
        !            67:         # try in cache
        !            68:         if self.REQUEST.SESSION.has_key('dbMeta'):
        !            69:             metacache = self.REQUEST.SESSION['dbMeta']
        !            70:             if metacache.has_key(fileid):
        !            71:                 res = metacache[fileid]
        !            72:                 print "meta from cache "
        !            73:                 return res
        !            74: 
        !            75:         curs = self.dbCursor()
        !            76: 
        !            77:         sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        !            78:         print sql, " -> ", fileid
        !            79:         curs.execute(sql, {'id':fileid})
        !            80:         print "done"
        !            81: 
        !            82:         res = curs.fetchall()
        !            83:         #print "res:", res
        !            84:         curs.close()
        !            85:         # store info in cache
        !            86:         metacache[fileid] = res
        !            87:         self.REQUEST.SESSION['dbMeta'] = metacache
        !            88: 
        !            89:         return res
        !            90: 
        !            91:     def getDBFile(self, fileid):
        !            92:         """returns the file information of fileid"""
        !            93: 
        !            94:         filecache = {}
        !            95:         # try in cache
        !            96:         if self.REQUEST.SESSION.has_key('dbFiles'):
        !            97:             filecache = self.REQUEST.SESSION['dbFiles']
        !            98:             if filecache.has_key(fileid):
        !            99:                 res = filecache[fileid]
        !           100:                 print "file from cache "
        !           101:                 return res
        !           102: 
        !           103:         curs = self.dbCursor()
        !           104: 
        !           105:         sql = 'select filename,mtime from files where id=%(id)s'
        !           106:         print 'DBFILE: ', sql, " -> ", fileid
        !           107:         curs.execute(sql, {'id':fileid})
        !           108:         print "DBFILE: done"
        !           109: 
        !           110:         res = curs.fetchone()
        !           111:         #print "DBFILE: res:", res
        !           112:         curs.close()
        !           113:         # store info in cache
        !           114:         filecache[fileid] = res
        !           115:         self.REQUEST.SESSION['dbFiles'] = filecache
        !           116: 
        !           117:         return res
        !           118:    
        !           119:    
        !           120:     def dbSearch(self, query):
        !           121:         """search DB for query and return result set"""
        !           122:         curs = self.dbCursor()
        !           123:         qs = query + "%"
        !           124:         sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        !           125:         print sql, " -> ", qs
        !           126:         curs.execute(sql, {'qs':qs})
        !           127:         print "done"
        !           128:         results = []
        !           129:         res = curs.fetchone()
        !           130:         rescnt = 1
        !           131:         #print "res0:", res
        !           132:         while res and rescnt < MAXHITS:
        !           133:             #print "res:", res
        !           134:             result = self.getResult(res)
        !           135:             if (result):
        !           136:                 results.append(result)
        !           137:                 
        !           138:             res = curs.fetchone()
        !           139:             rescnt += 1
        !           140: 
        !           141:         curs.close()
        !           142:         #self.dbCon = None
        !           143: 
        !           144:         print "SEARCH: ", rescnt, " results"
        !           145:         return results
        !           146: 
        !           147:         
        !           148:     def getResult(self, db_result, rank=0):
        !           149:         """factory for result objects"""
        !           150:         print "NEW RESULT!"
        !           151: 
        !           152:         (fileid, tagidx, tags, content) = db_result
        !           153:         res = None
        !           154: 
        !           155:         print "tags: ", tags
        !           156:         if tags.find('/meta/bib/') > -1:
        !           157:             res = BibResult(self, db_result, rank)
        !           158:         elif tags.find('/meta/archimedes/') > -1:
        !           159:             res = ArchimResult(self, db_result, rank)
        !           160:         else:
        !           161:             res = AnyResult(self, db_result, rank)
        !           162: 
        !           163:         return res
        !           164:    
        !           165:     def renderResult(self, result):
        !           166:         """returns HTML rendering of a search result"""
        !           167: 
        !           168:         print "renderresult!", result, " -- ", result.url
        !           169:         return result.render(self)
        !           170:    
        !           171:    
        !           172:    
        !           173:    
        !           174: 
        !           175:     #
        !           176:     # Web page stuff
        !           177:     #
        !           178: 
        !           179:     def index_html(self):
        !           180:         """metadata search"""
        !           181:         pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        !           182:         return pt()
        !           183: 
        !           184: 
        !           185:     def search(self, searchstring=None):
        !           186:         """search and result"""
        !           187:         if searchstring:
        !           188:             print "SEARCH: ", searchstring
        !           189:             res = self.dbSearch(searchstring)
        !           190:             res.sort(ranksort)
        !           191:             self.REQUEST.SESSION['results'] = res
        !           192:             self.REQUEST.SESSION['searchstring'] = searchstring
        !           193: 
        !           194:         print "SEARCH res:", res
        !           195:         pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
        !           196:         return pt()
        !           197: 
        !           198:    
        !           199:         
        !           200: def manage_AddOSAS_searchForm(self):
        !           201:     """create Search form"""
        !           202:     pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")).__of__(self)
        !           203:     return pt()
        !           204: 
        !           205: def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
        !           206:     """add the OSAS_root"""
        !           207:     newObj=OSAS_search(id,title,dsn)
        !           208:     self._setObject(id,newObj)
        !           209:     if RESPONSE is not None:
        !           210:         RESPONSE.redirect('manage_main')
        !           211: 
        !           212: 
        !           213: 
        !           214: 
        !           215: class SearchResult(SimpleItem):
        !           216:     """base search result object"""
        !           217: 
        !           218:     def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        !           219:         """init"""
        !           220:         self.type = type
        !           221:         self.file = file
        !           222:         self.url = url
        !           223:         self.urlabel = url
        !           224:         self.content = content
        !           225:         self.rank = rank
        !           226: 
        !           227: class AnyResult(SearchResult):
        !           228:     """catch-all type result object"""
        !           229: 
        !           230:     def __init__(self, zope, db_result, rank):
        !           231:         """returns a catch-all type result"""
        !           232:         SearchResult.__init__(self, type='unknown')
        !           233:         print "NEW ANY RESULT!"
        !           234: 
        !           235:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")
        !           236:         
        !           237:         (fileid, tagidx, tags, content) = db_result
        !           238:         self.hitTag = tags
        !           239: 
        !           240:         # get full info from db
        !           241:         self.fileinfo = zope.getDBFile(fileid)
        !           242:         assert self.fileinfo
        !           243: 
        !           244:         items = {}
        !           245:         items[tags] = content
        !           246:         self.content = items
        !           247:         self.file = self.fileinfo[0]
        !           248:         self.url = ""
        !           249:         self.urlabel = self.file
        !           250:         self.rank = rank
        !           251: 
        !           252:     def render(self, zope):
        !           253:         """render this result object"""
        !           254:         zope.REQUEST.SESSION['result'] = self
        !           255:         print "renderender...", self
        !           256:         pt=PageTemplateFile(self.zptFile).__of__(zope)
        !           257:         return pt()
        !           258: 
        !           259: 
        !           260: class MetaResult(AnyResult):
        !           261:     """result object that collects metadata"""
        !           262: 
        !           263:     def __init__(self, zope, db_result, rank):
        !           264:         """contructor"""
        !           265:         AnyResult.__init__(self, zope, db_result, rank)
        !           266:         print "NEW META RESULT!"
        !           267: 
        !           268:         (fileid, tagidx, tags, content) = db_result
        !           269: 
        !           270:         # get full info from db
        !           271:         self.metainfo = zope.getDBFileMeta(fileid)
        !           272:         assert self.metainfo
        !           273:         
        !           274:     def checkContext(self, tags, content):
        !           275:         """takes meta entry and sets url from context tags"""
        !           276:         if tags.endswith('/context/link'):
        !           277:             if content:
        !           278:                 self.url = content            
        !           279:             
        !           280:         elif tags.endswith('/context/name'):
        !           281:             if content:
        !           282:                 self.urlabel = content
        !           283: 
        !           284:         else:
        !           285:             return False
        !           286: 
        !           287:         return True
        !           288: 
        !           289: 
        !           290: class BibResult(MetaResult):
        !           291:     """bib type result object"""
        !           292: 
        !           293:     def __init__(self, zope, db_result, rank):
        !           294:         """constructor"""
        !           295:         MetaResult.__init__(self, zope, db_result, rank)
        !           296:         print "NEW BIB RESULT!"
        !           297:         self.type = "bib"
        !           298:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        !           299:         self.url = urlForFile(self.file)
        !           300:         self.urlabel = None
        !           301:         (fileid, tagidx, tags, content) = db_result
        !           302: 
        !           303:         btype = ""
        !           304:         bitems = {}
        !           305: 
        !           306:         for me in self.metainfo:
        !           307:             (m_idx, m_tags, m_content, m_attributes) = me
        !           308:             # context tag
        !           309:             if self.checkContext(m_tags, m_content):
        !           310:                 continue
        !           311:             # first tag with bib type attribute
        !           312:             if m_tags.endswith('/meta/bib'):
        !           313:                 r = re.search('type="([^"]*)"', m_attributes)
        !           314:                 if r:
        !           315:                     btype = r.group(1)
        !           316: 
        !           317:                 if not btype:
        !           318:                     btype = "*unknown*"
        !           319: 
        !           320:                 bitems['type'] = btype
        !           321:                 continue
        !           322: 
        !           323:             # skip other tags
        !           324:             if not btype: continue
        !           325: 
        !           326:             # collect bib/something
        !           327:             r = re.search('/meta/bib/(.*)', m_tags)
        !           328:             if r:
        !           329:                 k = r.group(1)
        !           330:                 #print "CONTENT: ", m_content
        !           331:                 bitems[k] = m_content
        !           332:                 continue
        !           333: 
        !           334:         self.content = bitems
        !           335:         self.rank += 100
        !           336:         if not self.urlabel and self.url:
        !           337:             self.urlabel = "view"
        !           338: 
        !           339: 
        !           340: class ArchimResult(MetaResult):
        !           341:     """archimedes type result object"""
        !           342: 
        !           343:     def __init__(self, zope, db_result, rank):
        !           344:         """constructor"""
        !           345:         MetaResult.__init__(self, zope, db_result, rank)
        !           346:         print "NEW ARCHIM RESULT!"
        !           347:         self.type = "archim"
        !           348:         self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        !           349:         self.url = urlForFile(self.file)
        !           350:         self.urlabel = None
        !           351:         (fileid, tagidx, tags, content) = db_result
        !           352: 
        !           353:         # process info
        !           354:         bitems = {}
        !           355:         for me in self.metainfo:
        !           356:             (m_idx, m_tags, m_content, m_attributes) = me
        !           357:             # context tag
        !           358:             if self.checkContext(m_tags, m_content):
        !           359:                 continue
        !           360:             # collect archimedes/something
        !           361:             r = re.search('/meta/archimedes/(.*)', m_tags)
        !           362:             if r:
        !           363:                 k = r.group(1)
        !           364:                 #print "CONTENT: ", m_content
        !           365:                 bitems[k] = m_content
        !           366:                 continue
        !           367: 
        !           368:         self.content = bitems
        !           369:         self.rank += 100
        !           370:         if not self.urlabel and self.url:
        !           371:             self.urlabel = "view"
        !           372:    
        !           373: 
        !           374: 
        !           375: 
        !           376: def ranksort(res1, res2):
        !           377:     """sort results on rank"""
        !           378:     return cmp(res2.rank, res1.rank)
        !           379: 
        !           380: 
        !           381: def urlForFile(filename):
        !           382:     """heuristic... returns an URL for a index file name"""
        !           383:     url = None
        !           384:     if filename.startswith('/mpiwg/online/'):
        !           385:         print "URLFORFILE: online ", filename
        !           386:         r = re.search('/mpiwg/online/(.*)/index.meta', filename)
        !           387:         if r:
        !           388:             url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1)
        !           389: 
        !           390:     return url
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>