OSAS/OSA_system/OSAS_search.py - view

File: [Repository] / OSAS / OSA_system / OSAS_search.py
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Thu Jul 1 19:31:25 2004 UTC (20 years ago) by casties
Branches: MAIN
CVS tags: HEAD

first version of metadata search interface

"""Metadata search interface
ROC 2004, itgroup

Zope product that searches the ``meta`` table of the storage database and
renders the hits through page templates.
"""

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL
import psycopg as PgSQL

import logging
import re
import os

logger = logging.getLogger("OSAS_search")

# maximum number of database rows turned into result objects per search
MAXHITS = 1000

# number of cursor attempts dbCursor() makes before giving up
MAX_CONNECT_TRIES = 3

# connection string used when no dsn is given to OSAS_search
# NOTE(review): this embeds a plain-text password in the source; consider
# moving it into instance configuration.
DEFAULT_DSN = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"

# patterns used while collecting metadata for result objects
_BIB_TYPE_RE = re.compile(r'type="([^"]*)"')
_BIB_ITEM_RE = re.compile(r'/meta/bib/(.*)')
_ARCHIM_ITEM_RE = re.compile(r'/meta/archimedes/(.*)')
_ONLINE_INDEX_RE = re.compile(r'/mpiwg/online/(.*)/index\.meta')


def _zptPath(name):
    """Return the path of page template *name* in this package's zpt folder."""
    return os.path.join(package_home(globals()), "zpt/" + name)


class OSAS_search(SimpleItem):
    """Object for global metadata search"""

    meta_type = "OSAS_search"

    def __init__(self, id, title, dsn=None):
        """Store id, title and the database connection string.

        A false *dsn* (None or empty) selects DEFAULT_DSN.
        """
        self.id = id
        self.title = title
        self.dsn = dsn or DEFAULT_DSN
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0

    def dbCursor(self):
        """Return a new SQL cursor, connecting to the database if needed.

        The connection is kept in the volatile attribute ``_v_dbCon`` and
        reused on later calls.  When creating a cursor fails the connection
        is dropped and a fresh one is opened, up to MAX_CONNECT_TRIES
        attempts in total.

        Raises AssertionError when no cursor could be obtained (the type
        the original ``assert`` produced, but raised explicitly so that it
        also fires under ``python -O``).  Errors from ``PgSQL.connect``
        propagate unchanged.
        """
        # volatile attributes may be missing, hence getattr with a default
        con = getattr(self, '_v_dbCon', None)
        self._v_tryCon = 0
        while self._v_tryCon < MAX_CONNECT_TRIES:
            if con is None:
                con = PgSQL.connect(self.dsn, serialize=0)
            try:
                curs = con.cursor()
            except Exception:
                # in case of problems reset dbCon and try a new connection
                logger.exception("could not get a cursor, dropping connection")
                con = None
                self._v_dbCon = None
                self._v_tryCon += 1
                continue
            self._v_dbCon = con
            self._v_tryCon = 0
            return curs

        raise AssertionError("AIIEE no db cursor!!")

    def _cachedFetch(self, cachename, fileid, sql, single=False):
        """Run *sql* for *fileid*, caching the result in the session.

        *cachename* is the session key holding a dict that maps file ids
        to earlier results.  *sql* must use the ``%(id)s`` placeholder.
        With *single* true the result is ``fetchone()``, otherwise
        ``fetchall()``.
        """
        session = self.REQUEST.SESSION
        cache = session.get(cachename, None)
        if cache is None:
            cache = {}
        if fileid in cache:
            logger.debug("%s: %s from cache", cachename, fileid)
            return cache[fileid]

        curs = self.dbCursor()
        try:
            logger.debug("%s -> %s", sql, fileid)
            curs.execute(sql, {'id': fileid})
            if single:
                res = curs.fetchone()
            else:
                res = curs.fetchall()
        finally:
            # close the cursor even if the query failed
            curs.close()

        # store info in cache; the dict is re-assigned to the session
        # exactly as the original code did
        cache[fileid] = res
        session[cachename] = cache
        return res

    def getDBFileMeta(self, fileid):
        """Return all meta entries of *fileid*.

        Each entry is a row ``(idx, tags, content, attributes)`` from the
        ``meta`` table, ordered by ``idx``.  Results are cached in the
        session under ``'dbMeta'``.
        """
        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        return self._cachedFetch('dbMeta', fileid, sql)

    def getDBFile(self, fileid):
        """Return the file information of *fileid*.

        The result is the row ``(filename, mtime)`` from the ``files``
        table, or whatever ``fetchone()`` yields when there is no such
        row.  Results are cached in the session under ``'dbFiles'``.
        """
        sql = 'select filename,mtime from files where id=%(id)s'
        return self._cachedFetch('dbFiles', fileid, sql, single=True)

    def dbSearch(self, query):
        """Search the DB for *query* and return a list of result objects.

        The query is matched as a prefix of ``meta.content`` using SQL
        ``LIKE``; *query* is passed on unmodified, so LIKE wildcards in it
        stay active.  At most MAXHITS rows are turned into results.
        """
        qs = query + "%"
        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        results = []
        rescnt = 0
        curs = self.dbCursor()
        try:
            logger.debug("%s -> %s", sql, qs)
            curs.execute(sql, {'qs': qs})
            res = curs.fetchone()
            # count rows as they are processed so that the limit and the
            # logged number are exact
            while res and rescnt < MAXHITS:
                rescnt += 1
                result = self.getResult(res)
                if result:
                    results.append(result)
                res = curs.fetchone()
        finally:
            curs.close()

        logger.debug("SEARCH: %s results", rescnt)
        return results

    def getResult(self, db_result, rank=0):
        """Factory for result objects.

        *db_result* is a row ``(fileid, idx, tags, content)``.  The result
        class is chosen from the tag path of the hit.
        """
        (fileid, tagidx, tags, content) = db_result
        logger.debug("new result, tags: %s", tags)

        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """Return the HTML rendering of a search result."""
        logger.debug("renderresult! %s -- %s", result, result.url)
        return result.render(self)

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt = PageTemplateFile(_zptPath("OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None):
        """Run a search (if *searchstring* is given) and show the results.

        The sorted results and the search string are stored in the session
        under ``'results'`` and ``'searchstring'``.
        """
        if searchstring:
            logger.debug("SEARCH: %s", searchstring)
            res = self.dbSearch(searchstring)
            res.sort(ranksort)
            self.REQUEST.SESSION['results'] = res
            self.REQUEST.SESSION['searchstring'] = searchstring
            # logged inside the branch: res only exists when a search ran
            logger.debug("SEARCH res: %s", res)

        pt = PageTemplateFile(_zptPath("searchResult.zpt")).__of__(self)
        return pt()


def manage_AddOSAS_searchForm(self):
    """create Search form"""
    pt = PageTemplateFile(_zptPath("AddOSAS_search.zpt")).__of__(self)
    return pt()


def manage_AddOSAS_search(self, id, title=None, dsn=None, RESPONSE=None):
    """Add an OSAS_search object to the container *self*."""
    newObj = OSAS_search(id, title, dsn)
    self._setObject(id, newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


class SearchResult(SimpleItem):
    """base search result object"""

    # parameter names (including the builtin-shadowing ``type`` and
    # ``file``) are part of the interface and therefore kept
    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Store the given values; the URL label starts out as the URL."""
        self.type = type
        self.file = file
        self.url = url
        self.urlabel = url
        self.content = content
        self.rank = rank


class AnyResult(SearchResult):
    """catch-all type result object"""

    def __init__(self, zope, db_result, rank):
        """Build a catch-all result for the hit *db_result*.

        *zope* is the object providing ``getDBFile`` (and ``REQUEST`` for
        rendering); *db_result* is a row ``(fileid, idx, tags, content)``.
        """
        SearchResult.__init__(self, type='unknown')
        self.zptFile = _zptPath("searchResult_any.zpt")

        (fileid, tagidx, tags, content) = db_result
        self.hitTag = tags

        # get full info from db
        self.fileinfo = zope.getDBFile(fileid)
        assert self.fileinfo, "no file entry for fileid %s" % (fileid,)

        self.content = {tags: content}
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """Render this result object with its page template.

        The result is put into the session under ``'result'`` before the
        template is called.
        """
        zope.REQUEST.SESSION['result'] = self
        logger.debug("rendering %s", self)
        pt = PageTemplateFile(self.zptFile).__of__(zope)
        return pt()


class MetaResult(AnyResult):
    """result object that collects metadata"""

    def __init__(self, zope, db_result, rank):
        """Like AnyResult, plus all meta entries of the file in ``metainfo``."""
        AnyResult.__init__(self, zope, db_result, rank)

        (fileid, tagidx, tags, content) = db_result

        # get full info from db
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo, "no meta entries for fileid %s" % (fileid,)

    def checkContext(self, tags, content):
        """Take a meta entry and set URL or URL label from context tags.

        Returns True when *tags* is a context link/name tag (whether or
        not *content* was usable), False otherwise.
        """
        if tags.endswith('/context/link'):
            if content:
                self.url = content
            return True

        if tags.endswith('/context/name'):
            if content:
                self.urlabel = content
            return True

        return False

    def _finishMeta(self, items):
        """Store collected *items* as content, boost rank, default the label."""
        self.content = items
        self.rank += 100
        if not self.urlabel and self.url:
            self.urlabel = "view"


class BibResult(MetaResult):
    """bib type result object"""

    def __init__(self, zope, db_result, rank):
        """Collect the ``/meta/bib/...`` entries of the file as content."""
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = _zptPath("searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        btype = ""
        bitems = {}

        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag
            if self.checkContext(m_tags, m_content):
                continue

            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # attributes may be empty/None; search an empty string then
                r = _BIB_TYPE_RE.search(m_attributes or "")
                if r:
                    btype = r.group(1)
                if not btype:
                    btype = "*unknown*"
                bitems['type'] = btype
                continue

            # skip other tags until the bib tag has been seen
            if not btype:
                continue

            # collect bib/something
            r = _BIB_ITEM_RE.search(m_tags)
            if r:
                bitems[r.group(1)] = m_content

        self._finishMeta(bitems)


class ArchimResult(MetaResult):
    """archimedes type result object"""

    def __init__(self, zope, db_result, rank):
        """Collect the ``/meta/archimedes/...`` entries of the file as content."""
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = _zptPath("searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        # process info
        bitems = {}
        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag
            if self.checkContext(m_tags, m_content):
                continue

            # collect archimedes/something
            r = _ARCHIM_ITEM_RE.search(m_tags)
            if r:
                bitems[r.group(1)] = m_content

        self._finishMeta(bitems)


def ranksort(res1, res2):
    """Comparison function sorting results by descending rank."""
    return cmp(res2.rank, res1.rank)


def urlForFile(filename):
    """heuristic... returns an URL for an index file name

    Only names below ``/mpiwg/online/`` that contain ``/index.meta`` get a
    URL; everything else (including an empty or None *filename*) yields
    None.
    """
    url = None
    if filename and filename.startswith('/mpiwg/online/'):
        logger.debug("URLFORFILE: online %s", filename)
        r = _ONLINE_INDEX_RE.search(filename)
        if r:
            url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s" % r.group(1)

    return url