# OSAS/OSA_system/OSAS_search.py - view
#
# File: [Repository] / OSAS / OSA_system / OSAS_search.py
# Revision 1.2: download - view: text, annotated - select for diffs - revision graph
# Mon Jul 5 21:08:55 2004 UTC (19 years, 11 months ago) by casties
# Branches: MAIN
# CVS tags: HEAD
#
# improved everything, really

"""Metadata search interface
ROC 2004, itgroup

Searches the ``meta`` table of the storage database and renders the hits
through page templates from this package's ``zpt`` directory.
"""

import logging
import os
import re

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL
import psycopg as PgSQL

logger = logging.getLogger("OSAS_search")

# maximum number of database rows that one search turns into result objects
MAXHITS = 1000

# how often dbCursor tries to obtain a cursor before giving up
MAXCONTRIES = 3

# connection string used when no dsn is given to the constructor
# NOTE(review): credentials are hard-coded in the source; consider moving
# them into configuration.
DEFAULT_DSN = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"

# patterns used while collecting metadata for result objects
BIB_TYPE_RE = re.compile('type="([^"]*)"')
BIB_TAG_RE = re.compile('/meta/bib/(.*)')
ARCHIM_TAG_RE = re.compile('/meta/archimedes/(.*)')
ONLINE_INDEX_RE = re.compile(r'/mpiwg/online/(.*)/index\.meta')


def _zptFile(name):
    """returns the path of the page template name in this package's zpt directory"""
    return os.path.join(package_home(globals()), "zpt/" + name)


class OSAS_search(SimpleItem):
    """Object for global metadata search"""

    meta_type="OSAS_search"

    # class-level defaults for the volatile attributes, so they are always
    # readable even when the instance does not carry them itself
    _v_dbCon = None
    _v_tryCon = 0

    def __init__(self,id,title,dsn=None):
        """init

        id and title are stored on the object; dsn is the database
        connection string and defaults to DEFAULT_DSN when empty.
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            self.dsn = DEFAULT_DSN

    #
    # session helpers (underscore names, not meant to be called from outside)
    #

    def _sessionGet(self, key, default=None):
        """returns the session value stored under key, or default if unavailable"""
        try:
            return self.REQUEST.SESSION[key]
        except (AttributeError, KeyError):
            return default

    def _resultIndexes(self):
        """returns the (first, last, total, count) tuple of the last search or None"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (AttributeError, KeyError, TypeError, ValueError):
            return None
        return (first, last, total, count)

    #
    # database access
    #

    def dbCursor(self):
        """returns new SQL cursor object

        Reuses the connection kept in _v_dbCon. When the connection is
        missing or fails to produce a cursor it is re-opened, at most
        MAXCONTRIES times. Errors from PgSQL.connect propagate unchanged.
        Raises RuntimeError if no cursor could be obtained.
        """
        self._v_tryCon = 0
        while self._v_tryCon < MAXCONTRIES:
            if self._v_dbCon is None:
                self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            try:
                return self._v_dbCon.cursor()
            except Exception:
                # in case of problems reset dbCon and try a fresh connection
                logger.warning("could not get db cursor, resetting connection",
                               exc_info=1)
                self._v_dbCon = None
                self._v_tryCon += 1

        raise RuntimeError("AIIEE no db cursor!!")

    def _cachedLookup(self, cachekey, fileid, sql, fetchone):
        """returns the rows sql yields for fileid, using the session cache cachekey

        sql must contain the named parameter %(id)s. With fetchone true a
        single row (or None) is returned, otherwise the list of all rows.
        """
        cache = self._sessionGet(cachekey)
        if cache is None:
            cache = {}
        if fileid in cache:
            return cache[fileid]

        curs = self.dbCursor()
        try:
            logger.debug("%s -> %s", sql, fileid)
            curs.execute(sql, {'id':fileid})
            if fetchone:
                res = curs.fetchone()
            else:
                res = curs.fetchall()
        finally:
            # always release the cursor, also when the query fails
            curs.close()

        # store info in cache; the dict is assigned back into the session
        # after every change (presumably so the session registers it)
        cache[fileid] = res
        self.REQUEST.SESSION[cachekey] = cache
        return res

    def getDBFileMeta(self, fileid):
        """returns an array with all meta entries of fileid

        Each entry is a row (idx, tags, content, attributes), ordered by
        idx. Results are cached in the session under 'dbMeta'.
        """
        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        return self._cachedLookup('dbMeta', fileid, sql, False)

    def getDBFile(self, fileid):
        """returns the file information of fileid

        The result is the row (filename, mtime) or None when the query
        yields no row. Results are cached in the session under 'dbFiles'.
        """
        sql = 'select filename,mtime from files where id=%(id)s'
        return self._cachedLookup('dbFiles', fileid, sql, True)

    def dbSearch(self, query, type):
        """search DB for query and return result set

        type is one of 'equals', 'startswith' or 'contains'; any other
        value raises ValueError. At most MAXHITS rows are read. Returns a
        list of result objects built by getResult.
        """
        # NOTE(review): '%' and '_' inside query are not escaped and so act
        # as LIKE wildcards.
        if type == 'equals':
            qs = query
        elif type == 'startswith':
            qs = query + "%"
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            raise ValueError("unknown search type: %r" % (type,))

        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        curs = self.dbCursor()
        try:
            logger.debug("%s -> %s", sql, qs)
            curs.execute(sql, {'qs':qs})
            rows = curs.fetchmany(MAXHITS)
        finally:
            curs.close()

        results = []
        for row in rows:
            result = self.getResult(row)
            if result is not None:
                results.append(result)

        return results

    def getResult(self, db_result, rank=0):
        """factory for result objects

        db_result is a row (fileid, idx, tags, content); the tag path
        decides which result class is created.
        """
        (fileid, tagidx, tags, content) = db_result

        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """returns HTML rendering of a search result"""
        return result.render(self)

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(_zptFile("OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10):
        """search and create result

        Without searchstring the last search stored in the session is
        reused (this is how paging works). start is the 1-based index of
        the first result to show, count the page size; both may be given
        as strings. The full result list, the current page and the tuple
        (first, last, total, count) are stored in the session under
        'results', 'resultgroup' and 'res_indexes'.
        """
        session = self.REQUEST.SESSION
        # request parameters may arrive as strings
        count = int(count)
        sres = max(int(start) - 1, 0)
        lres = sres + count

        oldsearch = self._sessionGet('searchstring', "")
        oldtype = self._sessionGet('searchtype', "")
        results = self._sessionGet('results')

        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype

        if not searchstring:
            # nothing to search for and no previous search to fall back on
            results = []
            session['results'] = results
        elif results is None or searchstring != oldsearch or searchtype != oldtype:
            # new search
            results = self.dbSearch(searchstring, searchtype)
            # sort the result
            results.sort(ranksort)
            # store it
            session['results'] = results
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype

        total = len(results)
        session['resultgroup'] = results[sres:lres]
        session['res_indexes'] = (sres+1, min(lres, total), total, count)

        pt=PageTemplateFile(_zptFile("searchResult.zpt")).__of__(self)
        return pt()

    def getSearchType(self):
        """returns the last search type"""
        return self._sessionGet('searchtype', "")

    def getSearchString(self):
        """returns the last search string"""
        return self._sessionGet('searchstring', "")

    def hasNextResults(self):
        """returns if there are more results"""
        indexes = self._resultIndexes()
        if indexes is None:
            return False
        (first, last, total, count) = indexes
        return (last < total)

    def hasPrevResults(self):
        """returns if there are previous results"""
        indexes = self._resultIndexes()
        if indexes is None:
            return False
        (first, last, total, count) = indexes
        return (first > 1)

    def nextResults(self):
        """returns more results"""
        indexes = self._resultIndexes()
        if indexes is None:
            logger.warning("OUCH: no next results: no result indexes in session")
            return self.search()

        (first, last, total, count) = indexes
        # never start beyond the last result (or before the first)
        first = max(min(first + count, total), 1)
        return self.search(start=first, count=count)

    def prevResults(self):
        """returns previous results"""
        indexes = self._resultIndexes()
        if indexes is None:
            logger.warning("OUCH: no prev results: no result indexes in session")
            return self.search()

        (first, last, total, count) = indexes
        first = max(first - count, 1)
        return self.search(start=first, count=count)


def manage_AddOSAS_searchForm(self):
    """create Search form"""
    pt=PageTemplateFile(_zptFile("AddOSAS_search.zpt")).__of__(self)
    return pt()

def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """add the OSAS_search object

    Creates an OSAS_search with the given id, title and dsn inside self
    and redirects to 'manage_main' when RESPONSE is given.
    """
    newObj=OSAS_search(id,title,dsn)
    self._setObject(id,newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


class SearchResult(SimpleItem):
    """base search result object"""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """init"""
        self.type = type
        self.file = file
        self.url = url
        # label for the url, initially the url itself
        self.urlabel = url
        self.content = content
        self.rank = rank


class AnyResult(SearchResult):
    """catch-all type result object"""

    def __init__(self, zope, db_result, rank):
        """returns a catch-all type result

        zope is the OSAS_search object used for database lookups,
        db_result the hit row (fileid, idx, tags, content).
        """
        SearchResult.__init__(self, type='unknown')

        self.zptFile = _zptFile("searchResult_any.zpt")

        (fileid, tagidx, tags, content) = db_result
        self.hitTag = tags

        # get full info from db
        self.fileinfo = zope.getDBFile(fileid)
        assert self.fileinfo

        # content maps the tag path of the hit to its text
        self.content = {tags: content}
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """render this result object

        The object is put into the session under 'result' and the page
        template zptFile is rendered in the context of zope.
        """
        zope.REQUEST.SESSION['result'] = self
        pt=PageTemplateFile(self.zptFile).__of__(zope)
        return pt()


class MetaResult(AnyResult):
    """result object that collects metadata"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        AnyResult.__init__(self, zope, db_result, rank)

        fileid = db_result[0]

        # get full info from db
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo

    def checkContext(self, tags, content):
        """takes meta entry and sets url from context tags

        Returns True when tags is a context link or context name entry
        (whether or not it had content), False otherwise.
        """
        if tags.endswith('/context/link'):
            if content:
                self.url = content
            return True

        if tags.endswith('/context/name'):
            if content:
                self.urlabel = content
            return True

        return False


class BibResult(MetaResult):
    """bib type result object"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = _zptFile("searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        btype = ""
        bitems = {}

        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag
            if self.checkContext(m_tags, m_content):
                continue

            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # attributes may be empty/NULL in the database
                r = BIB_TYPE_RE.search(m_attributes or "")
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype
                continue

            # skip other tags until the bib element itself has been seen
            if not btype:
                continue

            # collect bib/something
            r = BIB_TAG_RE.search(m_tags)
            if r:
                bitems[r.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        if not self.urlabel and self.url:
            self.urlabel = "view"


class ArchimResult(MetaResult):
    """archimedes type result object"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = _zptFile("searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        # process info
        bitems = {}
        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag
            if self.checkContext(m_tags, m_content):
                continue

            # collect archimedes/something
            r = ARCHIM_TAG_RE.search(m_tags)
            if r:
                bitems[r.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        if not self.urlabel and self.url:
            self.urlabel = "view"


def ranksort(res1, res2):
    """sort results on rank

    Comparison function for list.sort: results with higher rank come first.
    """
    return cmp(res2.rank, res1.rank)


def urlForFile(filename):
    """heuristic... returns an URL for a index file name

    Only paths below /mpiwg/online/ that contain /index.meta yield an
    URL; for everything else (including an empty filename) None is
    returned.
    """
    url = None
    if filename and filename.startswith('/mpiwg/online/'):
        logger.debug("URLFORFILE: online %s", filename)
        r = ONLINE_INDEX_RE.search(filename)
        if r:
            url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1)

    return url