OSAS/OSA_system/OSAS_search.py - view

File: [Repository] / OSAS / OSA_system / OSAS_search.py
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Thu Jul 29 16:14:21 2004 UTC (19 years, 11 months ago) by casties
Branches: MAIN
CVS tags: HEAD

added change form

"""Metadata search interface
ROC 2004, itgroup

Zope product module: the OSAS_search folder object runs LIKE queries
against the "meta" table of a PostgreSQL database, wraps the rows in
result objects and renders them through page templates.  Search state is
kept in the Zope session.

NOTE: this module targets Python 2 / Zope 2 (Globals, cmp-style sort).
"""

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.PageTemplates.PageTemplate import PageTemplate
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
from OFS.Folder import Folder
from OFS.SimpleItem import SimpleItem
#from pyPgSQL import PgSQL
import psycopg as PgSQL

import re
import os

# upper bound on the number of database rows processed per search
MAXHITS = 1000


class OSAS_search(Folder):
    """Object for global metadata search"""

    meta_type="OSAS_search"

    manage_options=Folder.manage_options+(
        {'label':'Main config','action':'manage_ChangeOSAS_searchForm'},
        )

    def __init__(self,id,title,dsn=None):
        """init

        id, title -- Zope id and title of this object
        dsn       -- database connection string; a built-in default is
                     used when it is empty
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): default DSN embeds host and credentials in source
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        # number of consecutive failed attempts to get a cursor
        self._v_tryCon = 0

    def dbCursor(self):
        """returns new SQL cursor object

        Reuses the cached connection when there is one.  If creating the
        cursor fails the connection is dropped and re-opened; after three
        consecutive failures the final assert fails.
        """
        curs = None
        if hasattr(self, '_v_dbCon') and self._v_dbCon is not None:
            try:
                curs = self._v_dbCon.cursor()
                self._v_tryCon = 0
            except Exception:
                # in case of problems reset dbCon
                self._v_dbCon = None
                self._v_tryCon += 1
        else:
            self._v_dbCon = None
            self._v_tryCon = 0

        if not curs and self._v_tryCon < 3:
            self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            # call ourself with the new connection
            curs = self.dbCursor()

        assert curs, "AIIEE no db cursor!!"
        return curs

    def getDBFileMeta(self, fileid):
        """returns an array with all meta entries of fileid

        Rows are (idx, tags, content, attributes) ordered by idx.  The
        result is cached per fileid in the session under 'dbMeta'.
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
            if fileid in metacache:
                return metacache[fileid]

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("done")
            res = curs.fetchall()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        metacache[fileid] = res
        session['dbMeta'] = metacache

        return res

    def getDBFile(self, fileid):
        """returns the file information of fileid

        The row is (filename, mtime), or whatever fetchone() yields when
        there is no match.  Cached per fileid in the session under 'dbFiles'.
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
            if fileid in filecache:
                return filecache[fileid]

        sql = 'select filename,mtime from files where id=%(id)s'
        curs = self.dbCursor()
        try:
            print("DBFILE:  %s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("DBFILE: done")
            res = curs.fetchone()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        filecache[fileid] = res
        session['dbFiles'] = filecache

        return res

    def dbSearch(self, query, type):
        """search DB for query and return result set

        query -- text to look for in meta.content (case-insensitive)
        type  -- 'equals', 'startswith' or 'contains'; any other value is
                 treated like 'startswith'

        Returns a tuple (results, restypelist): the result objects and the
        list of distinct result types among them.  An empty query yields
        ([], []).  At most MAXHITS rows are processed.
        """
        results = []
        restypes = {}
        if not query:
            # empty query: keep the (results, types) shape callers unpack
            return (results, [])

        if type == 'equals':
            qs = query
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            # 'startswith', and the fallback for unrecognised types
            qs = query + "%"

        sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, qs))
            curs.execute(sql, {'qs':qs})
            print("done")
            rescnt = 0
            res = curs.fetchone()
            while res and rescnt < MAXHITS:
                result = self.getResult(res)
                if result:
                    results.append(result)
                    # dict used as a set of seen result types
                    restypes[result.type] = result.type

                rescnt += 1
                res = curs.fetchone()
        finally:
            # release the cursor even when building a result fails
            curs.close()

        restypelist = list(restypes.keys())
        return (results, restypelist)

    def getResult(self, db_result, rank=0):
        """factory for result objects

        db_result is a (fileid, idx, tags, content) row; the tag path
        decides which result class is created.
        """
        (fileid, tagidx, tags, content) = db_result

        if tags.find('/meta/bib/') > -1:
            return BibResult(self, db_result, rank)
        if tags.find('/meta/archimedes/') > -1:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """returns HTML rendering of a search result"""
        return result.render(self)

    def filterResults(self, results, start, end, restypefilter=None):
        """returns list of results that match a filter

        results       -- list of result objects
        start, end    -- slice bounds applied after type filtering
        restypefilter -- container of accepted result types, or a false
                         value for no type filtering

        Returns (resgroup, rescnt): the slice and the number of results
        left after type filtering (before slicing).
        """
        # filter types first
        if restypefilter:
            res = [r for r in results if r.type in restypefilter]
        else:
            res = results
        # new total count (because of filter)
        rescnt = len(res)
        # filter on count
        resgroup = res[start:end]

        return (resgroup, rescnt)

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
        """search and create result

        searchstring  -- query text; when empty the previous search stored
                         in the session is reused
        searchtype    -- 'equals', 'startswith' or 'contains'
        start         -- 1-based index of the first result to show
        count         -- number of results per page
        restypefilter -- optional container of result types to show

        start and count may be strings (request parameters).  The result
        list, the current page and the paging indexes are stored in the
        session; the rendered result page is returned.
        """
        session = self.REQUEST.SESSION
        # request parameters arrive as strings
        count = int(count)
        # convert to 0-based slice bounds; never start before the first hit
        sres = max(int(start) - 1, 0)
        lres = sres + count
        try:
            oldsearch = session['searchstring']
            oldtype = session['searchtype']
        except Exception:
            oldsearch = ""
            oldtype = ""

        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype

        if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            (res, restypes) = self.dbSearch(searchstring, searchtype)
            # sort the result
            res.sort(ranksort)
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype
            session['resulttypes'] = restypes

        (resgroup, nres) = self.filterResults(session['results'], sres, lres, restypefilter)
        lres = min(lres, nres)
        sres = min(sres, nres)
        session['resultgroup'] = resgroup
        # (first shown, last shown, total, page size); first/last are 1-based
        session['res_indexes'] = (sres+1, lres, nres, count)
        session['res_type_filter'] = restypefilter
        if nres > 0:
            zpt = "zpt/searchResult.zpt"
        else:
            zpt = "zpt/searchResult_none.zpt"

        pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
        return pt()

    def getSearchType(self):
        """returns the last search type"""
        try:
            ret = self.REQUEST.SESSION['searchtype']
        except Exception:
            ret = ""

        return ret

    def getSearchString(self):
        """returns the last search string"""
        try:
            ret = self.REQUEST.SESSION['searchstring']
        except Exception:
            ret = ""

        return ret

    def hasNextResults(self):
        """returns if there are more results"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
            # more to show as long as the last shown hit is not the final one
            return (last < total)
        except Exception:
            return False

    def hasPrevResults(self):
        """returns if there are previous results"""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
            return (first > 1)
        except Exception:
            return False

    def nextResults(self):
        """returns more results

        Shows the page after the current one, keeping the result type
        filter of the last search() call.
        """
        try:
            session = self.REQUEST.SESSION
            (first, last, total, count) = session['res_indexes']
            restypefilter = session['res_type_filter']
        except Exception:
            print("OUCH: no next results!")
            return self.search()

        first = min(first + count, total)
        return self.search(start=first, count=count, restypefilter=restypefilter)

    def prevResults(self):
        """returns previous results

        Shows the page before the current one, keeping the result type
        filter of the last search() call.
        """
        try:
            session = self.REQUEST.SESSION
            (first, last, total, count) = session['res_indexes']
            restypefilter = session['res_type_filter']
        except Exception:
            print("OUCH: no prev results!")
            return self.search()

        first = max(first - count, 1)
        return self.search(start=first, count=count, restypefilter=restypefilter)

    def manage_ChangeOSAS_searchForm(self):
        """create Search form"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
        return pt()

    def manage_ChangeOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
        """change id, title and DSN of this search object

        When dsn is None the current DSN is kept.  The cached database
        connection is dropped so the next cursor uses the current DSN.
        """
        self.id = id
        self.title = title
        if dsn is not None:
            self.dsn = dsn
        # force a reconnect with the (possibly new) DSN
        self._v_dbCon = None
        self._v_tryCon = 0
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')


# NOTE(review): the two factory functions below are placed at module level
# following the usual Zope constructor convention -- confirm against the
# product's registration code.

def manage_AddOSAS_searchForm(self):
    """create Search form"""
    pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")).__of__(self)
    return pt()


def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """add a new OSAS_search object to the container self"""
    newObj=OSAS_search(id,title,dsn)
    self._setObject(id,newObj)
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')


class SearchResult(SimpleItem):
    """base search result object"""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """init"""
        # result type (e.g. "bib", "archim")
        self.type = type
        # index file name
        self.file = file
        # url for result (list of pairs)
        if url:
            self.urls = url
        else:
            self.urls = []
        # actual content (list of tuples)
        self.content = content
        # document status (e.g. "online", "archive")
        self.status = None
        # result rank for presentation
        self.rank = rank


class AnyResult(SearchResult):
    """catch-all type result object"""

    def __init__(self, zope, db_result, rank):
        """returns a catch-all type result

        zope      -- the OSAS_search object (used for database lookups)
        db_result -- (fileid, idx, tags, content) row of the hit
        rank      -- initial rank of the result
        """
        SearchResult.__init__(self)
        self.type='unknown'
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

        (db_fileid, db_tagidx, db_tags, db_content) = db_result
        # tag path of the entry that matched the query
        self.hitTag = db_tags

        # get full info from db
        self.fileinfo = zope.getDBFile(db_fileid)
        assert self.fileinfo

        self.content = {db_tags: db_content}
        self.file = self.fileinfo[0]
        self.status = statusForFile(self.file)
        self.rank = rank

    def getContentList(self):
        """returns content as list of tuples in preferred order"""
        return list(self.content.items())

    def render(self, zope):
        """render this result object

        Stores this object in the session under 'result' and renders the
        template named by self.zptFile in the context of zope.
        """
        zope.REQUEST.SESSION['result'] = self
        pt=PageTemplateFile(self.zptFile).__of__(zope)
        return pt()


class MetaResult(AnyResult):
    """result object that collects metadata"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        AnyResult.__init__(self, zope, db_result, rank)

        fileid = db_result[0]

        # get full info from db
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo

    def checkContext(self, tags, content, ctxurl):
        """takes meta entry and updates url from context tags

        ctxurl is a two-element list [link, name]; it is modified in place
        and also returned.  Empty content never overwrites a stored value.
        """
        if tags.endswith('/context/link'):
            if content:
                ctxurl[0] = content

        elif tags.endswith('/context/name'):
            if content:
                ctxurl[1] = content

        return ctxurl


class BibResult(MetaResult):
    """bib type result object"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        url = storageURL(self.file)
        if url:
            self.urls.append(url)

        btype = ""
        bitems = {}
        ctxurl = ['', '']

        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            ctxurl = self.checkContext(m_tags, m_content, ctxurl)
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # attributes may be empty/NULL in the database
                r = re.search('type="([^"]*)"', m_attributes or '')
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype
                continue

            # skip other tags until the bib element itself was seen
            if not btype:
                continue

            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                k = r.group(1)
                bitems[k] = m_content
                # remember hit tag
                if m_tags == self.hitTag:
                    self.hitTag = k

        self.content = bitems
        # store context
        if not ctxurl[1]:
            ctxurl[1] = "View"
        # must have link
        if ctxurl[0]:
            self.urls.append(ctxurl)

        # bibliographic hits are ranked above catch-all hits
        self.rank += 100

    def getContentList(self):
        """returns content as list of tuples in preferred order"""
        l = []
        c = self.content.copy()
        # preferred items first
        for k in ('author', 'title', 'journal', 'year'):
            if k in c:
                l.append((k, c.pop(k)))

        # no type (it may be absent when no /meta/bib entry was found)
        c.pop('type', None)
        # copy the rest
        l.extend(c.items())

        return l


class ArchimResult(MetaResult):
    """archimedes type result object"""

    def __init__(self, zope, db_result, rank):
        """constructor"""
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        url = storageURL(self.file)
        if url:
            self.urls.append(url)

        # process info
        bitems = {}
        ctxurl = ['', '']
        for me in self.metainfo:
            (m_idx, m_tags, m_content, m_attributes) = me
            # context tag
            ctxurl = self.checkContext(m_tags, m_content, ctxurl)
            # collect archimedes/something
            r = re.search('/meta/archimedes/(.*)', m_tags)
            if r:
                k = r.group(1)
                bitems[k] = m_content
                # remember hit tag
                if m_tags == self.hitTag:
                    self.hitTag = k

        self.content = bitems
        # archimedes hits are ranked above catch-all hits
        self.rank += 100
        # store context
        if not ctxurl[1]:
            ctxurl[1] = "View"
        # must have link
        if ctxurl[0]:
            self.urls.append(ctxurl)

    def getContentList(self):
        """returns content as list of tuples in preferred order"""
        l = []
        c = self.content.copy()
        # preferred items first
        for k in ('author', 'title', 'date', 'place'):
            if k in c:
                l.append((k, c.pop(k)))

        # copy the rest
        l.extend(c.items())

        return l


def ranksort(res1, res2):
    """sort results on rank

    cmp-style comparison function that orders higher ranks first.
    """
    return cmp(res2.rank, res1.rank)


def statusForFile(filename):
    """heuristic... returns status for a index file name

    Returns "online", "archive" or "database" depending on the prefix of
    filename, or None when no prefix matches.
    """
    status = None
    if filename.startswith('/mpiwg/online/'):
        status = "online"
    elif filename.startswith('/mpiwg/archive/'):
        status = "archive"
    elif filename.startswith('http://'):
        status = "database"

    return status


def storageURL(filename):
    """heuristic... returns an URL for a index file name

    Returns a (url, name) pair, or None when no URL can be derived from
    filename.
    """
    url = None
    name = None
    if filename.startswith('/mpiwg/online/'):
        # the link points at the directory that contains index.meta
        r = re.search('^(.*)/index.meta', filename)
        if r:
            url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1)
            name = "Storage System"

    elif filename.startswith('http://'):
        url = filename
        name = "Online Database"

    if name and url:
        return (url, name)

    return None