Annotation of OSAS/OSA_system/OSAS_search.py, revision 1.1
1.1 ! casties 1: """Metadata search interface
! 2: ROC 2004, itgroup
! 3:
! 4: """
! 5:
! 6: from AccessControl import ClassSecurityInfo
! 7: from Globals import InitializeClass
! 8: from Globals import Persistent, package_home
! 9: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
! 10: from Products.PageTemplates.PageTemplate import PageTemplate
! 11: from OFS.SimpleItem import SimpleItem
! 12: #from pyPgSQL import PgSQL
! 13: import psycopg as PgSQL
! 14:
! 15: import re
! 16: import os
! 17:
! 18: MAXHITS = 1000
! 19:
class OSAS_search(SimpleItem):
    """Zope object providing a global search over archive metadata.

    Searches run against a PostgreSQL database reached through the
    ``PgSQL`` module alias, using the tables ``meta`` (columns fileid,
    idx, tags, content, attributes) and ``files`` (columns id, filename,
    mtime).  Per-file lookups are cached in ``REQUEST.SESSION``.
    """

    meta_type="OSAS_search"

    def __init__(self,id,title,dsn=None):
        """Create the search object.

        id    -- Zope object id
        title -- object title
        dsn   -- connection string passed to PgSQL.connect(); when empty
                 the built-in default below is used
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): this default embeds a host name and credentials
            # in source code; consider moving it to configuration.
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0

    def dbCursor(self):
        """Return a new SQL cursor, (re)connecting when necessary.

        An existing connection is reused.  When creating a cursor fails
        the connection is dropped and a fresh one is opened; after three
        failed cursor attempts an AssertionError is raised.  Errors from
        PgSQL.connect() itself propagate unchanged.
        """
        # the volatile attributes may be missing, so do not rely on them
        con = getattr(self, '_v_dbCon', None)
        self._v_tryCon = 0
        while self._v_tryCon < 3:
            if con is None:
                con = PgSQL.connect(self.dsn, serialize=0)
            try:
                curs = con.cursor()
            except Exception:
                # in case of problems reset the connection and try again
                con = None
                self._v_dbCon = None
                self._v_tryCon += 1
            else:
                self._v_dbCon = con
                self._v_tryCon = 0
                return curs

        # Same exception type and message as the former assert statement,
        # but raised explicitly so it is not stripped under "python -O".
        raise AssertionError("AIIEE no db cursor!!")

    def getDBFileMeta(self, fileid):
        """Return all rows of table ``meta`` for fileid, ordered by idx.

        Each row is (idx, tags, content, attributes).  Results are cached
        per fileid in REQUEST.SESSION['dbMeta'].
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache (has_key is kept for the session object because
        # only that method is known to be supported by it here)
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
            if fileid in metacache:
                print("meta from cache ")
                return metacache[fileid]

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("done")
            res = curs.fetchall()
        finally:
            # always release the cursor, also when the query fails
            curs.close()

        # store info in cache
        metacache[fileid] = res
        session['dbMeta'] = metacache
        return res

    def getDBFile(self, fileid):
        """Return the (filename, mtime) row of table ``files`` for fileid.

        The value is whatever fetchone() yields for the query.  Results
        are cached per fileid in REQUEST.SESSION['dbFiles'].
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
            if fileid in filecache:
                print("file from cache ")
                return filecache[fileid]

        sql = 'select filename,mtime from files where id=%(id)s'
        curs = self.dbCursor()
        try:
            print("DBFILE:  %s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("DBFILE: done")
            res = curs.fetchone()
        finally:
            # always release the cursor, also when the query fails
            curs.close()

        # store info in cache
        filecache[fileid] = res
        session['dbFiles'] = filecache
        return res

    def dbSearch(self, query):
        """Search the metadata content for query and return result objects.

        The query is matched as a prefix (SQL ``LIKE query%``) against
        ``meta.content``.  At most MAXHITS rows are read; each row is
        turned into a result object by getResult().
        """
        qs = query + "%"
        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        results = []
        fetched = 0
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, qs))
            curs.execute(sql, {'qs':qs})
            print("done")
            # count rows actually read so that exactly MAXHITS rows are
            # allowed and the reported number matches reality
            while fetched < MAXHITS:
                row = curs.fetchone()
                if not row:
                    break
                fetched += 1
                result = self.getResult(row)
                if result:
                    results.append(result)
        finally:
            # always release the cursor, also when a row fails to convert
            curs.close()

        print("SEARCH:  %s  results" % (fetched,))
        return results

    def getResult(self, db_result, rank=0):
        """Factory for result objects.

        db_result is a (fileid, idx, tags, content) row.  The result class
        is chosen from the tag path: BibResult for '/meta/bib/',
        ArchimResult for '/meta/archimedes/', AnyResult otherwise.
        """
        print("NEW RESULT!")

        (fileid, tagidx, tags, content) = db_result

        print("tags:  %s" % (tags,))
        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """Return the HTML rendering of a search result object."""
        print("renderresult! %s  --  %s" % (result, result.url))
        return result.render(self)

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None):
        """Run a search and show the result page.

        With a non-empty searchstring the hits are sorted with ranksort
        and stored, together with the search string, in REQUEST.SESSION
        under 'results' and 'searchstring'.  Without a searchstring
        nothing is searched and the session is left untouched; the result
        page is rendered in both cases.
        """
        if searchstring:
            session = self.REQUEST.SESSION
            print("SEARCH:  %s" % (searchstring,))
            res = self.dbSearch(searchstring)
            res.sort(ranksort)
            session['results'] = res
            session['searchstring'] = searchstring
            # only report results when a search really happened; "res"
            # does not exist otherwise
            print("SEARCH res: %s" % (res,))

        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
        return pt()
! 197:
! 198:
! 199:
def manage_AddOSAS_searchForm(self):
    """Render the form used to add an OSAS_search object."""
    template_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    # wrap the template in the acquisition context of the calling object
    form = PageTemplateFile(template_path).__of__(self)
    return form()
! 204:
def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """Create an OSAS_search object and add it to this container.

    id, title and dsn are handed to the OSAS_search constructor.  When a
    RESPONSE is given the browser is redirected to 'manage_main'.
    """
    self._setObject(id, OSAS_search(id, title, dsn))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
! 211:
! 212:
! 213:
! 214:
class SearchResult(SimpleItem):
    """Base class of all search result objects.

    Holds the plain attributes type, file, url, urlabel, content and
    rank; urlabel starts out equal to url.
    """

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Store the given values on the new result object."""
        self.type, self.file = type, file
        # the link label defaults to the link itself
        self.url = self.urlabel = url
        self.content, self.rank = content, rank
! 226:
class AnyResult(SearchResult):
    """Catch-all result for hits that need no special treatment."""

    def __init__(self, zope, db_result, rank):
        """Build the result from one search row.

        zope      -- object providing getDBFile() (the search object)
        db_result -- (fileid, idx, tags, content) row of the hit
        rank      -- rank stored on the result
        """
        SearchResult.__init__(self, type='unknown')
        print("NEW ANY RESULT!")

        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

        fileid, _tagidx, hit_tags, hit_content = db_result
        self.hitTag = hit_tags

        # get full info from db
        self.fileinfo = zope.getDBFile(fileid)
        assert self.fileinfo

        # content maps the tag path of the hit to the matching text
        self.content = {hit_tags: hit_content}
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """Render this result with its page template in the context of zope.

        The result object is put into REQUEST.SESSION['result'] before the
        template is called.
        """
        zope.REQUEST.SESSION['result'] = self
        print("renderender..." + " " + str(self))
        template = PageTemplateFile(self.zptFile).__of__(zope)
        return template()
! 258:
! 259:
class MetaResult(AnyResult):
    """Result object that additionally loads all metadata of its file."""

    def __init__(self, zope, db_result, rank):
        """Constructor; see AnyResult for the parameters.

        Stores the rows returned by zope.getDBFileMeta() in self.metainfo.
        """
        AnyResult.__init__(self, zope, db_result, rank)
        print("NEW META RESULT!")

        fileid, _tagidx, _tags, _content = db_result

        # get full info from db
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo

    def checkContext(self, tags, content):
        """Take over link information from a context meta entry.

        An entry ending in '/context/link' sets self.url, one ending in
        '/context/name' sets self.urlabel; empty content leaves the
        attribute unchanged.  Returns True when tags was one of these
        context tags, False otherwise.
        """
        if tags.endswith('/context/link'):
            target = 'url'
        elif tags.endswith('/context/name'):
            target = 'urlabel'
        else:
            return False

        if content:
            setattr(self, target, content)
        return True
! 288:
! 289:
class BibResult(MetaResult):
    """Result object for hits inside bibliographic ('/meta/bib') metadata."""

    def __init__(self, zope, db_result, rank):
        """Collect the bib entries of the hit's file.

        zope      -- object providing getDBFile() and getDBFileMeta()
        db_result -- (fileid, idx, tags, content) row of the hit
        rank      -- base rank; 100 is added for this result type

        self.content becomes a dict holding 'type' (the type attribute of
        the '/meta/bib' tag, or "*unknown*") plus one item per
        '/meta/bib/<name>' entry.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        print("NEW BIB RESULT!")
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        btype = ""
        bitems = {}

        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag (may replace url / urlabel)
            if self.checkContext(m_tags, m_content):
                continue

            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # attributes may be empty or None when the tag has none
                r = re.search('type="([^"]*)"', m_attributes or "")
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype
                continue

            # skip other tags until the bib tag itself has been seen
            if not btype:
                continue

            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                bitems[r.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        # fall back to a generic label when there is a link without a name
        if not self.urlabel and self.url:
            self.urlabel = "view"
! 338:
! 339:
class ArchimResult(MetaResult):
    """Result object for hits inside '/meta/archimedes' metadata."""

    def __init__(self, zope, db_result, rank):
        """Collect the archimedes entries of the hit's file.

        Parameters are those of MetaResult.  self.content becomes a dict
        with one item per '/meta/archimedes/<name>' entry and 100 is added
        to the rank.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        print("NEW ARCHIM RESULT!")
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        # process info
        collected = {}
        for _idx, entry_tags, entry_content, _attrs in self.metainfo:
            # context tags only update url / urlabel
            if self.checkContext(entry_tags, entry_content):
                continue
            # collect archimedes/something
            found = re.search('/meta/archimedes/(.*)', entry_tags)
            if found is not None:
                collected[found.group(1)] = entry_content

        self.content = collected
        self.rank = self.rank + 100
        # a link without a name gets a generic label
        if self.url and not self.urlabel:
            self.urlabel = "view"
! 372:
! 373:
! 374:
! 375:
def ranksort(res1, res2):
    """Comparison function ordering results by descending rank.

    Returns 1 when res2 ranks higher than res1, -1 when it ranks lower
    and 0 when both ranks are equal.
    """
    left, right = res2.rank, res1.rank
    return (left > right) - (left < right)
! 379:
! 380:
def urlForFile(filename):
    """Heuristic: return a viewer URL for the name of an index file.

    filename -- file name; only names below '/mpiwg/online/' that contain
                '/index.meta' yield a URL

    Returns the digilib URL whose ``fn`` parameter is the path between
    '/mpiwg/online/' and '/index.meta', or None when the name does not
    match (including an empty or None filename).
    """
    if not filename or not filename.startswith('/mpiwg/online/'):
        return None

    print("URLFORFILE: online  %s" % (filename,))
    # the dot is escaped so that only a literal "index.meta" matches
    r = re.search(r'/mpiwg/online/(.*)/index\.meta', filename)
    if r is None:
        return None

    return "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s" % r.group(1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>