Annotation of OSAS/OSA_system/OSAS_search.py, revision 1.2
1.1 casties 1: """Metadata search interface
2: ROC 2004, itgroup
3:
4: """
5:
6: from AccessControl import ClassSecurityInfo
7: from Globals import InitializeClass
8: from Globals import Persistent, package_home
9: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
10: from Products.PageTemplates.PageTemplate import PageTemplate
11: from OFS.SimpleItem import SimpleItem
12: #from pyPgSQL import PgSQL
13: import psycopg as PgSQL
14:
15: import re
16: import os
17:
18: MAXHITS = 1000  # bounds the row-fetch loop in OSAS_search.dbSearch()
19:
class OSAS_search(SimpleItem):
    """Object for global metadata search.

    Runs LIKE queries against the ``meta`` table of the configured
    database, wraps every hit in a result object and keeps the result
    list, the paging state and per-file lookup caches in
    ``self.REQUEST.SESSION``.
    """

    meta_type="OSAS_search"

    def __init__(self,id,title,dsn=None):
        """Initialise the search object.

        id    -- id of the new object
        title -- title of the new object
        dsn   -- connection string passed to PgSQL.connect(); when empty
                 the built-in default below is used
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): default DSN embeds credentials in source --
            # consider moving it to configuration.
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0

    def dbCursor(self):
        """Return a new SQL cursor, (re)connecting when necessary.

        A cached connection is reused if it still hands out cursors.
        Otherwise a fresh connection is opened; after three consecutive
        failed attempts the method gives up.

        Raises AssertionError when no cursor could be obtained.
        Exceptions raised by PgSQL.connect() are not caught.
        """
        curs = None
        if getattr(self, '_v_dbCon', None) is not None:
            try:
                curs = self._v_dbCon.cursor()
                self._v_tryCon = 0
            except Exception:
                # in case of problems reset dbCon
                self._v_dbCon = None
                self._v_tryCon = getattr(self, '_v_tryCon', 0) + 1
        else:
            self._v_dbCon = None
            self._v_tryCon = 0

        if curs is None and self._v_tryCon < 3:
            self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            # call ourself with the new connection
            curs = self.dbCursor()

        if curs is None:
            # explicit raise (same exception type as the former assert)
            # so the check is not stripped when running with -O
            raise AssertionError("AIIEE no db cursor!!")
        return curs

    def getDBFileMeta(self, fileid):
        """Return all meta rows (idx, tags, content, attributes) of fileid.

        Rows are ordered by idx. Results are cached per fileid in
        SESSION['dbMeta'].
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
            if fileid in metacache:
                return metacache[fileid]

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("done")
            res = curs.fetchall()
        finally:
            # release the cursor even if the query fails
            curs.close()

        # store info in cache
        metacache[fileid] = res
        session['dbMeta'] = metacache
        return res

    def getDBFile(self, fileid):
        """Return the (filename, mtime) row of fileid from table files.

        The value is whatever fetchone() yields for the query. Results
        are cached per fileid in SESSION['dbFiles'].
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
            if fileid in filecache:
                return filecache[fileid]

        sql = 'select filename,mtime from files where id=%(id)s'
        curs = self.dbCursor()
        try:
            print("DBFILE:  %s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id':fileid})
            print("DBFILE: done")
            res = curs.fetchone()
        finally:
            # release the cursor even if the query fails
            curs.close()

        # store info in cache
        filecache[fileid] = res
        session['dbFiles'] = filecache
        return res

    def dbSearch(self, query, type):
        """Search the meta table for query and return a list of results.

        query -- search text
        type  -- 'equals', 'startswith' or 'contains'; any other value
                 is treated like 'startswith' (the default of search())

        At most MAXHITS rows are read from the database. Each row is
        turned into a result object by getResult().
        """
        if type == 'equals':
            qs = query
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            # 'startswith' and every unrecognised type: without this
            # fallback qs would be unbound and the query would crash
            qs = query + "%"

        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        results = []
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, qs))
            curs.execute(sql, {'qs':qs})
            print("done")
            rescnt = 0
            res = curs.fetchone()
            while res and rescnt < MAXHITS:
                result = self.getResult(res)
                if result:
                    results.append(result)
                rescnt += 1
                res = curs.fetchone()
        finally:
            # release the cursor even if building a result fails
            curs.close()

        return results

    def getResult(self, db_result, rank=0):
        """Factory for result objects.

        db_result -- row (fileid, idx, tags, content) from the meta table
        rank      -- initial rank of the result

        The result class is chosen from the tag path of the hit.
        """
        (fileid, tagidx, tags, content) = db_result

        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """Return the HTML rendering of a search result."""
        return result.render(self)

    #
    # Web page stuff
    #

    def index_html(self):
        """Render the metadata search page."""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10):
        """Search, store the results in the session and render one page.

        searchstring -- text to search for; when empty the search stored
                        in the session (string and type) is reused
        searchtype   -- 'equals', 'startswith' or 'contains'
        start        -- 1-based index of the first result to show
        count        -- number of results per page

        start and count may be strings. The database is only queried
        when string or type differ from the stored search. The page
        slice goes to SESSION['resultgroup'] and the tuple
        (first, last, total, count) to SESSION['res_indexes'].
        """
        # convert before doing arithmetic: values may arrive as strings
        count = int(count)
        sres = int(start) - 1
        if sres < 0:
            # a negative index would slice from the end of the list
            sres = 0
        lres = sres + count

        session = self.REQUEST.SESSION
        try:
            oldsearch = session['searchstring']
            oldtype = session['searchtype']
        except (KeyError, AttributeError):
            oldsearch = ""
            oldtype = ""

        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype

        if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            if searchstring:
                res = self.dbSearch(searchstring, searchtype)
                # sort the result
                res.sort(ranksort)
            else:
                # nothing to search for and nothing stored
                res = []
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype

        allres = session['results']
        total = len(allres)
        last = lres
        if last > total:
            last = total
        session['resultgroup'] = allres[sres:lres]
        session['res_indexes'] = (sres+1, last, total, count)

        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
        return pt()

    def getSearchType(self):
        """Return the last search type, or "" if there is none."""
        try:
            return self.REQUEST.SESSION['searchtype']
        except (KeyError, AttributeError):
            return ""

    def getSearchString(self):
        """Return the last search string, or "" if there is none."""
        try:
            return self.REQUEST.SESSION['searchstring']
        except (KeyError, AttributeError):
            return ""

    def hasNextResults(self):
        """Return whether there are results after the current page."""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError, TypeError, ValueError):
            return False
        # compare the end of the page: the last page still starts
        # before total but has nothing following it
        return (last < total)

    def hasPrevResults(self):
        """Return whether there are results before the current page."""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError, TypeError, ValueError):
            return False
        return (first > 1)

    def nextResults(self):
        """Render the next page of results.

        Falls back to the first page when no paging state is stored.
        """
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError, TypeError, ValueError):
            print("OUCH: no next results")
            return self.search()

        first = first + count
        if first > total:
            first = total

        return self.search(start=first, count=count)

    def prevResults(self):
        """Render the previous page of results.

        Falls back to the first page when no paging state is stored.
        """
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        except (KeyError, AttributeError, TypeError, ValueError):
            print("OUCH: no prev results")
            return self.search()

        first = first - count
        if first < 1:
            first = 1

        return self.search(start=first, count=count)
1.1 casties 287:
1.2 ! casties 288:
! 289:
def manage_AddOSAS_searchForm(self):
    """Render the form used to add an OSAS_search object."""
    template_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    form = PageTemplateFile(template_path).__of__(self)
    return form()
294:
def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """Create an OSAS_search object and store it under id.

    id       -- id of the new object
    title    -- title of the new object
    dsn      -- database connection string handed to OSAS_search
    RESPONSE -- when given, it is redirected to 'manage_main'
    """
    self._setObject(id, OSAS_search(id, title, dsn))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
301:
302:
303:
304:
class SearchResult(SimpleItem):
    """Base class of all search result objects."""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Store the plain result attributes.

        The link label (urlabel) starts out equal to url.
        """
        self.rank = rank
        self.content = content
        # label and target are the same until a subclass changes them
        self.url = self.urlabel = url
        self.file = file
        self.type = type
316:
class AnyResult(SearchResult):
    """Catch-all result object for hits of no special type."""

    def __init__(self, zope, db_result, rank):
        """Build the result from one row of the meta table.

        zope      -- object providing getDBFile() (and REQUEST for render)
        db_result -- row (fileid, idx, tags, content)
        rank      -- rank of this result
        """
        SearchResult.__init__(self, type='unknown')

        template_dir = package_home(globals())
        self.zptFile = os.path.join(template_dir, "zpt/searchResult_any.zpt")

        fileid, tagidx, tags, content = db_result
        self.hitTag = tags

        # look up the file record belonging to the hit
        fileinfo = zope.getDBFile(fileid)
        self.fileinfo = fileinfo
        assert self.fileinfo

        # the content is a one-entry mapping: tag path -> matched text
        self.content = {tags: content}
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """Render this result with its page template.

        The result is published to the template as SESSION['result'].
        """
        zope.REQUEST.SESSION['result'] = self
        template = PageTemplateFile(self.zptFile)
        return template.__of__(zope)()
347:
348:
class MetaResult(AnyResult):
    """Result object that also loads all metadata rows of its file."""

    def __init__(self, zope, db_result, rank):
        """Build the result and fetch the file's meta rows.

        The rows returned by zope.getDBFileMeta() are kept in
        self.metainfo; an empty answer fails the assertion.
        """
        AnyResult.__init__(self, zope, db_result, rank)

        fileid, _tagidx, _tags, _content = db_result

        # every meta entry of the file, not only the one that matched
        self.metainfo = zope.getDBFileMeta(fileid)
        assert self.metainfo

    def checkContext(self, tags, content):
        """Apply a context tag to this result.

        A '/context/link' tag sets url and a '/context/name' tag sets
        urlabel, each only when content is non-empty.

        Returns True if tags is one of these context tags (whether or
        not anything was set), otherwise False.
        """
        if tags.endswith('/context/link'):
            if content:
                self.url = content
            return True

        if tags.endswith('/context/name'):
            if content:
                self.urlabel = content
            return True

        return False
377:
378:
class BibResult(MetaResult):
    """Result object for hits inside bib metadata."""

    def __init__(self, zope, db_result, rank):
        """Collect the bib entries of the file into self.content.

        The content mapping gets the key 'type' from the type attribute
        of the '/meta/bib' tag ("*unknown*" when it is missing) plus one
        entry per '/meta/bib/<name>' tag. Entries seen before the
        '/meta/bib' tag are skipped. The rank is raised by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        type_attr = re.compile('type="([^"]*)"')
        bib_entry = re.compile('/meta/bib/(.*)')

        btype = ""
        bitems = {}

        for m_idx, m_tags, m_content, m_attributes in self.metainfo:
            # context tags only influence url and label
            if self.checkContext(m_tags, m_content):
                continue

            if m_tags.endswith('/meta/bib'):
                # the bib tag itself carries the type attribute
                found = type_attr.search(m_attributes)
                if found:
                    btype = found.group(1)
                btype = btype or "*unknown*"
                bitems['type'] = btype
            elif btype:
                # bib/something, only once the bib tag has been seen
                found = bib_entry.search(m_tags)
                if found:
                    bitems[found.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        if self.url and not self.urlabel:
            self.urlabel = "view"
427:
428:
class ArchimResult(MetaResult):
    """Result object for hits inside archimedes metadata."""

    def __init__(self, zope, db_result, rank):
        """Collect the archimedes entries of the file into self.content.

        The content mapping gets one entry per
        '/meta/archimedes/<name>' tag. The rank is raised by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        archim_entry = re.compile('/meta/archimedes/(.*)')

        # process info
        bitems = {}
        for m_idx, m_tags, m_content, m_attributes in self.metainfo:
            # context tags only influence url and label
            if self.checkContext(m_tags, m_content):
                continue
            # archimedes/something
            found = archim_entry.search(m_tags)
            if found:
                bitems[found.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        if self.url and not self.urlabel:
            self.urlabel = "view"
461:
462:
463:
464:
def ranksort(res1, res2):
    """Comparison function that orders results by descending rank.

    Returns 1 if res2 ranks higher than res1, -1 if it ranks lower and
    0 if both ranks are equal.
    """
    left, right = res2.rank, res1.rank
    return (left > right) - (left < right)
468:
469:
def urlForFile(filename):
    """Heuristic: return a viewer URL for an index file name.

    filename -- path of an index.meta file, may be None or empty

    Only paths of the form '/mpiwg/online/<dir>/index.meta' are
    mapped; <dir> becomes the fn parameter of the digilib URL.

    Returns the URL, or None when filename is empty or does not match.
    """
    # missing file names have no URL (and no startswith method)
    if not filename or not filename.startswith('/mpiwg/online/'):
        return None

    print("URLFORFILE: online  %s" % (filename,))
    # the dot is escaped so that only a literal 'index.meta' matches
    found = re.search(r'/mpiwg/online/(.*)/index\.meta', filename)
    if found is None:
        return None

    return "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%found.group(1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>