Annotation of OSAS/OSA_system/OSAS_search.py, revision 1.8
1.1 casties 1: """Metadata search interface
2: ROC 2004, itgroup
3:
4: """
5:
6: from AccessControl import ClassSecurityInfo
7: from Globals import InitializeClass
8: from Globals import Persistent, package_home
9: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
10: from Products.PageTemplates.PageTemplate import PageTemplate
1.6 casties 11: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
12: from OFS.Folder import Folder
1.1 casties 13: from OFS.SimpleItem import SimpleItem
1.8 ! casties 14: try:
! 15: import psycopg2 as PgSQL
! 16: except:
! 17: try:
! 18: import psycopg as PgSQL
! 19: except:
! 20: from pyPgSQL import PgSQL
1.1 casties 21:
22: import re
23: import os
24:
# Cap used by OSAS_search.dbSearch to limit how many result rows are read
# from the database for a single query.
MAXHITS = 1000
26:
class OSAS_search(Folder):
    """Object for global metadata search.

    Queries the ``meta`` and ``files`` tables of the database named by
    ``self.dsn`` and keeps search state (result list, search string, paging
    indexes, per-file caches) in ``REQUEST.SESSION``.
    """

    meta_type = "OSAS_search"

    manage_options = Folder.manage_options + (
        {'label': 'Main config', 'action': 'manage_ChangeOSAS_searchForm'},
    )

    def __init__(self, id, title, dsn=None):
        """Store id and title and choose the database connection string.

        When ``dsn`` is empty the built-in default connection string is used.
        """
        self.id = id
        self.title = title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): the fallback DSN embeds host and credentials in source.
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        self._v_tryCon = 0

    def dbCursor(self):
        """Return a new SQL cursor object.

        Reuses the cached connection when it still hands out cursors;
        otherwise reconnects with ``self.dsn``. After three consecutive
        cursor failures an AssertionError is raised. Errors raised by
        ``PgSQL.connect`` propagate unchanged.
        """
        curs = None
        if getattr(self, '_v_dbCon', None) is not None:
            try:
                curs = self._v_dbCon.cursor()
            except Exception:
                # in case of problems reset dbCon and count the failure
                self._v_dbCon = None
                self._v_tryCon = getattr(self, '_v_tryCon', 0) + 1
            else:
                self._v_tryCon = 0
        else:
            self._v_dbCon = None
            self._v_tryCon = 0

        if curs is None and self._v_tryCon < 3:
            self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            # call ourself with the new connection
            curs = self.dbCursor()

        if curs is None:
            # explicit raise (same exception type as the former assert) so the
            # check is not stripped when Python runs with -O
            raise AssertionError("AIIEE no db cursor!!")
        return curs

    def getDBFileMeta(self, fileid):
        """Return all meta rows (idx, tags, content, attributes) of fileid.

        Rows are ordered by idx. Results are cached per fileid in
        ``SESSION['dbMeta']``.
        """
        session = self.REQUEST.SESSION
        metacache = {}
        # try in cache
        if session.has_key('dbMeta'):
            metacache = session['dbMeta']
        if fileid in metacache:
            return metacache[fileid]

        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id': fileid})
            print("done")
            res = curs.fetchall()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        metacache[fileid] = res
        session['dbMeta'] = metacache
        return res

    def getDBFile(self, fileid):
        """Return the (filename, mtime) row of fileid, or None if there is none.

        Results are cached per fileid in ``SESSION['dbFiles']``.
        """
        session = self.REQUEST.SESSION
        filecache = {}
        # try in cache
        if session.has_key('dbFiles'):
            filecache = session['dbFiles']
        if fileid in filecache:
            return filecache[fileid]

        sql = 'select filename,mtime from files where id=%(id)s'
        curs = self.dbCursor()
        try:
            print("DBFILE:  %s  ->  %s" % (sql, fileid))
            curs.execute(sql, {'id': fileid})
            print("DBFILE: done")
            res = curs.fetchone()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache
        filecache[fileid] = res
        session['dbFiles'] = filecache
        return res

    def dbSearch(self, query, type):
        """Search the meta table for query and return (results, result types).

        ``type`` selects the match mode: 'equals', 'startswith' or
        'contains'; any other value is treated like 'startswith'. Matching
        is case-insensitive. The first tuple element is a list of result
        objects, the second a list of the distinct ``type`` values among
        them. An empty query yields ``([], [])``.
        """
        results = []
        restypes = {}
        if not query:
            # empty query: keep the two-element shape so callers can unpack it
            return (results, [])

        if type == 'equals':
            qs = query
        elif type == 'contains':
            qs = "%" + query + "%"
        else:
            # 'startswith', and the fallback for unrecognised types (which
            # previously left qs unbound)
            qs = query + "%"

        sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
        curs = self.dbCursor()
        try:
            print("%s  ->  %s" % (sql, qs))
            curs.execute(sql, {'qs': qs})
            print("done")
            res = curs.fetchone()
            rescnt = 1
            # the counter starts at 1, so at most MAXHITS - 1 rows are processed
            while res and rescnt < MAXHITS:
                result = self.getResult(res)
                if result:
                    results.append(result)
                    restypes[result.type] = result.type
                res = curs.fetchone()
                rescnt += 1
        finally:
            # release the cursor even when the query or a result factory fails
            curs.close()

        return (results, list(restypes.keys()))

    def getResult(self, db_result, rank=0):
        """Factory for result objects.

        Picks the result class from the tag path of the hit
        (fileid, idx, tags, content).
        """
        (fileid, tagidx, tags, content) = db_result

        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """Return the HTML rendering of a search result."""
        return result.render(self)

    def filterResults(self, results, start, end, restypefilter=None):
        """Return (page, total) for the results that match a type filter.

        ``page`` is the slice ``[start:end]`` of the (optionally filtered)
        list, ``total`` the length of that list before slicing.
        """
        # filter types first
        if restypefilter:
            res = [r for r in results if r.type == restypefilter]
        else:
            res = results
        # total count after filtering, then the requested window
        return (res[start:end], len(res))

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
        """Run (or reuse) a search and render one page of results.

        ``start`` is the 1-based index of the first hit to show and
        ``count`` the page size; both may arrive as strings. Without
        ``searchstring`` the previous search stored in the session is
        reused. The database is only queried when search string or type
        differ from the stored ones.
        """
        # request parameters may be strings; convert before doing arithmetic
        count = int(count)
        # a start below 1 would turn into a negative slice offset
        sres = max(int(start) - 1, 0)
        lres = sres + count
        try:
            oldsearch = self.REQUEST.SESSION['searchstring']
            oldtype = self.REQUEST.SESSION['searchtype']
        except (KeyError, AttributeError):
            oldsearch = ""
            oldtype = ""

        if not searchstring:
            searchstring = oldsearch
            searchtype = oldtype

        session = self.REQUEST.SESSION
        if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
            # new search
            (res, restypes) = self.dbSearch(searchstring, searchtype)
            # sort the result
            res.sort(ranksort)
            # store it
            session['results'] = res
            session['searchstring'] = searchstring
            session['searchtype'] = searchtype
            session['resulttypes'] = restypes

        (resgroup, nres) = self.filterResults(session['results'], sres, lres, restypefilter)
        lres = min(lres, nres)
        sres = min(sres, nres)
        session['resultgroup'] = resgroup
        session['res_indexes'] = (sres + 1, lres, nres, count)
        session['res_type_filter'] = restypefilter
        if nres > 0:
            zpt = "zpt/searchResult.zpt"
        else:
            zpt = "zpt/searchResult_none.zpt"

        pt = PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
        return pt()

    def getSearchType(self):
        """Return the last search type, or "" if none is stored."""
        try:
            return self.REQUEST.SESSION['searchtype']
        except (KeyError, AttributeError):
            return ""

    def getSearchString(self):
        """Return the last search string, or "" if none is stored."""
        try:
            return self.REQUEST.SESSION['searchstring']
        except (KeyError, AttributeError):
            return ""

    def hasNextResults(self):
        """Return whether there are results after the current page."""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
            # first + count is the 1-based index of the next page's first hit,
            # so a next page exists as soon as that index is within total
            return (first + count <= total)
        except (KeyError, AttributeError, TypeError, ValueError):
            return False

    def hasPrevResults(self):
        """Return whether there are results before the current page."""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
            return (first > 1)
        except (KeyError, AttributeError, TypeError, ValueError):
            return False

    def nextResults(self):
        """Render the next page of results."""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
            first = min(first + count, total)
        except (KeyError, AttributeError, TypeError, ValueError):
            print("OUCH: no next results!")
            return self.search()

        # NOTE(review): the stored result type filter is not passed on here.
        return self.search(start=first, count=count)

    def prevResults(self):
        """Render the previous page of results."""
        try:
            (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
            first = max(first - count, 1)
        except (KeyError, AttributeError, TypeError, ValueError):
            print("OUCH: no prev results!")
            return self.search()

        # NOTE(review): the stored result type filter is not passed on here.
        return self.search(start=first, count=count)

    def manage_ChangeOSAS_searchForm(self):
        """create Search form"""
        pt = PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
        return pt()

    def manage_ChangeOSAS_search(self, id, title=None, dsn=None, RESPONSE=None):
        """Change id, title and connection string of this search object.

        An empty ``dsn`` keeps the current connection string. Redirects to
        'manage_main' when a RESPONSE is given.
        """
        # NOTE(review): assigning self.id does not rename the object in its
        # container - confirm this is intended.
        self.id = id
        self.title = title
        if dsn:
            self.dsn = dsn
        # forget the cached connection so the next cursor uses the current DSN
        self._v_dbCon = None
        self._v_tryCon = 0
        if RESPONSE is not None:
            RESPONSE.redirect('manage_main')
1.2 casties 344:
345:
def manage_AddOSAS_searchForm(self):
    """Render the form for adding an OSAS_search object."""
    template_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    form = PageTemplateFile(template_path).__of__(self)
    return form()
350:
def manage_AddOSAS_search(self, id, title=None, dsn=None, RESPONSE=None):
    """Create an OSAS_search object and store it in self under id.

    Redirects to 'manage_main' when a RESPONSE is given.
    """
    self._setObject(id, OSAS_search(id, title, dsn))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
357:
358:
359:
360:
class SearchResult(SimpleItem):
    """Base class holding the data common to every search result."""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Initialise the result fields from the given values."""
        # result type (e.g. "bib", "archim") and rank used for presentation
        self.type = type
        self.rank = rank
        # index file name
        self.file = file
        # urls for the result (list of pairs); empty list when none given
        self.urls = url or []
        # actual content (list of tuples)
        self.content = content
        # document status (e.g. "online", "archive")
        self.status = None
381:
class AnyResult(SearchResult):
    """Catch-all result for hits that match no more specific type."""

    def __init__(self, zope, db_result, rank):
        """Build the result from a hit row (fileid, idx, tags, content).

        ``zope`` is the object used to look up the file row via getDBFile.
        """
        SearchResult.__init__(self)
        self.type = 'unknown'
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

        (hit_fileid, hit_idx, hit_tags, hit_content) = db_result
        self.hitTag = hit_tags

        # get full info from db
        fileinfo = zope.getDBFile(hit_fileid)
        self.fileinfo = fileinfo
        assert fileinfo

        # the only content of a catch-all result is the hit itself
        self.content = {hit_tags: hit_content}
        filename = fileinfo[0]
        self.file = filename
        self.status = statusForFile(filename)
        self.rank = rank

    def getContentList(self):
        """Return the content as a list of (key, value) tuples."""
        return list(self.content.items())

    def render(self, zope):
        """Render this result with its page template in the context of zope."""
        zope.REQUEST.SESSION['result'] = self
        template = PageTemplateFile(self.zptFile).__of__(zope)
        return template()
419:
420:
class MetaResult(AnyResult):
    """Result that additionally loads all metadata rows of its file."""

    def __init__(self, zope, db_result, rank):
        """Build the result and fetch the meta rows of the hit's file."""
        AnyResult.__init__(self, zope, db_result, rank)

        # first column of the hit row is the file id
        hit_fileid = db_result[0]

        # get full info from db
        self.metainfo = zope.getDBFileMeta(hit_fileid)
        assert self.metainfo

    def checkContext(self, tags, content, ctxurl):
        """Update the [link, name] pair ctxurl from a context meta entry.

        A non-empty content of a '/context/link' tag goes into slot 0, that
        of a '/context/name' tag into slot 1. The list is changed in place
        and returned.
        """
        for slot, suffix in enumerate(('/context/link', '/context/name')):
            if tags.endswith(suffix):
                if content:
                    ctxurl[slot] = content
                break

        return ctxurl
1.1 casties 448:
449:
class BibResult(MetaResult):
    """bib type result object"""

    def __init__(self, zope, db_result, rank):
        """Build a bibliographic result from a hit row.

        Collects the '/meta/bib/...' entries of the file into ``content``
        (plus a 'type' entry taken from the type attribute of the
        '/meta/bib' tag), adds storage and context URLs to ``urls`` and
        raises the rank by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        url = storageURL(self.file)
        if url:
            self.urls.append(url)

        btype = ""
        bitems = {}
        ctxurl = ['', '']

        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag
            ctxurl = self.checkContext(m_tags, m_content, ctxurl)
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # the attributes column may be empty/None
                r = re.search('type="([^"]*)"', m_attributes or '')
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype
                continue

            # skip other tags until the bib tag has been seen
            if not btype:
                continue

            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                k = r.group(1)
                bitems[k] = m_content
                # remember hit tag (by its short name)
                if m_tags == self.hitTag:
                    self.hitTag = k

        self.content = bitems
        # store context: default link name, and only if there is a link
        if not ctxurl[1]:
            ctxurl[1] = "View"
        if ctxurl[0]:
            self.urls.append(ctxurl)

        self.rank += 100

    def getContentList(self):
        """Return content as list of (key, value) tuples in preferred order.

        'author', 'title', 'journal' and 'year' come first (when present),
        the 'type' entry is left out, all other entries follow.
        """
        l = []
        c = self.content.copy()
        # preferred items first
        for k in ('author', 'title', 'journal', 'year'):
            if k in c:
                l.append((k, c.pop(k)))

        # no type; the entry is missing when no '/meta/bib' row was collected
        c.pop('type', None)
        # copy the rest
        l.extend(c.items())

        return l
1.1 casties 525:
526:
class ArchimResult(MetaResult):
    """Result object for archimedes type metadata."""

    def __init__(self, zope, db_result, rank):
        """Build an archimedes result from a hit row.

        Collects the '/meta/archimedes/...' entries of the file into
        ``content``, adds storage and context URLs to ``urls`` and raises
        the rank by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        storage_link = storageURL(self.file)
        if storage_link:
            self.urls.append(storage_link)

        # process info
        collected = {}
        context = ['', '']
        for (row_idx, row_tags, row_content, row_attributes) in self.metainfo:
            # context tag
            context = self.checkContext(row_tags, row_content, context)
            # collect archimedes/something
            found = re.search('/meta/archimedes/(.*)', row_tags)
            if not found:
                continue
            key = found.group(1)
            collected[key] = row_content
            # remember hit tag (by its short name)
            if row_tags == self.hitTag:
                self.hitTag = key

        self.content = collected
        self.rank += 100
        # store context: default link name, and only if there is a link
        if not context[1]:
            context[1] = "View"
        if context[0]:
            self.urls.append(context)

    def getContentList(self):
        """Return content as list of (key, value) tuples in preferred order.

        'author', 'title', 'date' and 'place' come first (when present),
        all other entries follow.
        """
        remaining = self.content.copy()
        ordered = []
        # preferred items first
        for key in ('author', 'title', 'date', 'place'):
            if key in remaining:
                ordered.append((key, remaining.pop(key)))

        # copy the rest
        ordered.extend(remaining.items())
        return ordered
1.1 casties 585:
586:
587:
588:
def ranksort(res1, res2):
    """Comparison function that orders results by descending rank.

    Returns 1 if res2 ranks higher than res1, -1 if lower, 0 if equal.
    """
    first, second = res1.rank, res2.rank
    return (second > first) - (second < first)
592:
593:
def statusForFile(filename):
    """Heuristic: derive a document status from an index file name.

    Returns "online", "archive" or "database" depending on the prefix of
    filename, or None when no prefix matches.
    """
    prefix_status = (
        ('/mpiwg/online/', "online"),
        ('/mpiwg/archive/', "archive"),
        ('http://', "database"),
    )
    for prefix, status in prefix_status:
        if filename.startswith(prefix):
            return status

    return None
605:
def storageURL(filename):
    """Heuristic: derive a (url, name) pair from an index file name.

    Files below '/mpiwg/online/' that contain '/index.meta' map to the
    storage system viewer for their directory; names starting with
    'http://' are returned as they are. Everything else yields None.
    """
    if filename.startswith('/mpiwg/online/'):
        match = re.search('^(.*)/index.meta', filename)
        if match is None:
            return None
        viewer = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"
        return (viewer % match.group(1), "Storage System")

    if filename.startswith('http://'):
        return (filename, "Online Database")

    return None
1.1 casties 626:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>