Annotation of OSAS/OSA_system/OSAS_search.py, revision 1.7
1.1 casties 1: """Metadata search interface
2: ROC 2004, itgroup
3:
4: """
5:
6: from AccessControl import ClassSecurityInfo
7: from Globals import InitializeClass
8: from Globals import Persistent, package_home
9: from Products.PageTemplates.PageTemplateFile import PageTemplateFile
10: from Products.PageTemplates.PageTemplate import PageTemplate
1.6 casties 11: from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate
12: from OFS.Folder import Folder
1.1 casties 13: from OFS.SimpleItem import SimpleItem
14: #from pyPgSQL import PgSQL
15: import psycopg as PgSQL
16:
17: import re
18: import os
19:
# upper bound on the number of result rows dbSearch() processes per query
MAXHITS = 1000
21:
1.6 casties 22: class OSAS_search(Folder):
1.1 casties 23: """Object for global metadata search"""
24:
25: meta_type="OSAS_search"
26:
1.6 casties 27: manage_options=Folder.manage_options+(
28: {'label':'Main config','action':'manage_ChangeOSAS_searchForm'},
29: )
1.1 casties 30:
31:
32: def __init__(self,id,title,dsn=None):
33: """init"""
34: self.id=id
35: self.title=title
36: if dsn:
37: self.dsn = dsn
38: else:
39: self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
40: # volatile database connection object
41: self._v_dbCon = None
42: self._v_tryCon = 0
43:
44:
45: def dbCursor(self):
46: """returns new SQL cursor object"""
47: curs = None
48: if hasattr(self, '_v_dbCon') and self._v_dbCon is not None:
49: try:
50: curs = self._v_dbCon.cursor()
51: self._v_tryCon = 0
52: except:
53: # in case of problems reset dbCon
54: self._v_dbCon = None
55: self._v_tryCon += 1
56: else:
57: self._v_dbCon = None
58: self._v_tryCon = 0
59:
60: if not curs and self._v_tryCon < 3:
61: self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
62: # call ourself with the new connection
63: curs = self.dbCursor()
64:
65: assert curs, "AIIEE no db cursor!!"
66: return curs
67:
68: def getDBFileMeta(self, fileid):
69: """returns an array with all meta entries of fileid"""
70:
71: metacache = {}
72: # try in cache
73: if self.REQUEST.SESSION.has_key('dbMeta'):
74: metacache = self.REQUEST.SESSION['dbMeta']
75: if metacache.has_key(fileid):
76: res = metacache[fileid]
1.2 casties 77: #print "meta from cache "
1.1 casties 78: return res
79:
80: curs = self.dbCursor()
81:
82: sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
83: print sql, " -> ", fileid
84: curs.execute(sql, {'id':fileid})
85: print "done"
86:
87: res = curs.fetchall()
88: #print "res:", res
89: curs.close()
90: # store info in cache
91: metacache[fileid] = res
92: self.REQUEST.SESSION['dbMeta'] = metacache
93:
94: return res
95:
96: def getDBFile(self, fileid):
97: """returns the file information of fileid"""
98:
99: filecache = {}
100: # try in cache
101: if self.REQUEST.SESSION.has_key('dbFiles'):
102: filecache = self.REQUEST.SESSION['dbFiles']
103: if filecache.has_key(fileid):
104: res = filecache[fileid]
1.2 casties 105: #print "file from cache "
1.1 casties 106: return res
107:
108: curs = self.dbCursor()
109:
110: sql = 'select filename,mtime from files where id=%(id)s'
111: print 'DBFILE: ', sql, " -> ", fileid
112: curs.execute(sql, {'id':fileid})
113: print "DBFILE: done"
114:
115: res = curs.fetchone()
116: #print "DBFILE: res:", res
117: curs.close()
118: # store info in cache
119: filecache[fileid] = res
120: self.REQUEST.SESSION['dbFiles'] = filecache
121:
122: return res
123:
124:
1.2 casties 125: def dbSearch(self, query, type):
1.1 casties 126: """search DB for query and return result set"""
1.3 casties 127: results = []
128: restypes = {}
129: if not query:
130: # empty query
131: return results
132:
1.1 casties 133: curs = self.dbCursor()
1.2 casties 134: if type == 'equals':
135: qs = query
136: elif type == 'startswith':
137: qs = query + "%"
138: elif type == 'contains':
139: qs = "%" + query + "%"
140:
1.3 casties 141: sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)'
1.1 casties 142: print sql, " -> ", qs
143: curs.execute(sql, {'qs':qs})
144: print "done"
145: res = curs.fetchone()
146: rescnt = 1
147: #print "res0:", res
148: while res and rescnt < MAXHITS:
149: #print "res:", res
150: result = self.getResult(res)
1.3 casties 151: if result:
1.1 casties 152: results.append(result)
1.3 casties 153: restypes[result.type] = result.type
1.1 casties 154:
155: res = curs.fetchone()
156: rescnt += 1
157:
158: curs.close()
159: #self.dbCon = None
160:
1.2 casties 161: #print "SEARCH: ", rescnt, " results"
1.3 casties 162: restypelist = restypes.keys()
163: return (results, restypelist)
1.1 casties 164:
165:
166: def getResult(self, db_result, rank=0):
167: """factory for result objects"""
168:
169: (fileid, tagidx, tags, content) = db_result
170: res = None
171:
172: if tags.find('/meta/bib/') > -1:
173: res = BibResult(self, db_result, rank)
174: elif tags.find('/meta/archimedes/') > -1:
175: res = ArchimResult(self, db_result, rank)
176: else:
177: res = AnyResult(self, db_result, rank)
178:
179: return res
1.3 casties 180:
1.1 casties 181:
182: def renderResult(self, result):
183: """returns HTML rendering of a search result"""
184:
185: return result.render(self)
186:
1.3 casties 187:
188: def filterResults(self, results, start, end, restypefilter=None):
189: """returns list of results that match a filter"""
190: # filter types first
191: if restypefilter:
192: res = []
193: for r in results:
1.7 ! casties 194: if r.type == restypefilter:
1.3 casties 195: res.append(r)
196: else:
197: res = results
1.5 casties 198: # new total count (because of filter)
199: rescnt = len(res)
1.3 casties 200: # filter on count
201: resgroup = res[start:end]
202:
203: return (resgroup, rescnt)
204:
1.1 casties 205:
206: #
207: # Web page stuff
208: #
209:
210: def index_html(self):
211: """metadata search"""
212: pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
213: return pt()
214:
215:
1.3 casties 216: def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None):
1.2 casties 217: """search and create result"""
218: sres = int(start) -1
219: lres = sres + count
220: try:
221: oldsearch = self.REQUEST.SESSION['searchstring']
222: oldtype = self.REQUEST.SESSION['searchtype']
223: except:
224: oldsearch = ""
225: oldtype = ""
226:
227: if not searchstring:
228: searchstring = oldsearch
229: searchtype = oldtype
230:
231: if not oldsearch or searchstring != oldsearch or searchtype != oldtype:
232: # new search
1.3 casties 233: (res, restypes) = self.dbSearch(searchstring, searchtype)
1.2 casties 234: # sort the result
1.1 casties 235: res.sort(ranksort)
1.2 casties 236: # store it
1.1 casties 237: self.REQUEST.SESSION['results'] = res
238: self.REQUEST.SESSION['searchstring'] = searchstring
1.2 casties 239: self.REQUEST.SESSION['searchtype'] = searchtype
1.3 casties 240: self.REQUEST.SESSION['resulttypes'] = restypes
1.1 casties 241:
1.3 casties 242: (resgroup, nres) = self.filterResults(self.REQUEST.SESSION['results'], sres, lres, restypefilter)
243: lres = min(lres, nres)
1.5 casties 244: sres = min(sres, nres)
1.3 casties 245: self.REQUEST.SESSION['resultgroup'] = resgroup
246: self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, nres, int(count))
247: self.REQUEST.SESSION['res_type_filter'] = restypefilter
248: if nres > 0:
249: zpt = "zpt/searchResult.zpt"
250: else:
251: zpt = "zpt/searchResult_none.zpt"
1.2 casties 252:
1.3 casties 253: pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self)
1.1 casties 254: return pt()
255:
1.2 casties 256:
257: def getSearchType(self):
258: """returns the last search type"""
259: try:
260: ret = self.REQUEST.SESSION['searchtype']
261: except:
262: ret = ""
263:
264: return ret
265:
266: def getSearchString(self):
267: """returns the last search string"""
268: try:
269: ret = self.REQUEST.SESSION['searchstring']
270: except:
271: ret = ""
272:
273: return ret
274:
275:
276: def hasNextResults(self):
277: """returns if there are more results"""
278: try:
279: (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
1.3 casties 280: return (first + count < total)
1.2 casties 281: except:
282: return False
283:
284: def hasPrevResults(self):
285: """returns if there are previous results"""
286: try:
287: (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
288: return (first > 1)
289: except:
290: return False
291:
292:
293: def nextResults(self):
294: """returns more results"""
295: try:
296: (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
297: first = first + count
298: last = last + count
299: if first > total:
300: first = total
301: if last > total:
302: last = total
303: except:
1.3 casties 304: print "OUCH: no next results!"
305: return self.search()
1.2 casties 306:
307: return self.search(start=first, count=count)
308:
309:
310: def prevResults(self):
311: """returns more results"""
312: try:
313: (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
314: first = first - count
315: last = last - count
316: if first < 1:
317: first = 1
318: if last < 1:
319: last = 1
320: except:
1.3 casties 321: print "OUCH: no prev results!"
322: return self.search()
1.2 casties 323:
324: return self.search(start=first, count=count)
1.1 casties 325:
1.6 casties 326:
327: def manage_ChangeOSAS_searchForm(self):
328: """create Search form"""
329: pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")).__of__(self)
330: return pt()
331:
332: def manage_ChangeOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
333: """add the OSAS_root"""
334: self.id = id
335: self.title = title
336: self.dsn = dsn
337: if RESPONSE is not None:
338: RESPONSE.redirect('manage_main')
1.2 casties 339:
340:
def manage_AddOSAS_searchForm(self):
    """render the AddOSAS_search.zpt page template in the context of self"""
    template_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    form = PageTemplateFile(template_path).__of__(self)
    return form()
345:
def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """create an OSAS_search object and store it in self under id

    Redirects to 'manage_main' when a RESPONSE object is supplied.
    """
    self._setObject(id, OSAS_search(id, title, dsn))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
352:
353:
354:
355:
class SearchResult(SimpleItem):
    """Common attributes shared by all search result objects."""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """store the given values; status starts out as None"""
        # kind of result (e.g. "bib", "archim")
        self.type = type
        # name of the index file
        self.file = file
        # list of url pairs for this result; an empty/missing value
        # becomes a fresh list
        self.urls = url or []
        # the content itself (list of tuples)
        self.content = content
        # status of the document (e.g. "online", "archive")
        self.status = None
        # rank used when presenting results
        self.rank = rank
376:
class AnyResult(SearchResult):
    """Generic result object, also the base of the typed results."""

    def __init__(self, zope, db_result, rank):
        """build the result from one hit row (fileid, idx, tags, content)

        zope is the object providing getDBFile(); an AssertionError is
        raised when it returns no file information for the hit.
        """
        SearchResult.__init__(self)
        self.type = 'unknown'
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

        hit_fileid, hit_idx, hit_tags, hit_content = db_result
        self.hitTag = hit_tags

        # fetch the file record belonging to the hit
        self.fileinfo = zope.getDBFile(hit_fileid)
        assert self.fileinfo

        # content maps the tag path of the hit to its text
        self.content = {hit_tags: hit_content}
        self.file = self.fileinfo[0]
        self.status = statusForFile(self.file)
        self.rank = rank

    def getContentList(self):
        """return the content as a list of (key, value) tuples"""
        return [(key, self.content[key]) for key in self.content.keys()]

    def render(self, zope):
        """store this result in the session and render its page template"""
        zope.REQUEST.SESSION['result'] = self
        template = PageTemplateFile(self.zptFile).__of__(zope)
        return template()
414:
415:
class MetaResult(AnyResult):
    """Result object that also loads all meta entries of its file."""

    def __init__(self, zope, db_result, rank):
        """constructor

        Loads the meta entries of the hit's file via zope.getDBFileMeta();
        an AssertionError is raised when there are none.
        """
        AnyResult.__init__(self, zope, db_result, rank)

        # db_result is (fileid, idx, tags, content); only the file id is needed
        self.metainfo = zope.getDBFileMeta(db_result[0])
        assert self.metainfo

    def checkContext(self, tags, content, ctxurl):
        """update the [link, name] pair ctxurl from a context meta entry

        ctxurl is modified in place and also returned.
        """
        slot = None
        if tags.endswith('/context/link'):
            slot = 0
        elif tags.endswith('/context/name'):
            slot = 1

        # only non-empty content overwrites the current value
        if slot is not None and content:
            ctxurl[slot] = content

        return ctxurl
1.1 casties 443:
444:
class BibResult(MetaResult):
    """bib type result object"""

    def __init__(self, zope, db_result, rank):
        """constructor

        Collects all /meta/bib/* entries of the hit's file into
        self.content (plus a 'type' key taken from the type attribute of
        the /meta/bib tag), adds the storage and context URLs and raises
        the rank by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        url = storageURL(self.file)
        if url:
            self.urls.append(url)

        btype = ""
        bitems = {}
        ctxurl = ['', '']

        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag
            ctxurl = self.checkContext(m_tags, m_content, ctxurl)
            # first tag with bib type attribute
            if m_tags.endswith('/meta/bib'):
                # attributes may be missing, so search an empty string then
                r = re.search('type="([^"]*)"', m_attributes or '')
                if r:
                    btype = r.group(1)

                if not btype:
                    btype = "*unknown*"

                bitems['type'] = btype
                continue

            # skip other tags until the bib tag has been seen
            if not btype:
                continue

            # collect bib/something
            r = re.search('/meta/bib/(.*)', m_tags)
            if r:
                k = r.group(1)
                bitems[k] = m_content
                # remember hit tag
                if m_tags == self.hitTag:
                    self.hitTag = k

        self.content = bitems
        # store context
        if not ctxurl[1]:
            ctxurl[1] = "View"
        # must have link
        if ctxurl[0]:
            self.urls.append(ctxurl)

        self.rank += 100

    def getContentList(self):
        """returns content as list of tuples in preferred order

        author, title, journal and year come first (when present), the
        'type' entry is left out, the remaining items follow.
        """
        l = []
        c = self.content.copy()
        # preferred items first
        for k in ('author', 'title', 'journal', 'year'):
            if k in c:
                l.append((k, c[k]))
                del c[k]

        # no type (absent when the file had no /meta/bib tag)
        if 'type' in c:
            del c['type']
        # copy the rest
        for k in c.keys():
            l.append((k, c[k]))

        return l
1.1 casties 520:
521:
class ArchimResult(MetaResult):
    """Result object for hits inside /meta/archimedes/ metadata."""

    def __init__(self, zope, db_result, rank):
        """constructor

        Collects all /meta/archimedes/* entries of the hit's file into
        self.content, adds the storage and context URLs and raises the
        rank by 100.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        storage = storageURL(self.file)
        if storage:
            self.urls.append(storage)

        # walk over all meta entries of the file
        collected = {}
        context = ['', '']
        for entry in self.metainfo:
            (entry_idx, entry_tags, entry_content, entry_attrs) = entry
            # pick up context link/name
            context = self.checkContext(entry_tags, entry_content, context)
            # keep archimedes/something entries only
            found = re.search('/meta/archimedes/(.*)', entry_tags)
            if not found:
                continue
            key = found.group(1)
            collected[key] = entry_content
            # shorten the hit tag to its key
            if entry_tags == self.hitTag:
                self.hitTag = key

        self.content = collected
        self.rank += 100
        # default link name
        if not context[1]:
            context[1] = "View"
        # only contexts with a link are stored
        if context[0]:
            self.urls.append(context)


    def getContentList(self):
        """return content as (key, value) tuples, preferred keys first

        author, title, date and place lead (when present), the remaining
        items follow.
        """
        remaining = self.content.copy()
        ordered = []
        for key in ('author', 'title', 'date', 'place'):
            if key in remaining:
                ordered.append((key, remaining[key]))
                del remaining[key]

        # everything else
        ordered.extend([(key, remaining[key]) for key in remaining.keys()])

        return ordered
1.1 casties 580:
581:
582:
583:
def ranksort(res1, res2):
    """sort results on rank

    Comparison function giving descending rank order: returns -1 when
    res1 has the higher rank, 1 when res2 has it and 0 when both ranks
    are equal. Written without the cmp() builtin so it does not depend
    on it being available.
    """
    return (res2.rank > res1.rank) - (res2.rank < res1.rank)
587:
588:
def statusForFile(filename):
    """heuristic... returns status for a index file name

    Returns "online" for names under /mpiwg/online/, "archive" for names
    under /mpiwg/archive/, "database" for http:// URLs and None for
    anything else, including an empty or missing file name.
    """
    if not filename:
        # no file name (e.g. NULL column): no status
        return None

    status = None
    if filename.startswith('/mpiwg/online/'):
        status = "online"
    elif filename.startswith('/mpiwg/archive/'):
        status = "archive"
    elif filename.startswith('http://'):
        status = "database"

    return status
600:
def storageURL(filename):
    """heuristic... returns an URL for a index file name

    Returns a tuple (url, name) or None:
    - names under /mpiwg/online/ that contain "/index.meta" give a
      ShowOnline URL for the directory part, named "Storage System"
    - http:// names are returned unchanged, named "Online Database"
    - everything else, including an empty or missing name, gives None
    """
    if not filename:
        # no file name (e.g. NULL column): no URL
        return None

    url = None
    name = None
    if filename.startswith('/mpiwg/online/'):
        # the dot is escaped so only a literal "index.meta" matches
        r = re.search(r'^(.*)/index\.meta', filename)
        if r:
            url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1)
            name = "Storage System"

    elif filename.startswith('http://'):
        url = filename
        name = "Online Database"

    if name and url:
        return (url, name)

    return None
1.1 casties 621:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>