--- OSAS/OSA_system/OSAS_search.py 2004/07/01 19:31:25 1.1 +++ OSAS/OSA_system/OSAS_search.py 2004/07/09 16:55:19 1.3 @@ -69,7 +69,7 @@ class OSAS_search(SimpleItem): metacache = self.REQUEST.SESSION['dbMeta'] if metacache.has_key(fileid): res = metacache[fileid] - print "meta from cache " + #print "meta from cache " return res curs = self.dbCursor() @@ -97,7 +97,7 @@ class OSAS_search(SimpleItem): filecache = self.REQUEST.SESSION['dbFiles'] if filecache.has_key(fileid): res = filecache[fileid] - print "file from cache " + #print "file from cache " return res curs = self.dbCursor() @@ -117,23 +117,35 @@ class OSAS_search(SimpleItem): return res - def dbSearch(self, query): + def dbSearch(self, query, type): """search DB for query and return result set""" + results = [] + restypes = {} + if not query: + # empty query + return results + curs = self.dbCursor() - qs = query + "%" - sql = 'select fileid,idx,tags,content from meta where content like %(qs)s' + if type == 'equals': + qs = query + elif type == 'startswith': + qs = query + "%" + elif type == 'contains': + qs = "%" + query + "%" + + sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)' print sql, " -> ", qs curs.execute(sql, {'qs':qs}) print "done" - results = [] res = curs.fetchone() rescnt = 1 #print "res0:", res while res and rescnt < MAXHITS: #print "res:", res result = self.getResult(res) - if (result): + if result: results.append(result) + restypes[result.type] = result.type res = curs.fetchone() rescnt += 1 @@ -141,18 +153,17 @@ class OSAS_search(SimpleItem): curs.close() #self.dbCon = None - print "SEARCH: ", rescnt, " results" - return results + #print "SEARCH: ", rescnt, " results" + restypelist = restypes.keys() + return (results, restypelist) def getResult(self, db_result, rank=0): """factory for result objects""" - print "NEW RESULT!" (fileid, tagidx, tags, content) = db_result res = None - print "tags: ", tags if tags.find('/meta/bib/') > -1: res = BibResult(self, db_result, rank) elif tags.find('/meta/archimedes/') > -1: @@ -161,16 +172,31 @@ class OSAS_search(SimpleItem): res = AnyResult(self, db_result, rank) return res + def renderResult(self, result): """returns HTML rendering of a search result""" - print "renderresult!", result, " -- ", result.url return result.render(self) - - - + + def filterResults(self, results, start, end, restypefilter=None): + """returns list of results that match a filter""" + # filter types first + if restypefilter: + res = [] + for r in results: + if r.type in restypefilter: + res.append(r) + else: + res = results + # filter on count + resgroup = res[start:end] + # new total count (because of filter) + rescnt = len(res) + + return (resgroup, rescnt) + # # Web page stuff @@ -182,21 +208,117 @@ class OSAS_search(SimpleItem): return pt() - def search(self, searchstring=None): - """search and result""" - if searchstring: - print "SEARCH: ", searchstring - res = self.dbSearch(searchstring) + def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None): + """search and create result""" + sres = int(start) -1 + lres = sres + count + try: + oldsearch = self.REQUEST.SESSION['searchstring'] + oldtype = self.REQUEST.SESSION['searchtype'] + except: + oldsearch = "" + oldtype = "" + + if not searchstring: + searchstring = oldsearch + searchtype = oldtype + + if not oldsearch or searchstring != oldsearch or searchtype != oldtype: + # new search + (res, restypes) = self.dbSearch(searchstring, searchtype) + # sort the result res.sort(ranksort) + # store it self.REQUEST.SESSION['results'] = res self.REQUEST.SESSION['searchstring'] = searchstring + self.REQUEST.SESSION['searchtype'] = searchtype + self.REQUEST.SESSION['resulttypes'] = restypes - print "SEARCH res:", res - pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self) + (resgroup, nres) = self.filterResults(self.REQUEST.SESSION['results'], sres, lres, restypefilter) + lres = min(lres, nres) + self.REQUEST.SESSION['resultgroup'] = resgroup + self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, nres, int(count)) + self.REQUEST.SESSION['res_type_filter'] = restypefilter + if nres > 0: + zpt = "zpt/searchResult.zpt" + else: + zpt = "zpt/searchResult_none.zpt" + + pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self) return pt() - + + def getSearchType(self): + """returns the last search type""" + try: + ret = self.REQUEST.SESSION['searchtype'] + except: + ret = "" + + return ret + + def getSearchString(self): + """returns the last search string""" + try: + ret = self.REQUEST.SESSION['searchstring'] + except: + ret = "" + + return ret + + + def hasNextResults(self): + """returns if there are more results""" + try: + (first, last, total, count) = self.REQUEST.SESSION['res_indexes'] + return (first + count < total) + except: + return False + + def hasPrevResults(self): + """returns if there are previous results""" + try: + (first, last, total, count) = self.REQUEST.SESSION['res_indexes'] + return (first > 1) + except: + return False + + + def nextResults(self): + """returns more results""" + try: + (first, last, total, count) = self.REQUEST.SESSION['res_indexes'] + first = first + count + last = last + count + if first > total: + first = total + if last > total: + last = total + except: + print "OUCH: no next results!" + return self.search() + + return self.search(start=first, count=count) + + def prevResults(self): + """returns more results""" + try: + (first, last, total, count) = self.REQUEST.SESSION['res_indexes'] + first = first - count + last = last - count + if first < 1: + first = 1 + if last < 1: + last = 1 + except: + print "OUCH: no prev results!" + return self.search() + + return self.search(start=first, count=count) + + + def manage_AddOSAS_searchForm(self): """create Search form""" pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")).__of__(self) @@ -217,11 +339,20 @@ class SearchResult(SimpleItem): def __init__(self, type='unknown', file=None, url=None, content=None, rank=0): """init""" + # result type (e.g. "bib", "archim") self.type = type + # index file name self.file = file - self.url = url - self.urlabel = url + # url for result (list of pairs) + if url: + self.urls = url + else: + self.urls = [] + # actual content (list of tuples) self.content = content + # document status (e.g. "online", "archive") + self.status = None + # result rank for presentation self.rank = rank class AnyResult(SearchResult): @@ -229,30 +360,36 @@ class AnyResult(SearchResult): def __init__(self, zope, db_result, rank): """returns a catch-all type result""" - SearchResult.__init__(self, type='unknown') - print "NEW ANY RESULT!" - + SearchResult.__init__(self) + #print "NEW ANY RESULT!" + self.type='unknown' self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt") - (fileid, tagidx, tags, content) = db_result - self.hitTag = tags + (db_fileid, db_tagidx, db_tags, db_content) = db_result + self.hitTag = db_tags # get full info from db - self.fileinfo = zope.getDBFile(fileid) + self.fileinfo = zope.getDBFile(db_fileid) assert self.fileinfo items = {} - items[tags] = content + items[db_tags] = db_content self.content = items self.file = self.fileinfo[0] - self.url = "" - self.urlabel = self.file + self.status = statusForFile(self.file) self.rank = rank + def getContentList(self): + """returns content as list of tuples in preferred order""" + l = [] + for k in self.content.keys(): + l.append((k, self.content[k])) + + return l + def render(self, zope): """render this result object""" zope.REQUEST.SESSION['result'] = self - print "renderender...", self pt=PageTemplateFile(self.zptFile).__of__(zope) return pt() @@ -263,7 +400,7 @@ class MetaResult(AnyResult): def __init__(self, zope, db_result, rank): """contructor""" AnyResult.__init__(self, zope, db_result, rank) - print "NEW META RESULT!" + #print "NEW META RESULT!" (fileid, tagidx, tags, content) = db_result @@ -271,20 +408,19 @@ class MetaResult(AnyResult): self.metainfo = zope.getDBFileMeta(fileid) assert self.metainfo - def checkContext(self, tags, content): - """takes meta entry and sets url from context tags""" + def checkContext(self, tags, content, ctxurl): + """takes meta entry and updates url from context tags""" if tags.endswith('/context/link'): if content: - self.url = content + #print "CTXlink: ", content + ctxurl[0] = content elif tags.endswith('/context/name'): if content: - self.urlabel = content - - else: - return False + #print "CTXname: ", content + ctxurl[1] = content - return True + return ctxurl class BibResult(MetaResult): @@ -293,21 +429,22 @@ class BibResult(MetaResult): def __init__(self, zope, db_result, rank): """constructor""" MetaResult.__init__(self, zope, db_result, rank) - print "NEW BIB RESULT!" + #print "NEW BIB RESULT!", self self.type = "bib" self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt") - self.url = urlForFile(self.file) - self.urlabel = None + url = storageURL(self.file) + if url: + self.urls.append(url) (fileid, tagidx, tags, content) = db_result btype = "" bitems = {} + ctxurl = ['', ''] for me in self.metainfo: (m_idx, m_tags, m_content, m_attributes) = me # context tag - if self.checkContext(m_tags, m_content): - continue + ctxurl = self.checkContext(m_tags, m_content, ctxurl) # first tag with bib type attribute if m_tags.endswith('/meta/bib'): r = re.search('type="([^"]*)"', m_attributes) @@ -329,12 +466,38 @@ class BibResult(MetaResult): k = r.group(1) #print "CONTENT: ", m_content bitems[k] = m_content + # remember hit tag + if m_tags == self.hitTag: + self.hitTag = k continue self.content = bitems + # store context + if not ctxurl[1]: + ctxurl[1] = "View" + # must have link + if ctxurl[0]: + self.urls.append(ctxurl) + self.rank += 100 - if not self.urlabel and self.url: - self.urlabel = "view" + + def getContentList(self): + """returns content as list of tuples in preferred order""" + l = [] + c = self.content.copy() + # preferred items first + for k in ('author', 'title', 'journal', 'year'): + if c.has_key(k): + l.append((k, c[k])) + del c[k] + + # no type + del c['type'] + # copy the rest + for k in c.keys(): + l.append((k, c[k])) + + return l class ArchimResult(MetaResult): @@ -343,32 +506,58 @@ class ArchimResult(MetaResult): def __init__(self, zope, db_result, rank): """constructor""" MetaResult.__init__(self, zope, db_result, rank) - print "NEW ARCHIM RESULT!" + #print "NEW ARCHIM RESULT!", self self.type = "archim" self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt") - self.url = urlForFile(self.file) - self.urlabel = None + url = storageURL(self.file) + if url: + self.urls.append(url) + (fileid, tagidx, tags, content) = db_result # process info bitems = {} + ctxurl = ['', ''] for me in self.metainfo: (m_idx, m_tags, m_content, m_attributes) = me # context tag - if self.checkContext(m_tags, m_content): - continue + ctxurl = self.checkContext(m_tags, m_content, ctxurl) # collect archimedes/something r = re.search('/meta/archimedes/(.*)', m_tags) if r: k = r.group(1) #print "CONTENT: ", m_content bitems[k] = m_content + # remember hit tag + if m_tags == self.hitTag: + self.hitTag = k continue self.content = bitems self.rank += 100 - if not self.urlabel and self.url: - self.urlabel = "view" + # store context + if not ctxurl[1]: + ctxurl[1] = "View" + # must have link + if ctxurl[0]: + self.urls.append(ctxurl) + + + def getContentList(self): + """returns content as list of tuples in preferred order""" + l = [] + c = self.content.copy() + # preferred items first + for k in ('author', 'title', 'date', 'place'): + if c.has_key(k): + l.append((k, c[k])) + del c[k] + + # copy the rest + for k in c.keys(): + l.append((k, c[k])) + + return l @@ -378,13 +567,31 @@ def ranksort(res1, res2): return cmp(res2.rank, res1.rank) -def urlForFile(filename): +def statusForFile(filename): + """heuristic... returns status for a index file name""" + status = None + if filename.startswith('/mpiwg/online/'): + status = "online" + elif filename.startswith('/mpiwg/archive/'): + status = "archive" + elif filename.startswith('http://'): + status = "database" + + return status + +def storageURL(filename): """heuristic... returns an URL for a index file name""" url = None + name = None if filename.startswith('/mpiwg/online/'): - print "URLFORFILE: online ", filename - r = re.search('/mpiwg/online/(.*)/index.meta', filename) + #print "URLFORFILE: online ", filename + r = re.search('^(.*)/index.meta', filename) if r: - url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1) + url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1) + name = "Storage System" + + if name and url: + return (url, name) + + return None - return url