version 1.1, 2004/07/01 19:31:25
|
version 1.4, 2004/07/09 17:32:32
|
Line 69 class OSAS_search(SimpleItem):
|
Line 69 class OSAS_search(SimpleItem):
|
metacache = self.REQUEST.SESSION['dbMeta'] |
metacache = self.REQUEST.SESSION['dbMeta'] |
if metacache.has_key(fileid): |
if metacache.has_key(fileid): |
res = metacache[fileid] |
res = metacache[fileid] |
print "meta from cache " |
#print "meta from cache " |
return res |
return res |
|
|
curs = self.dbCursor() |
curs = self.dbCursor() |
Line 97 class OSAS_search(SimpleItem):
|
Line 97 class OSAS_search(SimpleItem):
|
filecache = self.REQUEST.SESSION['dbFiles'] |
filecache = self.REQUEST.SESSION['dbFiles'] |
if filecache.has_key(fileid): |
if filecache.has_key(fileid): |
res = filecache[fileid] |
res = filecache[fileid] |
print "file from cache " |
#print "file from cache " |
return res |
return res |
|
|
curs = self.dbCursor() |
curs = self.dbCursor() |
Line 117 class OSAS_search(SimpleItem):
|
Line 117 class OSAS_search(SimpleItem):
|
return res |
return res |
|
|
|
|
def dbSearch(self, query): |
def dbSearch(self, query, type): |
"""search DB for query and return result set""" |
"""search DB for query and return result set""" |
|
results = [] |
|
restypes = {} |
|
if not query: |
|
# empty query |
|
return results |
|
|
curs = self.dbCursor() |
curs = self.dbCursor() |
|
if type == 'equals': |
|
qs = query |
|
elif type == 'startswith': |
qs = query + "%" |
qs = query + "%" |
sql = 'select fileid,idx,tags,content from meta where content like %(qs)s' |
elif type == 'contains': |
|
qs = "%" + query + "%" |
|
|
|
sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)' |
print sql, " -> ", qs |
print sql, " -> ", qs |
curs.execute(sql, {'qs':qs}) |
curs.execute(sql, {'qs':qs}) |
print "done" |
print "done" |
results = [] |
|
res = curs.fetchone() |
res = curs.fetchone() |
rescnt = 1 |
rescnt = 1 |
#print "res0:", res |
#print "res0:", res |
while res and rescnt < MAXHITS: |
while res and rescnt < MAXHITS: |
#print "res:", res |
#print "res:", res |
result = self.getResult(res) |
result = self.getResult(res) |
if (result): |
if result: |
results.append(result) |
results.append(result) |
|
restypes[result.type] = result.type |
|
|
res = curs.fetchone() |
res = curs.fetchone() |
rescnt += 1 |
rescnt += 1 |
Line 141 class OSAS_search(SimpleItem):
|
Line 153 class OSAS_search(SimpleItem):
|
curs.close() |
curs.close() |
#self.dbCon = None |
#self.dbCon = None |
|
|
print "SEARCH: ", rescnt, " results" |
#print "SEARCH: ", rescnt, " results" |
return results |
restypelist = restypes.keys() |
|
return (results, restypelist) |
|
|
|
|
def getResult(self, db_result, rank=0): |
def getResult(self, db_result, rank=0): |
"""factory for result objects""" |
"""factory for result objects""" |
print "NEW RESULT!" |
|
|
|
(fileid, tagidx, tags, content) = db_result |
(fileid, tagidx, tags, content) = db_result |
res = None |
res = None |
|
|
print "tags: ", tags |
|
if tags.find('/meta/bib/') > -1: |
if tags.find('/meta/bib/') > -1: |
res = BibResult(self, db_result, rank) |
res = BibResult(self, db_result, rank) |
elif tags.find('/meta/archimedes/') > -1: |
elif tags.find('/meta/archimedes/') > -1: |
Line 162 class OSAS_search(SimpleItem):
|
Line 173 class OSAS_search(SimpleItem):
|
|
|
return res |
return res |
|
|
|
|
def renderResult(self, result): |
def renderResult(self, result): |
"""returns HTML rendering of a search result""" |
"""returns HTML rendering of a search result""" |
|
|
print "renderresult!", result, " -- ", result.url |
|
return result.render(self) |
return result.render(self) |
|
|
|
|
|
def filterResults(self, results, start, end, restypefilter=None):
    """Return one slice of the results, optionally restricted by type.

    results       -- sequence of result objects; each must expose a
                     ``type`` attribute when a filter is given
    start, end    -- slice bounds applied after the type filter
    restypefilter -- container of accepted ``type`` values; a false
                     value (None, empty) disables type filtering

    Returns a tuple ``(resgroup, rescnt)``: the sliced results and the
    number of results left after type filtering (before slicing).
    """
    if not restypefilter:
        # no type filter: work on the list as it was handed in
        matching = results
    else:
        matching = [hit for hit in results if hit.type in restypefilter]

    # slice first, then report the filtered total
    return (matching[start:end], len(matching))
|
|
|
|
# |
# |
Line 182 class OSAS_search(SimpleItem):
|
Line 208 class OSAS_search(SimpleItem):
|
return pt() |
return pt() |
|
|
|
|
def search(self, searchstring=None): |
def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None): |
"""search and result""" |
"""search and create result""" |
if searchstring: |
sres = int(start) -1 |
print "SEARCH: ", searchstring |
lres = sres + count |
res = self.dbSearch(searchstring) |
try: |
|
oldsearch = self.REQUEST.SESSION['searchstring'] |
|
oldtype = self.REQUEST.SESSION['searchtype'] |
|
except: |
|
oldsearch = "" |
|
oldtype = "" |
|
|
|
if not searchstring: |
|
searchstring = oldsearch |
|
searchtype = oldtype |
|
|
|
if not oldsearch or searchstring != oldsearch or searchtype != oldtype: |
|
# new search |
|
(res, restypes) = self.dbSearch(searchstring, searchtype) |
|
# sort the result |
res.sort(ranksort) |
res.sort(ranksort) |
|
# store it |
self.REQUEST.SESSION['results'] = res |
self.REQUEST.SESSION['results'] = res |
self.REQUEST.SESSION['searchstring'] = searchstring |
self.REQUEST.SESSION['searchstring'] = searchstring |
|
self.REQUEST.SESSION['searchtype'] = searchtype |
|
self.REQUEST.SESSION['resulttypes'] = restypes |
|
|
|
(resgroup, nres) = self.filterResults(self.REQUEST.SESSION['results'], sres, lres, restypefilter) |
|
lres = min(lres, nres) |
|
self.REQUEST.SESSION['resultgroup'] = resgroup |
|
self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, nres, int(count)) |
|
self.REQUEST.SESSION['res_type_filter'] = restypefilter |
|
if nres > 0: |
|
zpt = "zpt/searchResult.zpt" |
|
else: |
|
zpt = "zpt/searchResult_none.zpt" |
|
|
print "SEARCH res:", res |
pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self) |
pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self) |
|
return pt() |
return pt() |
|
|
|
|
|
def getSearchType(self):
    """Return the search type stored in the session.

    Reads ``self.REQUEST.SESSION['searchtype']``; any failure while
    reading it yields the empty string.
    """
    try:
        return self.REQUEST.SESSION['searchtype']
    except:
        # nothing stored (or no session available)
        return ""
|
|
|
def getSearchString(self):
    """Return the search string stored in the session.

    Reads ``self.REQUEST.SESSION['searchstring']``; any failure while
    reading it yields the empty string.
    """
    try:
        return self.REQUEST.SESSION['searchstring']
    except:
        # nothing stored (or no session available)
        return ""
|
|
|
|
|
def hasNextResults(self):
    """Return True if results exist beyond the current page.

    Reads the ``(first, last, total, count)`` tuple from
    ``self.REQUEST.SESSION['res_indexes']``.  ``first`` and ``last``
    are the 1-based positions of the first and last result on the
    current page and ``total`` is the number of results, so more
    results exist exactly when ``last`` is smaller than ``total``.

    The previous test ``first + count < total`` was off by one: with
    first=1, count=10, total=11 it reported no further results although
    result 11 had not been shown yet.

    Returns False when the indexes cannot be read from the session.
    """
    try:
        (first, last, total, count) = self.REQUEST.SESSION['res_indexes']
        return last < total
    except Exception:
        # no stored indexes (no search yet, or no session available)
        return False
|
|
|
def hasPrevResults(self):
    """Return True if the current page does not start at result 1.

    Reads the ``(first, last, total, count)`` tuple from
    ``self.REQUEST.SESSION['res_indexes']``; returns False when it
    cannot be read.
    """
    try:
        page_first, _last, _total, _count = self.REQUEST.SESSION['res_indexes']
        return page_first > 1
    except:
        # no stored indexes
        return False
|
|
|
|
|
def nextResults(self):
    """Run the search again for the page after the current one.

    Takes ``(first, last, total, count)`` from
    ``self.REQUEST.SESSION['res_indexes']``, moves both positions
    forward by ``count`` (never beyond ``total``) and returns
    ``self.search(start=first, count=count)``.  If the indexes cannot
    be read, a message is printed and ``self.search()`` is returned.
    """
    try:
        (page_first, page_last, total, count) = self.REQUEST.SESSION['res_indexes']
        # step one page forward, clamped to the total
        page_first = min(page_first + count, total)
        page_last = min(page_last + count, total)
    except:
        print("OUCH: no next results!")
        return self.search()

    # only the start position and page size are handed on
    return self.search(start=page_first, count=count)
|
|
|
|
|
def prevResults(self):
    """Run the search again for the page before the current one.

    Takes ``(first, last, total, count)`` from
    ``self.REQUEST.SESSION['res_indexes']``, moves both positions back
    by ``count`` (never below 1) and returns
    ``self.search(start=first, count=count)``.  If the indexes cannot
    be read, a message is printed and ``self.search()`` is returned.
    """
    try:
        (page_first, page_last, total, count) = self.REQUEST.SESSION['res_indexes']
        # step one page back, clamped to the first result
        page_first = max(page_first - count, 1)
        page_last = max(page_last - count, 1)
    except:
        print("OUCH: no prev results!")
        return self.search()

    # only the start position and page size are handed on
    return self.search(start=page_first, count=count)
|
|
|
|
|
|
def manage_AddOSAS_searchForm(self): |
def manage_AddOSAS_searchForm(self): |
"""create Search form""" |
"""create Search form""" |
Line 217 class SearchResult(SimpleItem):
|
Line 339 class SearchResult(SimpleItem):
|
|
|
def __init__(self, type='unknown', file=None, url=None, content=None, rank=0): |
def __init__(self, type='unknown', file=None, url=None, content=None, rank=0): |
"""init""" |
"""init""" |
|
# result type (e.g. "bib", "archim") |
self.type = type |
self.type = type |
|
# index file name |
self.file = file |
self.file = file |
self.url = url |
# url for result (list of pairs) |
self.urlabel = url |
if url: |
|
self.urls = url |
|
else: |
|
self.urls = [] |
|
# actual content (list of tuples) |
self.content = content |
self.content = content |
|
# document status (e.g. "online", "archive") |
|
self.status = None |
|
# result rank for presentation |
self.rank = rank |
self.rank = rank |
|
|
class AnyResult(SearchResult): |
class AnyResult(SearchResult): |
Line 229 class AnyResult(SearchResult):
|
Line 360 class AnyResult(SearchResult):
|
|
|
def __init__(self, zope, db_result, rank): |
def __init__(self, zope, db_result, rank): |
"""returns a catch-all type result""" |
"""returns a catch-all type result""" |
SearchResult.__init__(self, type='unknown') |
SearchResult.__init__(self) |
print "NEW ANY RESULT!" |
#print "NEW ANY RESULT!" |
|
self.type='unknown' |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt") |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt") |
|
|
(fileid, tagidx, tags, content) = db_result |
(db_fileid, db_tagidx, db_tags, db_content) = db_result |
self.hitTag = tags |
self.hitTag = db_tags |
|
|
# get full info from db |
# get full info from db |
self.fileinfo = zope.getDBFile(fileid) |
self.fileinfo = zope.getDBFile(db_fileid) |
assert self.fileinfo |
assert self.fileinfo |
|
|
items = {} |
items = {} |
items[tags] = content |
items[db_tags] = db_content |
self.content = items |
self.content = items |
self.file = self.fileinfo[0] |
self.file = self.fileinfo[0] |
self.url = "" |
self.status = statusForFile(self.file) |
self.urlabel = self.file |
|
self.rank = rank |
self.rank = rank |
|
|
|
def getContentList(self):
    """Return ``self.content`` as a list of ``(key, value)`` tuples.

    The order is whatever ``self.content.keys()`` yields; no preferred
    ordering is applied here.
    """
    return [(tag, self.content[tag]) for tag in self.content.keys()]
|
|
def render(self, zope): |
def render(self, zope): |
"""render this result object""" |
"""render this result object""" |
zope.REQUEST.SESSION['result'] = self |
zope.REQUEST.SESSION['result'] = self |
print "renderender...", self |
|
pt=PageTemplateFile(self.zptFile).__of__(zope) |
pt=PageTemplateFile(self.zptFile).__of__(zope) |
return pt() |
return pt() |
|
|
Line 263 class MetaResult(AnyResult):
|
Line 400 class MetaResult(AnyResult):
|
def __init__(self, zope, db_result, rank): |
def __init__(self, zope, db_result, rank): |
"""contructor""" |
"""contructor""" |
AnyResult.__init__(self, zope, db_result, rank) |
AnyResult.__init__(self, zope, db_result, rank) |
print "NEW META RESULT!" |
#print "NEW META RESULT!" |
|
|
(fileid, tagidx, tags, content) = db_result |
(fileid, tagidx, tags, content) = db_result |
|
|
Line 271 class MetaResult(AnyResult):
|
Line 408 class MetaResult(AnyResult):
|
self.metainfo = zope.getDBFileMeta(fileid) |
self.metainfo = zope.getDBFileMeta(fileid) |
assert self.metainfo |
assert self.metainfo |
|
|
def checkContext(self, tags, content): |
def checkContext(self, tags, content, ctxurl): |
"""takes meta entry and sets url from context tags""" |
"""takes meta entry and updates url from context tags""" |
if tags.endswith('/context/link'): |
if tags.endswith('/context/link'): |
if content: |
if content: |
self.url = content |
#print "CTXlink: ", content |
|
ctxurl[0] = content |
|
|
elif tags.endswith('/context/name'): |
elif tags.endswith('/context/name'): |
if content: |
if content: |
self.urlabel = content |
#print "CTXname: ", content |
|
ctxurl[1] = content |
else: |
|
return False |
|
|
|
return True |
return ctxurl |
|
|
|
|
class BibResult(MetaResult): |
class BibResult(MetaResult): |
Line 293 class BibResult(MetaResult):
|
Line 429 class BibResult(MetaResult):
|
def __init__(self, zope, db_result, rank): |
def __init__(self, zope, db_result, rank): |
"""constructor""" |
"""constructor""" |
MetaResult.__init__(self, zope, db_result, rank) |
MetaResult.__init__(self, zope, db_result, rank) |
print "NEW BIB RESULT!" |
#print "NEW BIB RESULT!", self |
self.type = "bib" |
self.type = "bib" |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt") |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt") |
self.url = urlForFile(self.file) |
url = storageURL(self.file) |
self.urlabel = None |
if url: |
|
self.urls.append(url) |
(fileid, tagidx, tags, content) = db_result |
(fileid, tagidx, tags, content) = db_result |
|
|
btype = "" |
btype = "" |
bitems = {} |
bitems = {} |
|
ctxurl = ['', ''] |
|
|
for me in self.metainfo: |
for me in self.metainfo: |
(m_idx, m_tags, m_content, m_attributes) = me |
(m_idx, m_tags, m_content, m_attributes) = me |
# context tag |
# context tag |
if self.checkContext(m_tags, m_content): |
ctxurl = self.checkContext(m_tags, m_content, ctxurl) |
continue |
|
# first tag with bib type attribute |
# first tag with bib type attribute |
if m_tags.endswith('/meta/bib'): |
if m_tags.endswith('/meta/bib'): |
r = re.search('type="([^"]*)"', m_attributes) |
r = re.search('type="([^"]*)"', m_attributes) |
Line 329 class BibResult(MetaResult):
|
Line 466 class BibResult(MetaResult):
|
k = r.group(1) |
k = r.group(1) |
#print "CONTENT: ", m_content |
#print "CONTENT: ", m_content |
bitems[k] = m_content |
bitems[k] = m_content |
|
# remember hit tag |
|
if m_tags == self.hitTag: |
|
self.hitTag = k |
continue |
continue |
|
|
self.content = bitems |
self.content = bitems |
|
# store context |
|
if not ctxurl[1]: |
|
ctxurl[1] = "View" |
|
# must have link |
|
if ctxurl[0]: |
|
self.urls.append(ctxurl) |
|
|
self.rank += 100 |
self.rank += 100 |
if not self.urlabel and self.url: |
|
self.urlabel = "view" |
def getContentList(self):
    """Return the bib content as ``(key, value)`` tuples in display order.

    'author', 'title', 'journal' and 'year' come first, in that order,
    when present.  The 'type' entry is left out.  All remaining entries
    follow in the dictionary's own order.  ``self.content`` itself is
    not modified; the method works on a copy.

    A missing 'type' entry is tolerated (it used to raise KeyError).
    """
    remaining = self.content.copy()
    ordered = []

    # preferred items first
    for key in ('author', 'title', 'journal', 'year'):
        if key in remaining:
            ordered.append((key, remaining[key]))
            del remaining[key]

    # the bib type is not listed; it may be absent
    if 'type' in remaining:
        del remaining['type']

    # everything else
    ordered.extend(remaining.items())
    return ordered
|
|
|
|
class ArchimResult(MetaResult): |
class ArchimResult(MetaResult): |
Line 343 class ArchimResult(MetaResult):
|
Line 506 class ArchimResult(MetaResult):
|
def __init__(self, zope, db_result, rank): |
def __init__(self, zope, db_result, rank): |
"""constructor""" |
"""constructor""" |
MetaResult.__init__(self, zope, db_result, rank) |
MetaResult.__init__(self, zope, db_result, rank) |
print "NEW ARCHIM RESULT!" |
#print "NEW ARCHIM RESULT!", self |
self.type = "archim" |
self.type = "archim" |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt") |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt") |
self.url = urlForFile(self.file) |
url = storageURL(self.file) |
self.urlabel = None |
if url: |
|
self.urls.append(url) |
|
|
(fileid, tagidx, tags, content) = db_result |
(fileid, tagidx, tags, content) = db_result |
|
|
# process info |
# process info |
bitems = {} |
bitems = {} |
|
ctxurl = ['', ''] |
for me in self.metainfo: |
for me in self.metainfo: |
(m_idx, m_tags, m_content, m_attributes) = me |
(m_idx, m_tags, m_content, m_attributes) = me |
# context tag |
# context tag |
if self.checkContext(m_tags, m_content): |
ctxurl = self.checkContext(m_tags, m_content, ctxurl) |
continue |
|
# collect archimedes/something |
# collect archimedes/something |
r = re.search('/meta/archimedes/(.*)', m_tags) |
r = re.search('/meta/archimedes/(.*)', m_tags) |
if r: |
if r: |
k = r.group(1) |
k = r.group(1) |
#print "CONTENT: ", m_content |
#print "CONTENT: ", m_content |
bitems[k] = m_content |
bitems[k] = m_content |
|
# remember hit tag |
|
if m_tags == self.hitTag: |
|
self.hitTag = k |
continue |
continue |
|
|
self.content = bitems |
self.content = bitems |
self.rank += 100 |
self.rank += 100 |
if not self.urlabel and self.url: |
# store context |
self.urlabel = "view" |
if not ctxurl[1]: |
|
ctxurl[1] = "View" |
|
# must have link |
|
if ctxurl[0]: |
|
self.urls.append(ctxurl) |
|
|
|
|
|
def getContentList(self):
    """Return the content as ``(key, value)`` tuples in display order.

    'author', 'title', 'date' and 'place' come first, in that order,
    when present; all remaining entries follow in the dictionary's own
    order.  ``self.content`` itself is not modified; the method works
    on a copy.
    """
    remaining = self.content.copy()
    ordered = []

    # preferred items first ("in" replaces the deprecated has_key())
    for key in ('author', 'title', 'date', 'place'):
        if key in remaining:
            ordered.append((key, remaining[key]))
            del remaining[key]

    # everything else
    ordered.extend(remaining.items())
    return ordered
|
|
|
|
|
|
Line 378 def ranksort(res1, res2):
|
Line 567 def ranksort(res1, res2):
|
return cmp(res2.rank, res1.rank) |
return cmp(res2.rank, res1.rank) |
|
|
|
|
def urlForFile(filename): |
def statusForFile(filename):
    """Guess a document status from the start of an index file name.

    Returns "online" for names starting with '/mpiwg/online/',
    "archive" for '/mpiwg/archive/', "database" for 'http://' and
    None for anything else.
    """
    known_prefixes = (
        ('/mpiwg/online/', "online"),
        ('/mpiwg/archive/', "archive"),
        ('http://', "database"),
    )
    for prefix, status in known_prefixes:
        if filename.startswith(prefix):
            return status

    # unknown location
    return None
|
|
|
def storageURL(filename): |
"""heuristic... returns an URL for a index file name""" |
"""heuristic... returns an URL for a index file name""" |
url = None |
url = None |
|
name = None |
if filename.startswith('/mpiwg/online/'): |
if filename.startswith('/mpiwg/online/'): |
print "URLFORFILE: online ", filename |
#print "URLFORFILE: online ", filename |
r = re.search('/mpiwg/online/(.*)/index.meta', filename) |
r = re.search('^(.*)/index.meta', filename) |
if r: |
if r: |
url = "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%r.group(1) |
url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1) |
|
name = "Storage System" |
|
|
|
elif filename.startswith('http://'): |
|
#print "URLFORFILE: url ", filename |
|
url = filename |
|
name = "Online Database" |
|
|
|
if name and url: |
|
return (url, name) |
|
|
|
return None |
|
|
return url |
|