version 1.2, 2004/07/05 21:08:55
|
version 1.8, 2007/01/19 17:16:25
|
Line 8 from Globals import InitializeClass
|
Line 8 from Globals import InitializeClass
|
from Globals import Persistent, package_home |
from Globals import Persistent, package_home |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplateFile import PageTemplateFile |
from Products.PageTemplates.PageTemplate import PageTemplate |
from Products.PageTemplates.PageTemplate import PageTemplate |
|
from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate |
|
from OFS.Folder import Folder |
from OFS.SimpleItem import SimpleItem |
from OFS.SimpleItem import SimpleItem |
#from pyPgSQL import PgSQL |
try: |
|
import psycopg2 as PgSQL |
|
except: |
|
try: |
import psycopg as PgSQL |
import psycopg as PgSQL |
|
except: |
|
from pyPgSQL import PgSQL |
|
|
import re |
import re |
import os |
import os |
|
|
MAXHITS = 1000 |
MAXHITS = 1000 |
|
|
class OSAS_search(SimpleItem): |
class OSAS_search(Folder): |
"""Object for global metadata search""" |
"""Object for global metadata search""" |
|
|
meta_type="OSAS_search" |
meta_type="OSAS_search" |
|
|
|
manage_options=Folder.manage_options+( |
|
{'label':'Main config','action':'manage_ChangeOSAS_searchForm'}, |
|
) |
|
|
|
|
def __init__(self,id,title,dsn=None): |
def __init__(self,id,title,dsn=None): |
Line 119 class OSAS_search(SimpleItem):
|
Line 129 class OSAS_search(SimpleItem):
|
|
|
def dbSearch(self, query, type): |
def dbSearch(self, query, type): |
"""search DB for query and return result set""" |
"""search DB for query and return result set""" |
|
results = [] |
|
restypes = {} |
|
if not query: |
|
# empty query |
|
return results |
|
|
curs = self.dbCursor() |
curs = self.dbCursor() |
if type == 'equals': |
if type == 'equals': |
qs = query |
qs = query |
Line 127 class OSAS_search(SimpleItem):
|
Line 143 class OSAS_search(SimpleItem):
|
elif type == 'contains': |
elif type == 'contains': |
qs = "%" + query + "%" |
qs = "%" + query + "%" |
|
|
sql = 'select fileid,idx,tags,content from meta where content like %(qs)s' |
sql = 'select fileid,idx,tags,content from meta where lower(content) like lower(%(qs)s)' |
print sql, " -> ", qs |
print sql, " -> ", qs |
curs.execute(sql, {'qs':qs}) |
curs.execute(sql, {'qs':qs}) |
print "done" |
print "done" |
results = [] |
|
res = curs.fetchone() |
res = curs.fetchone() |
rescnt = 1 |
rescnt = 1 |
#print "res0:", res |
#print "res0:", res |
while res and rescnt < MAXHITS: |
while res and rescnt < MAXHITS: |
#print "res:", res |
#print "res:", res |
result = self.getResult(res) |
result = self.getResult(res) |
if (result): |
if result: |
results.append(result) |
results.append(result) |
|
restypes[result.type] = result.type |
|
|
res = curs.fetchone() |
res = curs.fetchone() |
rescnt += 1 |
rescnt += 1 |
Line 148 class OSAS_search(SimpleItem):
|
Line 164 class OSAS_search(SimpleItem):
|
#self.dbCon = None |
#self.dbCon = None |
|
|
#print "SEARCH: ", rescnt, " results" |
#print "SEARCH: ", rescnt, " results" |
return results |
restypelist = restypes.keys() |
|
return (results, restypelist) |
|
|
|
|
def getResult(self, db_result, rank=0): |
def getResult(self, db_result, rank=0): |
Line 166 class OSAS_search(SimpleItem):
|
Line 183 class OSAS_search(SimpleItem):
|
|
|
return res |
return res |
|
|
|
|
def renderResult(self, result):
    """Return the HTML rendering of a single search result.

    The work is delegated to ``result.render``; this object is handed over
    as the context the result renders itself in.
    """
    return result.render(self)
|
|
|
|
|
def filterResults(self, results, start, end, restypefilter=None):
    """Return one page of results, optionally restricted to a result type.

    results       -- sequence of result objects carrying a ``type`` attribute
    start, end    -- slice bounds of the requested page (applied after the
                     type filter)
    restypefilter -- if set, only results whose ``type`` equals it are kept

    Returns a tuple ``(page, total)`` where ``total`` is the number of
    results left after type filtering, i.e. before slicing.
    """
    # filter on type first, because the total count depends on it
    if restypefilter:
        matching = [r for r in results if r.type == restypefilter]
    else:
        matching = results

    # then cut out the requested page
    return (matching[start:end], len(matching))
|
|
|
|
# |
# |
Line 185 class OSAS_search(SimpleItem):
|
Line 218 class OSAS_search(SimpleItem):
|
return pt() |
return pt() |
|
|
|
|
def search(self, searchstring=None, searchtype='startswith', start=1, count=10): |
def search(self, searchstring=None, searchtype='startswith', start=1, count=10, restypefilter=None): |
"""search and create result""" |
"""search and create result""" |
sres = int(start) -1 |
sres = int(start) -1 |
lres = sres + count |
lres = sres + count |
Line 202 class OSAS_search(SimpleItem):
|
Line 235 class OSAS_search(SimpleItem):
|
|
|
if not oldsearch or searchstring != oldsearch or searchtype != oldtype: |
if not oldsearch or searchstring != oldsearch or searchtype != oldtype: |
# new search |
# new search |
res = self.dbSearch(searchstring, searchtype) |
(res, restypes) = self.dbSearch(searchstring, searchtype) |
# sort the result |
# sort the result |
res.sort(ranksort) |
res.sort(ranksort) |
# store it |
# store it |
self.REQUEST.SESSION['results'] = res |
self.REQUEST.SESSION['results'] = res |
self.REQUEST.SESSION['searchstring'] = searchstring |
self.REQUEST.SESSION['searchstring'] = searchstring |
self.REQUEST.SESSION['searchtype'] = searchtype |
self.REQUEST.SESSION['searchtype'] = searchtype |
|
self.REQUEST.SESSION['resulttypes'] = restypes |
|
|
self.REQUEST.SESSION['resultgroup'] = self.REQUEST.SESSION['results'][sres:lres] |
(resgroup, nres) = self.filterResults(self.REQUEST.SESSION['results'], sres, lres, restypefilter) |
self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, len(self.REQUEST.SESSION['results']), int(count)) |
lres = min(lres, nres) |
|
sres = min(sres, nres) |
|
self.REQUEST.SESSION['resultgroup'] = resgroup |
|
self.REQUEST.SESSION['res_indexes'] = (sres+1, lres, nres, int(count)) |
|
self.REQUEST.SESSION['res_type_filter'] = restypefilter |
|
if nres > 0: |
|
zpt = "zpt/searchResult.zpt" |
|
else: |
|
zpt = "zpt/searchResult_none.zpt" |
|
|
pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self) |
pt=PageTemplateFile(os.path.join(package_home(globals()), zpt)).__of__(self) |
return pt() |
return pt() |
|
|
|
|
Line 240 class OSAS_search(SimpleItem):
|
Line 282 class OSAS_search(SimpleItem):
|
"""returns if there are more results""" |
"""returns if there are more results""" |
try: |
try: |
(first, last, total, count) = self.REQUEST.SESSION['res_indexes'] |
(first, last, total, count) = self.REQUEST.SESSION['res_indexes'] |
return (first < total) |
return (first + count < total) |
except: |
except: |
return False |
return False |
|
|
Line 264 class OSAS_search(SimpleItem):
|
Line 306 class OSAS_search(SimpleItem):
|
if last > total: |
if last > total: |
last = total |
last = total |
except: |
except: |
print "OUCH: no next results: ", first, last, total, count |
print "OUCH: no next results!" |
|
return self.search() |
|
|
return self.search(start=first, count=count) |
return self.search(start=first, count=count) |
|
|
Line 280 class OSAS_search(SimpleItem):
|
Line 323 class OSAS_search(SimpleItem):
|
if last < 1: |
if last < 1: |
last = 1 |
last = 1 |
except: |
except: |
print "OUCH: no prev results: ", first, last, total, count |
print "OUCH: no prev results!" |
|
return self.search() |
|
|
return self.search(start=first, count=count) |
return self.search(start=first, count=count) |
|
|
|
|
|
def manage_ChangeOSAS_searchForm(self):
    """Render the management form for changing this object's configuration."""
    # the form template ships inside the product directory
    zptPath = os.path.join(package_home(globals()), "zpt/ChangeOSAS_search.zpt")
    pt = PageTemplateFile(zptPath).__of__(self)
    return pt()
|
|
|
def manage_ChangeOSAS_search(self, id, title=None, dsn=None, RESPONSE=None):
    """Change id, title and DSN of this search object.

    All three attributes are overwritten unconditionally, so omitting
    ``title`` or ``dsn`` resets them to None.  If a RESPONSE is given the
    browser is redirected to ``manage_main`` afterwards.
    """
    self.id = id
    self.title = title
    self.dsn = dsn
    if RESPONSE is not None:
        RESPONSE.redirect('manage_main')
|
|
|
|
def manage_AddOSAS_searchForm(self): |
def manage_AddOSAS_searchForm(self): |
"""create Search form""" |
"""create Search form""" |
Line 307 class SearchResult(SimpleItem):
|
Line 363 class SearchResult(SimpleItem):
|
|
|
def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
    """Initialise the fields common to all search results.

    type    -- result type (e.g. "bib", "archim")
    file    -- index file name
    url     -- optional list of URL entries for the result
    content -- actual content of the result
    rank    -- result rank used for presentation
    """
    # result type (e.g. "bib", "archim")
    self.type = type
    # index file name
    self.file = file
    # urls for result (list of pairs); copied so that later appends to
    # self.urls do not modify the list object passed in by the caller
    if url:
        self.urls = list(url)
    else:
        self.urls = []
    # actual content
    self.content = content
    # document status (e.g. "online", "archive")
    self.status = None
    # result rank for presentation
    self.rank = rank
|
|
class AnyResult(SearchResult): |
class AnyResult(SearchResult): |
Line 319 class AnyResult(SearchResult):
|
Line 384 class AnyResult(SearchResult):
|
|
|
def __init__(self, zope, db_result, rank):
    """Build a catch-all result from one row of the meta table.

    zope      -- object providing ``getDBFile``
    db_result -- row as a 4-tuple (fileid, tag index, tags, content)
    rank      -- result rank used for presentation

    Raises AssertionError if ``zope.getDBFile`` returns nothing for the
    file id of the row.
    """
    SearchResult.__init__(self)
    #print "NEW ANY RESULT!"
    self.type = 'unknown'
    self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

    (db_fileid, db_tagidx, db_tags, db_content) = db_result
    self.hitTag = db_tags

    # get full info from db
    self.fileinfo = zope.getDBFile(db_fileid)
    # raised explicitly (instead of an assert statement) so that the check
    # also runs when Python is started with -O
    if not self.fileinfo:
        raise AssertionError("no file info for fileid %r" % (db_fileid,))

    # the only known content is the tag that matched
    self.content = {db_tags: db_content}
    self.file = self.fileinfo[0]
    self.status = statusForFile(self.file)
    self.rank = rank
|
|
|
def getContentList(self):
    """Return the content as a list of (key, value) tuples.

    The order is the iteration order of the content dictionary.  A result
    without content yields an empty list.
    """
    if not self.content:
        return []
    return list(self.content.items())
|
|
def render(self, zope): |
def render(self, zope): |
"""render this result object""" |
"""render this result object""" |
zope.REQUEST.SESSION['result'] = self |
zope.REQUEST.SESSION['result'] = self |
Line 360 class MetaResult(AnyResult):
|
Line 432 class MetaResult(AnyResult):
|
self.metainfo = zope.getDBFileMeta(fileid) |
self.metainfo = zope.getDBFileMeta(fileid) |
assert self.metainfo |
assert self.metainfo |
|
|
def checkContext(self, tags, content, ctxurl):
    """Update a context URL entry from one meta entry.

    tags    -- tag path of the meta entry
    content -- content of the meta entry
    ctxurl  -- two-element list [link, name]; modified in place

    A tag path ending in ``/context/link`` fills slot 0, one ending in
    ``/context/name`` fills slot 1; empty content never overwrites a slot.
    Returns the same ``ctxurl`` list.
    """
    if tags.endswith('/context/link'):
        if content:
            #print "CTXlink: ", content
            ctxurl[0] = content

    elif tags.endswith('/context/name'):
        if content:
            #print "CTXname: ", content
            ctxurl[1] = content

    return ctxurl
|
|
|
|
|
class BibResult(MetaResult): |
class BibResult(MetaResult): |
Line 382 class BibResult(MetaResult):
|
Line 453 class BibResult(MetaResult):
|
def __init__(self, zope, db_result, rank): |
def __init__(self, zope, db_result, rank): |
"""constructor""" |
"""constructor""" |
MetaResult.__init__(self, zope, db_result, rank) |
MetaResult.__init__(self, zope, db_result, rank) |
#print "NEW BIB RESULT!" |
#print "NEW BIB RESULT!", self |
self.type = "bib" |
self.type = "bib" |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt") |
self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt") |
self.url = urlForFile(self.file) |
url = storageURL(self.file) |
self.urlabel = None |
if url: |
|
self.urls.append(url) |
(fileid, tagidx, tags, content) = db_result |
(fileid, tagidx, tags, content) = db_result |
|
|
btype = "" |
btype = "" |
bitems = {} |
bitems = {} |
|
ctxurl = ['', ''] |
|
|
for me in self.metainfo: |
for me in self.metainfo: |
(m_idx, m_tags, m_content, m_attributes) = me |
(m_idx, m_tags, m_content, m_attributes) = me |
# context tag |
# context tag |
if self.checkContext(m_tags, m_content): |
ctxurl = self.checkContext(m_tags, m_content, ctxurl) |
continue |
|
# first tag with bib type attribute |
# first tag with bib type attribute |
if m_tags.endswith('/meta/bib'): |
if m_tags.endswith('/meta/bib'): |
r = re.search('type="([^"]*)"', m_attributes) |
r = re.search('type="([^"]*)"', m_attributes) |
Line 418 class BibResult(MetaResult):
|
Line 490 class BibResult(MetaResult):
|
k = r.group(1) |
k = r.group(1) |
#print "CONTENT: ", m_content |
#print "CONTENT: ", m_content |
bitems[k] = m_content |
bitems[k] = m_content |
|
# remember hit tag |
|
if m_tags == self.hitTag: |
|
self.hitTag = k |
continue |
continue |
|
|
self.content = bitems |
self.content = bitems |
|
# store context |
|
if not ctxurl[1]: |
|
ctxurl[1] = "View" |
|
# must have link |
|
if ctxurl[0]: |
|
self.urls.append(ctxurl) |
|
|
self.rank += 100 |
self.rank += 100 |
if not self.urlabel and self.url: |
|
self.urlabel = "view" |
def getContentList(self):
    """Return the content as a list of (key, value) tuples in preferred order.

    'author', 'title', 'journal' and 'year' come first (when present), the
    'type' entry is left out, and all remaining entries follow in the
    iteration order of the content dictionary.  ``self.content`` itself is
    not modified.
    """
    remaining = self.content.copy()
    ordered = []
    # preferred items first
    for key in ('author', 'title', 'journal', 'year'):
        if key in remaining:
            ordered.append((key, remaining.pop(key)))

    # no type; tolerate content that has no 'type' entry
    remaining.pop('type', None)
    # copy the rest
    ordered.extend(remaining.items())
    return ordered
|
|
|
|
class ArchimResult(MetaResult): |
class ArchimResult(MetaResult): |
Line 432 class ArchimResult(MetaResult):
|
Line 530 class ArchimResult(MetaResult):
|
def __init__(self, zope, db_result, rank):
    """Build an "archim" result from one row of the meta table.

    zope      -- passed on to MetaResult.__init__
    db_result -- row as a 4-tuple (fileid, tag index, tags, content)
    rank      -- base rank; 100 is added for this result type

    Collects every ``/meta/archimedes/<key>`` entry of ``self.metainfo``
    into ``self.content`` and appends the storage URL and, if a context
    link was found, the context URL to ``self.urls``.
    """
    MetaResult.__init__(self, zope, db_result, rank)
    #print "NEW ARCHIM RESULT!", self
    self.type = "archim"
    self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
    url = storageURL(self.file)
    if url:
        self.urls.append(url)

    # kept from the original: fails early if the row is not a 4-tuple
    (fileid, tagidx, tags, content) = db_result

    # process info
    archimTag = re.compile('/meta/archimedes/(.*)')
    bitems = {}
    ctxurl = ['', '']
    for me in self.metainfo:
        (m_idx, m_tags, m_content, m_attributes) = me
        # context tag
        ctxurl = self.checkContext(m_tags, m_content, ctxurl)
        # collect archimedes/something
        r = archimTag.search(m_tags)
        if r:
            k = r.group(1)
            #print "CONTENT: ", m_content
            bitems[k] = m_content
            # remember hit tag
            if m_tags == self.hitTag:
                self.hitTag = k

    self.content = bitems
    self.rank += 100
    # store context
    if not ctxurl[1]:
        ctxurl[1] = "View"
    # must have link
    if ctxurl[0]:
        self.urls.append(ctxurl)
|
|
|
|
|
def getContentList(self):
    """Return the content as a list of (key, value) tuples in preferred order.

    'author', 'title', 'date' and 'place' come first (when present); all
    remaining entries follow in the iteration order of the content
    dictionary.  ``self.content`` itself is not modified.
    """
    remaining = self.content.copy()
    ordered = []
    # preferred items first
    for key in ('author', 'title', 'date', 'place'):
        if key in remaining:
            ordered.append((key, remaining.pop(key)))

    # copy the rest
    ordered.extend(remaining.items())
    return ordered
|
|
|
|
|
|
Line 467 def ranksort(res1, res2):
|
Line 591 def ranksort(res1, res2):
|
return cmp(res2.rank, res1.rank) |
return cmp(res2.rank, res1.rank) |
|
|
|
|
def urlForFile(filename): |
def statusForFile(filename):
    """Heuristic: return the document status for an index file name.

    Returns "online" for names below /mpiwg/online/, "archive" for names
    below /mpiwg/archive/, "database" for http:// URLs and None for
    everything else, including an empty or missing file name.
    """
    if not filename:
        return None

    if filename.startswith('/mpiwg/online/'):
        return "online"
    if filename.startswith('/mpiwg/archive/'):
        return "archive"
    if filename.startswith('http://'):
        return "database"
    return None
|
|
|
def storageURL(filename):
    """Heuristic: return a (url, name) pair for an index file name.

    For names below /mpiwg/online/ the directory containing ``index.meta``
    is turned into a link to the storage system; http:// names are taken
    as links to an online database as they are.  Returns None if no URL
    can be derived, including for an empty or missing file name.
    """
    if not filename:
        return None

    url = None
    name = None
    if filename.startswith('/mpiwg/online/'):
        #print "URLFORFILE: online ", filename
        # the dot is escaped so that only a literal "index.meta" matches
        r = re.search(r'^(.*)/index\.meta', filename)
        if r:
            url = "http://content.mpiwg-berlin.mpg.de/mpistorage/storage/ShowOnline/index_html?path=%s"%r.group(1)
            name = "Storage System"

    elif filename.startswith('http://'):
        #print "URLFORFILE: url ", filename
        url = filename
        name = "Online Database"

    if name and url:
        return (url, name)

    return None
|