1: """Metadata search interface
2: ROC 2004, itgroup
3:
4: """
5:
import logging
import os
import re

from AccessControl import ClassSecurityInfo
from Globals import InitializeClass
from Globals import Persistent, package_home
from OFS.SimpleItem import SimpleItem
from Products.PageTemplates.PageTemplate import PageTemplate
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
#from pyPgSQL import PgSQL
import psycopg as PgSQL
17:
# limit used by OSAS_search.dbSearch() to cap how many rows of a single
# query are turned into result objects
MAXHITS = 1000
19:
class OSAS_search(SimpleItem):
    """Object for global metadata search.

    Queries the ``meta`` and ``files`` tables through a psycopg connection
    described by ``self.dsn`` and renders the hits with page templates from
    the product's ``zpt`` directory.  The connection itself is kept in
    volatile (``_v_``) attributes and is (re)opened on demand by dbCursor().
    """

    meta_type="OSAS_search"

    def __init__(self,id,title,dsn=None):
        """Store id, title and the database DSN.

        id    -- Zope object id
        title -- Zope object title
        dsn   -- connection string handed to PgSQL.connect(); when empty the
                 built-in default below is used
        """
        self.id=id
        self.title=title
        if dsn:
            self.dsn = dsn
        else:
            # NOTE(review): the fallback DSN embeds host name and credentials
            # in source code -- consider moving it to configuration.
            self.dsn = "host=foxridge.mpiwg-berlin.mpg.de dbname=storage user=archiveread password=archiveread"
        # volatile database connection object
        self._v_dbCon = None
        # number of consecutive failed attempts to obtain a cursor
        self._v_tryCon = 0

    def dbCursor(self):
        """Return a new SQL cursor, (re)connecting to the database if needed.

        An existing connection is reused.  When asking it for a cursor fails
        the connection is dropped and a new one is opened; after three
        consecutive failures the method gives up.

        Raises AssertionError when no cursor could be obtained.  Errors
        raised by PgSQL.connect() itself are not caught.
        """
        if getattr(self, '_v_dbCon', None) is None:
            # no connection yet: start with a clean failure counter
            self._v_dbCon = None
            self._v_tryCon = 0

        curs = None
        while curs is None:
            if self._v_dbCon is None:
                if self._v_tryCon >= 3:
                    break
                # NOTE(review): serialize=0 is passed straight through to the
                # driver; confirm its meaning against the psycopg version in use.
                self._v_dbCon = PgSQL.connect(self.dsn, serialize=0)
            try:
                curs = self._v_dbCon.cursor()
                self._v_tryCon = 0
            except Exception:
                # in case of problems reset dbCon and try a fresh connection
                self._v_dbCon = None
                self._v_tryCon += 1

        if curs is None:
            # explicit raise instead of ``assert`` so the check survives -O
            raise AssertionError("AIIEE no db cursor!!")
        return curs

    def _cachedQuery(self, cachename, fileid, sql, fetchall):
        """Run sql for fileid, caching the outcome in the session.

        cachename -- session key of the dict that maps fileid -> result
        fileid    -- value bound to the ``%(id)s`` placeholder in sql
        sql       -- statement to execute on a cache miss
        fetchall  -- true: return curs.fetchall(); false: curs.fetchone()
        """
        log = logging.getLogger(__name__)
        session = self.REQUEST.SESSION
        cache = {}
        # try in cache
        if session.has_key(cachename):
            cache = session[cachename]
            if fileid in cache:
                log.debug("%s: %r from cache", cachename, fileid)
                return cache[fileid]

        curs = self.dbCursor()
        try:
            log.debug("%s -> %r", sql, fileid)
            curs.execute(sql, {'id':fileid})
            if fetchall:
                res = curs.fetchall()
            else:
                res = curs.fetchone()
        finally:
            # release the cursor even when the query fails
            curs.close()

        # store info in cache; the dict is written back to the session
        cache[fileid] = res
        session[cachename] = cache
        return res

    def getDBFileMeta(self, fileid):
        """Return all meta entries of fileid.

        The result is the fetchall() outcome of a query for
        (idx, tags, content, attributes) ordered by idx, cached in the
        session under 'dbMeta'.
        """
        sql = 'SELECT idx,tags,content,attributes FROM meta WHERE fileid=%(id)s ORDER BY idx'
        return self._cachedQuery('dbMeta', fileid, sql, True)

    def getDBFile(self, fileid):
        """Return the file information of fileid.

        The result is the fetchone() outcome of a query for
        (filename, mtime), cached in the session under 'dbFiles'.
        """
        sql = 'select filename,mtime from files where id=%(id)s'
        return self._cachedQuery('dbFiles', fileid, sql, False)

    def dbSearch(self, query):
        """Search the meta table for query and return a list of result objects.

        query is used as a prefix for an SQL LIKE match on ``content``
        ('%' is appended); LIKE wildcards contained in query itself are
        passed on unchanged.  At most MAXHITS rows are processed.
        """
        log = logging.getLogger(__name__)
        qs = query + "%"
        sql = 'select fileid,idx,tags,content from meta where content like %(qs)s'
        results = []
        rowcnt = 0
        curs = self.dbCursor()
        try:
            log.debug("%s -> %r", sql, qs)
            curs.execute(sql, {'qs':qs})
            row = curs.fetchone()
            # count rows as they are processed so that exactly MAXHITS rows
            # are allowed and the logged number is the real one
            while row and rowcnt < MAXHITS:
                rowcnt += 1
                result = self.getResult(row)
                if result:
                    results.append(result)
                row = curs.fetchone()
        finally:
            curs.close()

        log.debug("SEARCH: %d results", rowcnt)
        return results

    def getResult(self, db_result, rank=0):
        """Factory for result objects.

        db_result -- (fileid, idx, tags, content) row from the meta table
        rank      -- initial rank handed to the result object

        The result class is chosen from the tag path of the hit.
        """
        (fileid, tagidx, tags, content) = db_result
        logging.getLogger(__name__).debug("NEW RESULT! tags: %s", tags)

        if '/meta/bib/' in tags:
            return BibResult(self, db_result, rank)
        if '/meta/archimedes/' in tags:
            return ArchimResult(self, db_result, rank)
        return AnyResult(self, db_result, rank)

    def renderResult(self, result):
        """Return the HTML rendering of a search result."""
        logging.getLogger(__name__).debug("renderresult! %s -- %s", result, result.url)
        return result.render(self)

    #
    # Web page stuff
    #

    def index_html(self):
        """metadata search"""
        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/OSAS_search.zpt")).__of__(self)
        return pt()

    def search(self, searchstring=None):
        """Run a search (if searchstring is given) and render the result page.

        Results are sorted with ranksort and stored, together with the
        search string, in the session under 'results' / 'searchstring'.
        """
        if searchstring:
            log = logging.getLogger(__name__)
            log.debug("SEARCH: %s", searchstring)
            res = self.dbSearch(searchstring)
            res.sort(ranksort)
            self.REQUEST.SESSION['results'] = res
            self.REQUEST.SESSION['searchstring'] = searchstring
            # only report results when a search actually ran; ``res`` does
            # not exist otherwise
            log.debug("SEARCH res: %r", res)

        pt=PageTemplateFile(os.path.join(package_home(globals()), "zpt/searchResult.zpt")).__of__(self)
        return pt()
197:
198:
199:
def manage_AddOSAS_searchForm(self):
    """Render the form used to create an OSAS_search object."""
    template_path = os.path.join(package_home(globals()), "zpt/AddOSAS_search.zpt")
    form = PageTemplateFile(template_path).__of__(self)
    return form()
204:
def manage_AddOSAS_search(self,id,title=None,dsn=None,RESPONSE=None):
    """Create an OSAS_search object and store it under id.

    When a RESPONSE is supplied the browser is redirected to 'manage_main'
    afterwards.
    """
    self._setObject(id, OSAS_search(id, title, dsn))
    if RESPONSE is None:
        return
    RESPONSE.redirect('manage_main')
211:
212:
213:
214:
class SearchResult(SimpleItem):
    """Base class of all search result objects; holds the shared attributes."""

    def __init__(self, type='unknown', file=None, url=None, content=None, rank=0):
        """Remember the given values as attributes of the same name.

        ``urlabel`` (the link label) starts out equal to ``url``.
        """
        self.rank = rank
        self.content = content
        # link target and link label are initially the same value
        self.url = self.urlabel = url
        self.file = file
        self.type = type
226:
class AnyResult(SearchResult):
    """Catch-all type result object."""

    def __init__(self, zope, db_result, rank):
        """Build a catch-all result from one hit.

        zope      -- object providing getDBFile(fileid); used to look up the
                     file record of the hit
        db_result -- (fileid, idx, tags, content) tuple
        rank      -- rank stored on the result

        Raises AssertionError when getDBFile() yields no record.
        """
        SearchResult.__init__(self, type='unknown')
        logging.getLogger(__name__).debug("NEW ANY RESULT!")

        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_any.zpt")

        (fileid, tagidx, tags, content) = db_result
        self.hitTag = tags

        # get full info from db
        self.fileinfo = zope.getDBFile(fileid)
        if not self.fileinfo:
            # explicit raise (same exception type as the former ``assert``)
            # so the check is not stripped when running with -O
            raise AssertionError("no file record for fileid %r" % (fileid,))

        # the content of a catch-all result is just the tag that matched
        self.content = {tags: content}
        # first column of the file record is the file name
        self.file = self.fileinfo[0]
        self.url = ""
        self.urlabel = self.file
        self.rank = rank

    def render(self, zope):
        """Render this result with its page template in the context of zope.

        The result stores itself in the session under 'result' before the
        template is called.
        """
        zope.REQUEST.SESSION['result'] = self
        logging.getLogger(__name__).debug("renderender... %s", self)
        pt=PageTemplateFile(self.zptFile).__of__(zope)
        return pt()
258:
259:
class MetaResult(AnyResult):
    """Result object that collects the metadata entries of its file."""

    def __init__(self, zope, db_result, rank):
        """Build the result and load all meta entries of the hit's file.

        zope      -- object providing getDBFile() and getDBFileMeta()
        db_result -- (fileid, idx, tags, content) tuple
        rank      -- rank stored on the result

        Raises AssertionError when getDBFileMeta() yields nothing.
        """
        AnyResult.__init__(self, zope, db_result, rank)
        logging.getLogger(__name__).debug("NEW META RESULT!")

        (fileid, tagidx, tags, content) = db_result

        # get full info from db
        self.metainfo = zope.getDBFileMeta(fileid)
        if not self.metainfo:
            # explicit raise (same exception type as the former ``assert``)
            # so the check is not stripped when running with -O
            raise AssertionError("no meta entries for fileid %r" % (fileid,))

    def checkContext(self, tags, content):
        """Take a meta entry and set url / urlabel from context tags.

        Returns True when tags is a '/context/link' or '/context/name' entry
        (whether or not content was non-empty), False for any other tag.
        """
        if tags.endswith('/context/link'):
            if content:
                self.url = content
            return True

        if tags.endswith('/context/name'):
            if content:
                self.urlabel = content
            return True

        return False
288:
289:
class BibResult(MetaResult):
    """Bib type result object."""

    def __init__(self, zope, db_result, rank):
        """Build a bibliographic result from the file's meta entries.

        zope      -- object providing getDBFile() and getDBFileMeta()
        db_result -- (fileid, idx, tags, content) tuple
        rank      -- base rank; 100 is added for this result type

        ``self.content`` becomes a dict with the bib type under 'type' and
        one entry per '/meta/bib/<key>' tag.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        logging.getLogger(__name__).debug("NEW BIB RESULT!")
        self.type = "bib"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_bib.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        # patterns are loop invariant, so build them once
        type_attr = re.compile(r'type="([^"]*)"')
        bib_field = re.compile(r'/meta/bib/(.*)')

        btype = ""
        bitems = {}

        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag: may override url / urlabel
            if self.checkContext(m_tags, m_content):
                continue

            # bib tag carrying the type attribute
            if m_tags.endswith('/meta/bib'):
                # attributes may be empty/None for a tag without attributes
                found = type_attr.search(m_attributes or "")
                if found:
                    btype = found.group(1)
                if not btype:
                    btype = "*unknown*"
                bitems['type'] = btype
                continue

            # skip other tags until a bib tag has been seen
            if not btype:
                continue

            # collect bib/something
            found = bib_field.search(m_tags)
            if found:
                bitems[found.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        # fall back to a generic label when only a link target is known
        if not self.urlabel and self.url:
            self.urlabel = "view"
338:
339:
class ArchimResult(MetaResult):
    """Archimedes type result object."""

    def __init__(self, zope, db_result, rank):
        """Build an archimedes result from the file's meta entries.

        zope      -- object providing getDBFile() and getDBFileMeta()
        db_result -- (fileid, idx, tags, content) tuple
        rank      -- base rank; 100 is added for this result type

        ``self.content`` becomes a dict with one entry per
        '/meta/archimedes/<key>' tag.
        """
        MetaResult.__init__(self, zope, db_result, rank)
        logging.getLogger(__name__).debug("NEW ARCHIM RESULT!")
        self.type = "archim"
        self.zptFile = os.path.join(package_home(globals()), "zpt/searchResult_archim.zpt")
        self.url = urlForFile(self.file)
        self.urlabel = None

        # pattern is loop invariant, so build it once
        archim_field = re.compile(r'/meta/archimedes/(.*)')

        # process info
        bitems = {}
        for (m_idx, m_tags, m_content, m_attributes) in self.metainfo:
            # context tag: may override url / urlabel
            if self.checkContext(m_tags, m_content):
                continue
            # collect archimedes/something
            found = archim_field.search(m_tags)
            if found:
                bitems[found.group(1)] = m_content

        self.content = bitems
        self.rank += 100
        # fall back to a generic label when only a link target is known
        if not self.urlabel and self.url:
            self.urlabel = "view"
372:
373:
374:
375:
def ranksort(res1, res2):
    """Comparison function that sorts results on rank, highest first.

    Returns a negative number when res1 ranks higher than res2, a positive
    number when it ranks lower and 0 when both ranks are equal.
    """
    rank1 = res1.rank
    rank2 = res2.rank
    # same value cmp(rank2, rank1) would give, without needing the
    # ``cmp`` builtin (which newer Python versions no longer provide)
    return (rank2 > rank1) - (rank2 < rank1)
379:
380:
def urlForFile(filename):
    """Heuristic: return a viewer URL for an index file name, or None.

    Only names that start with '/mpiwg/online/' and contain a
    '/index.meta' component yield a URL; the path between the two is
    passed to the viewer as its ``fn`` parameter.  Empty or missing file
    names give None.
    """
    if not filename or not filename.startswith('/mpiwg/online/'):
        return None

    logging.getLogger(__name__).debug("URLFORFILE: online %s", filename)
    # the dot is escaped so that only a literal "index.meta" matches
    found = re.search(r'/mpiwg/online/(.*)/index\.meta', filename)
    if found is None:
        return None
    return "http://nausikaa.mpiwg-berlin.mpg.de/digitallibrary/digilib.jsp?fn=%s"%found.group(1)
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>