version 1.115, 2010/03/19 14:01:41
|
version 1.117, 2011/04/27 16:19:27
|
Line 306 def manage_addCDLIFile(self,id,title,loc
|
Line 306 def manage_addCDLIFile(self,id,title,loc
|
|
|
def checkUTF8(data): |
def checkUTF8(data): |
"""check utf 8""" |
"""check utf 8""" |
|
if not isinstance(data, str): |
|
logging.error("checkUTF8 data is not string! (%s)"%repr(data)) |
|
|
try: |
try: |
data.encode('utf-8') |
data.decode('utf-8') |
|
logging.debug("checkUTF8: ok!") |
return True |
return True |
except: |
except: |
|
logging.debug("checkUTF8: false!") |
return False |
return False |
|
|
|
|
Line 335 def splitatf(fh,dir=None,ext=None):
|
Line 340 def splitatf(fh,dir=None,ext=None):
|
i=0 |
i=0 |
|
|
#ROC: why split \n first and then \r??? |
#ROC: why split \n first and then \r??? |
if (type(fh) is StringType) or (type(fh) is UnicodeType): |
if isinstance(fh, basestring): |
iter=fh.split("\n") |
iter=fh.split("\n") |
else: |
else: |
iter=fh.readlines() |
iter=fh.readlines() |
Line 369 def splitatf(fh,dir=None,ext=None):
|
Line 374 def splitatf(fh,dir=None,ext=None):
|
if dir: |
if dir: |
filename=os.path.join(dir,filename) |
filename=os.path.join(dir,filename) |
nf=file(filename,"w") |
nf=file(filename,"w") |
logging.info("open %s"%filename) |
logging.debug("open %s"%filename) |
if nf: |
if nf: |
nf.write(line.replace("\n","")+"\n") |
nf.write(line.replace("\n","")+"\n") |
|
|
Line 378 def splitatf(fh,dir=None,ext=None):
|
Line 383 def splitatf(fh,dir=None,ext=None):
|
except: |
except: |
pass |
pass |
|
|
if not((type(fh) is StringType) or (type(fh) is UnicodeType)): |
if not isinstance(fh, basestring): |
fh.close() |
fh.close() |
|
|
return ret,len(os.listdir(dir)) |
return ret,len(os.listdir(dir)) |
|
|
|
|
Line 888 class CDLIRoot(Folder):
|
Line 894 class CDLIRoot(Folder):
|
|
|
|
|
|
|
def searchText(self, query, index='graphemes'): |
def searchText(self, query, index='graphemes', resultFilter=None): |
"""searches query in the fulltext index and returns a list of file ids/P-numbers""" |
"""searches query in the fulltext index and returns a list of file IDs/P-numbers |
|
resultFilter is matched against the beginning of the file ID""" |
# see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13 |
# see also: http://www.plope.com/Books/2_7Edition/SearchingZCatalog.stx#2-13 |
logging.debug("searchtext for '%s' in index %s"%(query,index)) |
logging.debug("searchtext for '%s' in index %s"%(query,index)) |
#import Products.ZCTextIndex.QueryParser |
#import Products.ZCTextIndex.QueryParser |
Line 900 class CDLIRoot(Folder):
|
Line 907 class CDLIRoot(Folder):
|
# do search |
# do search |
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
resultset = idx.search(query_request=idxQuery,sort_index='textid') |
# put only the P-Number in the result |
# put only the P-Number in the result |
|
if resultFilter is None: |
results = [res.getId[:7] for res in resultset] |
results = [res.getId[:7] for res in resultset] |
|
else: |
|
results = [res.getId[:7] for res in resultset if res.getId.startswith(resultFilter)] |
logging.debug("searchtext: found %d texts"%len(results)) |
logging.debug("searchtext: found %d texts"%len(results)) |
return results |
return results |
|
|