version 1.7.2.3, 2007/10/19 16:30:58
|
version 1.7.2.6, 2007/12/03 21:30:19
|
Line 26 separators=['']
|
Line 26 separators=['']
|
# commas relevant for graphemes will not be deleted |
# commas relevant for graphemes will not be deleted |
komma_exception="([^sStThH])," |
komma_exception="([^sStThH])," |
# grapheme boundaries |
# grapheme boundaries |
graphemeBounds="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" |
#graphemeBounds="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" |
|
graphemeBounds="\{|\}|<|>|-|_|\#|,|\]|\[|\!|\?|\"" |
# for words |
# for words |
wordBounds="<|>|\(|\)|_|\#|,|\||\]|\[|\!|\?" |
#wordBounds="<|>|\(|\)|_|\#|,|\||\]|\[|\!|\?" |
|
wordBounds="<|>|_|\#|,|\]|\[|\!|\?|\"" |
|
|
|
|
class cdliSplitter: |
class cdliSplitter: |
Line 44 class cdliSplitter:
|
Line 46 class cdliSplitter:
|
def process(self, lst): |
def process(self, lst): |
"""gets a list of strings and returns a list of words""" |
"""gets a list of strings and returns a list of words""" |
|
|
logging.debug("cdliSplitter") |
logging.debug("cdliSplitter: %s"%self.indexName) |
result = [] |
result = [] |
pNum=None |
pNum=None |
lineNum=None |
lineNum=None |
Line 86 class cdliSplitter:
|
Line 88 class cdliSplitter:
|
if not (w==''): |
if not (w==''): |
result.append(w) |
result.append(w) |
|
|
|
logging.debug("split '%s' into %s"%(lst,repr(result))) |
return result |
return result |
|
|
|
|