version 1.7.2.5, 2007/10/26 22:45:12
|
version 1.7.2.6, 2007/12/03 21:30:19
|
Line 26 separators=['']
|
Line 26 separators=['']
|
# commas relevant for graphemes will not be deleted |
# commas relevant for graphemes will not be deleted |
komma_exception="([^sStThH])," |
komma_exception="([^sStThH])," |
# grapheme boundaries |
# grapheme boundaries |
graphemeBounds="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" |
#graphemeBounds="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" |
|
graphemeBounds="\{|\}|<|>|-|_|\#|,|\]|\[|\!|\?|\"" |
# for words |
# for words |
wordBounds="<|>|\(|\)|_|\#|,|\||\]|\[|\!|\?" |
#wordBounds="<|>|\(|\)|_|\#|,|\||\]|\[|\!|\?" |
|
wordBounds="<|>|_|\#|,|\]|\[|\!|\?|\"" |
|
|
|
|
class cdliSplitter: |
class cdliSplitter: |
Line 86 class cdliSplitter:
|
Line 88 class cdliSplitter:
|
if not (w==''): |
if not (w==''): |
result.append(w) |
result.append(w) |
|
|
#logging.debug("split '%s' into %s"%(lst,repr(result))) |
logging.debug("split '%s' into %s"%(lst,repr(result))) |
return result |
return result |
|
|
|
|