version 1.8, 2008/01/21 17:19:01
|
version 1.9, 2008/09/25 12:37:55
|
Line 36 wordBounds="_|,|\""
|
Line 36 wordBounds="_|,|\""
|
wordIgnore="<|>|\#|\||\]|\[|\!|\?\*|;" |
wordIgnore="<|>|\#|\||\]|\[|\!|\?\*|;" |
|
|
class cdliSplitter: |
class cdliSplitter: |
|
|
"""base class for splitter. |
"""base class for splitter. |
the difference between word and grapheme splitter |
the difference between word and grapheme splitter |
is the word boundary list.""" |
is the word boundary list.""" |
|
|
|
|
default_encoding = "utf-8" |
default_encoding = "utf-8" |
bounds=graphemeBounds |
bounds=graphemeBounds |
boundsex=re.compile(graphemeBounds) |
boundsex=re.compile(graphemeBounds) |