--- cdli/cdliSplitter.py 2008/01/14 17:35:26 1.7.2.12 +++ cdli/cdliSplitter.py 2008/09/25 12:37:55 1.9 @@ -36,9 +36,11 @@ wordBounds="_|,|\"" wordIgnore="<|>|\#|\||\]|\[|\!|\?\*|;" class cdliSplitter: + """base class for splitter. the difference between word and grapheme splitter is the word boundary list.""" + default_encoding = "utf-8" bounds=graphemeBounds @@ -95,7 +97,7 @@ class cdliSplitter: if not (w==''): result.append(w) - logging.debug("split '%s' into %s"%(lst,repr(result))) + #logging.debug("split '%s' into %s"%(lst,repr(result))) return result