--- cdli/cdliSplitter.py 2007/12/13 19:20:45 1.7.2.9 +++ cdli/cdliSplitter.py 2008/01/14 17:35:26 1.7.2.12 @@ -29,11 +29,11 @@ komma_exceptionex=re.compile(komma_excep # grapheme boundaries #graphemeBounds="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" graphemeBounds="\{|\}|<|>|-|_|\#|,|\]|\[|\!|\?|\"" -graphemeIgnore="<|>|\#|\||\]|\[|\!|\?" +graphemeIgnore="<|>|\#|\||\]|\[|\!|\?\*|;" # for words #wordBounds="<|>|\(|\)|_|\#|,|\||\]|\[|\!|\?" wordBounds="_|,|\"" -wordIgnore="<|>|\#|\||\]|\[|\!|\?" +wordIgnore="<|>|\#|\||\]|\[|\!|\?\*|;" class cdliSplitter: """base class for splitter. @@ -73,7 +73,7 @@ class cdliSplitter: elif not (s[0] in ignoreLines): # regular line - lineparts=s.split(".") + lineparts=s.split(". ",1) if len(lineparts)==1: # no line number txt=s @@ -95,7 +95,7 @@ class cdliSplitter: if not (w==''): result.append(w) - #logging.debug("split '%s' into %s"%(lst,repr(result))) + logging.debug("split '%s' into %s"%(lst,repr(result))) return result