--- cdli/cdliSplitter.py	2008/01/02 15:52:01	1.7.2.10
+++ cdli/cdliSplitter.py	2008/09/25 12:37:55	1.9
@@ -29,16 +29,18 @@ komma_exceptionex=re.compile(komma_excep
 # grapheme boundaries
 #graphemeBounds="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?"
 graphemeBounds="\{|\}|<|>|-|_|\#|,|\]|\[|\!|\?|\""
-graphemeIgnore="<|>|\#|\||\]|\[|\!|\?\*"
+graphemeIgnore="<|>|\#|\||\]|\[|\!|\?\*|;"  # ';' added to the ignore list. NOTE(review): if used as a regex alternation, \?\* matches the sequence "?*", not "?" or "*" alone -- confirm intent
 # for words 
 #wordBounds="<|>|\(|\)|_|\#|,|\||\]|\[|\!|\?"
 wordBounds="_|,|\""
-wordIgnore="<|>|\#|\||\]|\[|\!|\?\*"
+wordIgnore="<|>|\#|\||\]|\[|\!|\?\*|;"  # ';' added to the ignore list. NOTE(review): if used as a regex alternation, \?\* matches the sequence "?*", not "?" or "*" alone -- confirm intent
            
 class cdliSplitter:
+
     """base class for splitter. 
     the difference between word and grapheme splitter 
     is the word boundary list."""
+
     
     default_encoding = "utf-8"
     bounds=graphemeBounds
@@ -73,7 +75,7 @@ class cdliSplitter:
                         
                     elif not (s[0] in ignoreLines):
                         # regular line
-                        lineparts=s.split(".")
+                        lineparts=s.split(". ",1)  # split once, at the first ". " only (separator after the line number, if any); later dots stay in the text
                         if len(lineparts)==1: 
                             # no line number
                             txt=s