version 1.6, 2007/04/23 13:07:10
|
version 1.7, 2007/08/31 14:22:52
|
Line 30 def getSupportedEncoding(encodings):
|
Line 30 def getSupportedEncoding(encodings):
|
"""beta of a fulltext splitter for cdli |
"""beta of a fulltext splitter for cdli |
|
|
""" |
""" |
ignoreLines=['$','@','#','&'] |
ignoreLines=['$','@','#','&','>'] |
separators=[''] |
separators=[''] |
komma_exception="([^sStThH])," # komma relevant for graphemes will not be deleted |
komma_exception="([^sStThH])," # komma relevant for graphemes will not be deleted |
deleteGraphems="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" # for graphems |
deleteGraphems="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" # for graphems |