--- cdli/cdliSplitter.py 2007/03/21 19:29:23 1.5 +++ cdli/cdliSplitter.py 2007/08/31 14:22:52 1.7 @@ -11,8 +11,10 @@ from Products.ZCTextIndex.PipelineFactor import re from types import StringType import logging - -import PyLucene +try: + import PyLucene +except: + print "no Lucene support" def getSupportedEncoding(encodings): for encoding in encodings: @@ -28,7 +30,7 @@ def getSupportedEncoding(encodings): """beta of a fulltext splitter for cdli """ -ignoreLines=['$','@','#','&'] +ignoreLines=['$','@','#','&','>'] separators=[''] komma_exception="([^sStThH])," # komma relevant for graphemes will not be deleted deleteGraphems="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" # for graphems