| version 1.5, 2007/03/21 19:29:23 | version 1.7, 2007/08/31 14:22:52 |
|---|---|
| Line 11 from Products.ZCTextIndex.PipelineFactor | Line 11 from Products.ZCTextIndex.PipelineFactor |
| import re | import re |
| from types import StringType | from types import StringType |
| import logging | import logging |
| try: | |
| import PyLucene | import PyLucene |
| except: | |
| print "no Lucene support" | |
| def getSupportedEncoding(encodings): | def getSupportedEncoding(encodings): |
| for encoding in encodings: | for encoding in encodings: |
| Line 28 def getSupportedEncoding(encodings): | Line 30 def getSupportedEncoding(encodings): |
| """beta of a fulltext splitter for cdli | """beta of a fulltext splitter for cdli |
| """ | """ |
| ignoreLines=['$','@','#','&'] | ignoreLines=['$','@','#','&','>'] |
| separators=[''] | separators=[''] |
| komma_exception="([^sStThH])," # komma relevant for graphemes will not be deleted | komma_exception="([^sStThH])," # komma relevant for graphemes will not be deleted |
| deleteGraphems="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" # for graphems | deleteGraphems="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" # for graphems |