version 1.5, 2007/03/21 19:29:23
|
version 1.7, 2007/08/31 14:22:52
|
Line 11 from Products.ZCTextIndex.PipelineFactor
|
Line 11 from Products.ZCTextIndex.PipelineFactor
|
import re |
import re |
from types import StringType |
from types import StringType |
import logging |
import logging |
|
try: |
import PyLucene |
import PyLucene |
|
except: |
|
print "no Lucene support" |
|
|
def getSupportedEncoding(encodings): |
def getSupportedEncoding(encodings): |
for encoding in encodings: |
for encoding in encodings: |
Line 28 def getSupportedEncoding(encodings):
|
Line 30 def getSupportedEncoding(encodings):
|
"""beta of a fulltext splitter for cdli |
"""beta of a fulltext splitter for cdli |
|
|
""" |
""" |
ignoreLines=['$','@','#','&'] |
ignoreLines=['$','@','#','&','>'] |
separators=[''] |
separators=[''] |
komma_exception="([^sStThH])," # komma relevant for graphemes will not be deleted |
komma_exception="([^sStThH])," # komma relevant for graphemes will not be deleted |
deleteGraphems="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" # for graphems |
deleteGraphems="\{|\}|<|>|\(|\)|-|_|\#|,|\||\]|\[|\!|\?" # for graphems |