version 1.1.2.1, 2006/08/27 05:40:45
|
version 1.1.2.3, 2006/10/19 07:35:31
|
Line 5 name splitter
|
Line 5 name splitter
|
from Products.ZCTextIndex.ISplitter import ISplitter |
from Products.ZCTextIndex.ISplitter import ISplitter |
from Products.ZCTextIndex.PipelineFactory import element_factory |
from Products.ZCTextIndex.PipelineFactory import element_factory |
|
|
import psycopg |
|
|
|
import re |
import re |
from types import StringType |
from types import StringType |
Line 31 def getSupportedEncoding(encodings):
|
Line 31 def getSupportedEncoding(encodings):
|
splitter for lastnames in database |
splitter for lastnames in database |
|
|
""" |
""" |
|
import re |
|
|
def quote(str): |
def quote(str): |
str=str.replace("'","\\\'") |
str=str.replace("'","\\\'") |
Line 40 class nameSplitter:
|
Line 41 class nameSplitter:
|
default_encoding = "utf-8" |
default_encoding = "utf-8" |
|
|
def process(self, lsttmp):
    """Return the tokens of ``lsttmp`` that are stored as last names.

    The strings in ``lsttmp`` are joined with single spaces, every
    occurrence of one of the characters ``< > ; . : ( ) | ,`` is replaced
    by a space, and the text is split on whitespace.  Each token is then
    looked up with an exact match against ``persons.lastname``.

    Returns a list of the tokens that have at least one matching row, in
    order of first appearance and without duplicates.  An input that
    yields no tokens returns ``[]`` without opening a database connection.
    """
    # Same character set as the earlier "[<|>|;|.|:|\(|\|)|,]" pattern
    # (the alternation bars inside the class only added a literal "|").
    text = re.sub(r"[<>;.:()|,]", " ", " ".join(lsttmp))
    tokens = text.split()
    if not tokens:
        return []

    # The database driver is only needed once there is something to look up.
    import psycopg

    # NOTE(review): connection settings, including the password, are
    # hard-coded; consider moving them to configuration.
    conn = psycopg.connect('dbname=authorities user=dwinter password=3333', serialize=0)
    try:
        cursor = conn.cursor()
        result = []
        for s in tokens:
            # NOTE(review): this decodes values that are NOT plain ``str``;
            # confirm the condition is not inverted for unicode input.
            if type(s) is not StringType:
                s = s.decode(self.default_encoding)

            if s not in result:
                # Let the driver quote the value instead of interpolating
                # it into the SQL text.
                cursor.execute(
                    "select lastname from persons where lastname = %s", (s,))
                if cursor.fetchone():
                    result.append(s)
        return result
    finally:
        # Release the connection even if a lookup raises.
        conn.close()
|
|
|
|