version 1.1.2.1, 2006/08/27 05:40:45
|
version 1.1.2.2, 2006/09/13 08:17:33
|
Line 31 def getSupportedEncoding(encodings):
|
Line 31 def getSupportedEncoding(encodings):
|
splitter for lastnames in database |
splitter for lastnames in database |
|
|
""" |
""" |
|
import re |
|
|
def quote(str): |
def quote(str): |
str=str.replace("'","\\\'") |
str=str.replace("'","\\\'") |
Line 40 class nameSplitter:
|
Line 41 class nameSplitter:
|
default_encoding = "utf-8" |
default_encoding = "utf-8" |
|
|
def process(self, lsttmp): |
def process(self, lsttmp): |
|
print "XXX" |
result = [] |
result = [] |
o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) |
o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) |
c = o.cursor() |
c = o.cursor() |
replaceStr="<>;.:()" |
# replaceStr="<>;.:()," |
lst=" ".join(lsttmp) |
lst=" ".join(lsttmp) |
for x in replaceStr: |
# for x in replaceStr: |
lst=lst.replace(x," ") |
# lst=lst.replace(x," ") |
|
lst=re.sub("[<|>|;|.|:|\(|\|)|,]", " ", lst) |
for s in lst.split(): |
for s in lst.split(): |
|
|
if type(s) is not StringType: # not unicode |
if type(s) is not StringType: # not unicode |
s = s.decode(self.default_encoding) |
s = s.decode(self.default_encoding) |
|
|
if s not in result: # check for database entry |
if s not in result: # check for database entry |
c.execute("select lastname from persons where lower(lastname) = '%s'"%quote(s)) |
|
|
#c.execute("select lastname from persons where lower(lastname) = '%s'"%quote(s.lower())) |
|
c.execute("select lastname from persons where lastname = '%s'"%quote(s)) |
if c.fetchone(): |
if c.fetchone(): |
print "found",s |
print "found",s |
result.append(lastname) |
result.append(s) |
return result |
return result |
|
|
|
|