import logging
import re


class nameSplitter:
    """Splitter that keeps only the words found as last names in the database.

    Reconstructed from the post-patch side of the revision 1.1.2.2 diff;
    only the members visible in that diff are reproduced here.
    """

    default_encoding = "utf-8"

    # NOTE(review): credentials are hard-coded in source. Kept byte-for-byte
    # so behaviour is unchanged, but exposed as a class attribute so that a
    # deployment can override it -- move to configuration.
    dsn = 'dbname=authorities user=dwinter password=3333'

    # Punctuation that is turned into a blank before the text is split into
    # words.  "|" is part of the set because the pattern this replaces,
    # "[<|>|;|.|:|\(|\|)|,]", listed it inside the character class.
    _separators = re.compile(r"[<>;.:(),|]")

    def process(self, lsttmp):
        """Return the words of ``lsttmp`` that exist as ``persons.lastname``.

        ``lsttmp`` is a sequence of strings.  The strings are joined with
        blanks, the separator characters are replaced by blanks, and the
        text is split on whitespace.  Every distinct word for which the
        ``persons`` table has a row with exactly that ``lastname`` is
        returned once, in order of first appearance.

        Errors raised by the database driver propagate to the caller; the
        cursor and connection are closed in either case.
        """
        words = self._separators.sub(" ", " ".join(lsttmp)).split()
        if not words:
            # Nothing to look up, so do not open a connection at all.
            return []

        result = []
        o = psycopg.connect(self.dsn, serialize=0)
        try:
            c = o.cursor()
            try:
                for s in words:
                    # NOTE(review): the original comment on this test reads
                    # "not unicode", yet the test fires for values that are
                    # NOT plain byte strings.  It looks inverted; left as it
                    # was -- confirm the intent before changing it.
                    if type(s) is not StringType:
                        s = s.decode(self.default_encoding)

                    if s not in result:
                        # Bound parameter instead of string interpolation:
                        # the driver does the quoting, so a word containing
                        # a quote cannot alter the statement.
                        c.execute(
                            "select lastname from persons where lastname = %s",
                            (s,))
                        if c.fetchone():
                            logging.debug("found %s", s)
                            result.append(s)
            finally:
                c.close()
        finally:
            o.close()
        return result