"""Copy person names from a Wikipedia ``categorylinks`` SQL dump into PostgreSQL.

Reconstructed from revision 1.1.2.1 of MPIWGWeb/Attic/wikisqldumpToSQL.py.

The script reads a MySQL dump, looks at every ``VALUES (...),(...)`` list in
it, and for each row whose first quoted literal is ``Mann`` or ``Frau``
(presumably the category column of ``categorylinks`` -- TODO confirm) splits
the second quoted literal (presumably the sort key, "Lastname, Firstname")
at commas and inserts the result into the ``persons`` table.

Run it as a script; importing the module has no side effects, so the parsing
helpers can be reused and tested without a database.
"""

import re

# Dump file that is imported when the script is run.
filename = "/Users/dwinter/Desktop/dewiki-20060908-categorylinks.sql"

# NOTE(review): credentials are hard-coded; consider moving them out of the
# source before this file is shared any further.
CONNECTION_STRING = 'dbname=authorities user=dwinter password=3333'

# Values are passed separately to execute(), never pasted into the statement.
INSERT_SQL = "INSERT into persons (firstname,lastname) VALUES (%s,%s)"

# First-literal values that mark a row as describing a person.
PERSON_CATEGORIES = ("Mann", "Frau")

# A single-quoted SQL string literal in which a backslash escapes the
# character that follows it (so \' does not terminate the literal).
_STRING = r"'(?:[^'\\]|\\.)*'"
# One parenthesised row of a VALUES list.  Quoted literals are skipped as a
# unit, so a ")" inside a name does not cut the row short.
_ROW_RE = re.compile(r"\(((?:%s|[^()'])*)\)" % _STRING)
# Captures the (still escaped) content of each quoted literal in a row.
_LITERAL_RE = re.compile(r"'((?:[^'\\]|\\.)*)'")
# A backslash escape sequence inside a literal.
_ESCAPE_RE = re.compile(r"\\(.)")
# Everything from the VALUES keyword to the end of the line.
_VALUES_RE = re.compile("(VALUES.*)")

# Escape sequences that stand for a character other than the one written;
# any other escaped character (quote, backslash, ...) stands for itself.
_ESCAPES = {"0": "\0", "b": "\b", "n": "\n", "r": "\r", "t": "\t", "Z": "\x1a"}


def quote(str):  # parameter name kept for backward compatibility
    """Return *str* with every single quote preceded by a backslash.

    Only used to render the human-readable log line; the statement that is
    actually executed is parameterized and needs no manual escaping.  Unlike
    the earlier implementation, a literal "@" is left untouched.
    """
    return str.replace("'", "\\'")


def unescape(literal):
    """Return the text of a dump string literal with backslash escapes resolved."""
    return _ESCAPE_RE.sub(
        lambda match: _ESCAPES.get(match.group(1), match.group(1)), literal
    )


def split_rows(values):
    """Return the inner text of every ``(...)`` row found in *values*.

    *values* is the part of an INSERT statement starting at ``VALUES``.
    The surrounding parentheses are stripped; string literals are returned
    still escaped.
    """
    return _ROW_RE.findall(values)


def parse_person(row):
    """Extract ``(firstname, lastname)`` from one row, or return ``None``.

    *row* is the text between the parentheses of a VALUES row.  ``None`` is
    returned when the row has fewer than two quoted literals or when the
    first one is not listed in ``PERSON_CATEGORIES``.  The second literal is
    split at commas: the first piece is the last name, the second piece (if
    any) the first name; both have leading whitespace removed.
    """
    literals = [unescape(item) for item in _LITERAL_RE.findall(row)]
    if len(literals) < 2 or literals[0] not in PERSON_CATEGORIES:
        return None
    parts = literals[1].split(",")
    lastname = parts[0].lstrip()
    # A name without a comma has no separate first name.
    firstname = parts[1].lstrip() if len(parts) > 1 else ""
    return firstname, lastname


def main():
    """Read the dump named by ``filename`` and insert every person found."""
    # Imported here so the parsing helpers above stay importable on machines
    # without the database driver.
    import psycopg

    # NOTE(review): serialize=0 is passed through unchanged from the original
    # script; confirm it is still accepted by the installed driver.
    o = psycopg.connect(CONNECTION_STRING, serialize=0)
    c = o.cursor()

    # NOTE(review): the dump is read with the platform default encoding;
    # presumably it is UTF-8 -- confirm and pass it explicitly if needed.
    with open(filename) as dump:
        txt = dump.read()

    for data in _VALUES_RE.findall(txt):
        print(data)
        rows = split_rows(data)
        print(len(rows))
        for row in rows:
            person = parse_person(row)
            if person is None:
                continue
            firstname, lastname = person
            print("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"
                  % (quote(firstname), quote(lastname)))
            c.execute(INSERT_SQL, (firstname, lastname))
            # Committed per row, on the cursor, as the original script did.
            # NOTE(review): DB-API 2.0 puts commit() on the connection;
            # confirm the driver in use offers it on cursors.
            c.commit()


if __name__ == "__main__":
    main()