# view wikisqldumpToSQL.py @ 284:1a103b073c72 default tip
#
# make favicon url host and schema relative.
# author casties
# date Thu, 25 Jun 2015 17:44:57 +0200
# parents bca61e893fcc
# children
# line wrap: on
# line source

import psycopg

# Connection settings for the target "authorities" database.
# NOTE(review): credentials are hard-coded in the source; consider moving
# them out of the file.
connection_dsn = 'dbname=authorities user=dwinter password=3333'

# serialize=0 is passed straight through to psycopg.connect; presumably it
# lets the cursor below commit on its own (c.commit() is used further down)
# -- confirm against the psycopg 1.x documentation.
o = psycopg.connect(connection_dsn, serialize=0)

# Single module-level cursor used for every INSERT issued by this script.
c = o.cursor()

import re

def quote(str):
    """Return *str* escaped for use inside a single-quoted SQL literal.

    Two markers are rewritten to a backslash-escaped single quote:

    * a literal single quote, and
    * the "@" character, which this script uses as a stand-in for an
      already-escaped quote while it parses the dump.

    All other characters are returned unchanged.
    """
    escaped_quote = "\\\'"
    for marker in ("'", "@"):
        str = str.replace(marker, escaped_quote)
    return str

filename="/Users/dwinter/Desktop/dewiki-20060908-categorylinks.sql"

# Import script: scan a categorylinks SQL dump for rows whose first quoted
# field is 'Mann' or 'Frau', treat the second quoted field as
# "lastname, firstname", and insert each such pair into the persons table.

# Read the whole dump, making sure the file handle is released again.
dump_file = open(filename)
try:
    dump_text = dump_file.read()
finally:
    dump_file.close()

# Patterns are compiled once instead of on every loop iteration.
values_re = re.compile(r"(VALUES.*)")   # rest of a line, starting at VALUES
tuple_re = re.compile(r"\((.*?)\)")     # one "( ... )" row tuple
string_re = re.compile(r"'(.*?)'")      # one single-quoted field

# NOTE(review): tuple_re is non-greedy and stops at the first ")", so a row
# whose quoted value itself contains ")" is cut short and will not match
# the checks below.

# Values are bound by the driver (DB-API "format" placeholders) rather than
# pasted into the SQL text, so no hand-written escaping reaches the server.
insert_sql = "INSERT into persons (firstname,lastname) VALUES (%s,%s)"

for values_line in values_re.findall(dump_text):
    print(values_line)
    rows = tuple_re.findall(values_line.replace("VALUES", ""))
    print(len(rows))
    for row in rows:
        # Hide backslash-escaped quotes behind "@" so that they do not
        # terminate a field when splitting on single quotes.
        fields = string_re.findall(row.replace("\\'", "@"))
        if len(fields) < 2 or fields[0] not in ("Mann", "Frau"):
            continue

        # Field 1 is split on commas: first part is the last name, second
        # part (if any) the first name; further parts are ignored.
        name_parts = fields[1].split(",")
        lastname = name_parts[0].lstrip()
        if len(name_parts) > 1:
            firstname = name_parts[1].lstrip()
        else:
            firstname = ""

        # Turn the "@" placeholder back into a real quote character.
        # As before, a genuine "@" in the dump also ends up as a quote.
        # Other backslash sequences in the dump are passed through verbatim.
        firstname = firstname.replace("@", "'")
        lastname = lastname.replace("@", "'")

        # Log line kept in the historical "literal SQL" form.
        print("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)))
        c.execute(insert_sql, (firstname, lastname))
        c.commit()