view wikisqldumpToSQL.py @ 228:afc96bc56817

also show multiple projects with the same number (including none) in the tree.
author casties
date Tue, 05 Nov 2013 13:58:45 +0100
parents bca61e893fcc
children
line wrap: on
line source

import psycopg

# Connect to the "authorities" database through psycopg and create the cursor
# `c` that the rest of this script uses to run and commit its INSERTs.
# NOTE(review): user name and password are hard-coded in the DSN string.
# NOTE(review): `serialize=0` looks like the psycopg 1.x connect() option;
# presumably it is what makes `commit()` available on the cursor itself —
# confirm against the installed psycopg version.
o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) 
c = o.cursor() 

import re

def quote(str):
    """Return *str* with apostrophes backslash-escaped.

    Every ``'`` and every ``@`` in the input (``@`` is treated as a
    stand-in for an apostrophe) comes out as a backslash followed by an
    apostrophe. All other characters are passed through unchanged.

    The parameter name shadows the builtin ``str``; it is kept as-is so
    the signature stays the same for existing callers.
    """
    escaped_apostrophe = "\\'"
    # Fold the stand-in character into a real apostrophe first, so one
    # split/join pass escapes both kinds in the same way.
    pieces = str.replace("@", "'").split("'")
    return escaped_apostrophe.join(pieces)

# Import script: scan the SQL dump named by `filename` for INSERT ... VALUES
# lines, pick the rows whose first quoted field is 'Mann' or 'Frau', split the
# second quoted field at the first comma into "lastname, firstname", and insert
# the pair into the `persons` table through the cursor `c`.
# NOTE(review): the first/second quoted fields are presumably the category
# name and the sort key of a MediaWiki `categorylinks` row — verify against
# the dump's CREATE TABLE statement.
filename="/Users/dwinter/Desktop/dewiki-20060908-categorylinks.sql"

# Values are bound by the driver instead of being pasted into the statement,
# so apostrophes and backslashes in names can neither break nor alter the SQL.
INSERT_SQL = "INSERT into persons (firstname,lastname) VALUES (%s,%s)"

# One parenthesised row of a VALUES list. Quoted strings are consumed as a
# unit, so a ')' inside a value (e.g. 'Name (Politiker)') no longer ends the row.
_ROW_RE = re.compile(r"\(((?:[^()']|'(?:[^'\\]|\\.)*')*)\)")
# One single-quoted string literal; a backslash always takes the next
# character with it, so \' and \\ inside the literal are handled correctly.
_STRING_RE = re.compile(r"'((?:[^'\\]|\\.)*)'")
# Backslash escape inside a dumped string literal.
_ESCAPE_RE = re.compile(r"\\(.)", re.DOTALL)
# Escapes whose meaning is not simply "the character itself".
_MYSQL_ESCAPES = {"0": "\0", "b": "\b", "n": "\n", "r": "\r", "t": "\t", "Z": "\x1a"}


def _unescape(value):
    """Decode the backslash escapes of one dumped string literal."""
    return _ESCAPE_RE.sub(
        lambda found: _MYSQL_ESCAPES.get(found.group(1), found.group(1)), value)


# Stream the dump line by line: every INSERT statement sits on one line, so
# there is no need to hold the whole file in memory, and `with` guarantees
# the file handle is closed even if an INSERT fails.
# NOTE(review): the file is opened without an explicit encoding, as before;
# under Python 2 the names are passed to the database as raw bytes.
with open(filename) as dump:
    for line in dump:
        values_part = re.search(r"VALUES.*", line)
        if values_part is None:
            continue
        data = values_part.group(0)
        print(data)

        rows = _ROW_RE.findall(data)
        print(len(rows))
        for row in rows:
            fields = [_unescape(raw) for raw in _STRING_RE.findall(row)]
            # Need at least the selector field and the name field.
            if len(fields) < 2 or fields[0] not in ("Mann", "Frau"):
                continue

            name_parts = fields[1].split(",")
            lastname = name_parts[0].lstrip()
            # A name without a comma has no separate first name.
            if len(name_parts) > 1:
                firstname = name_parts[1].lstrip()
            else:
                firstname = ""

            params = (firstname, lastname)
            print("%s %r" % (INSERT_SQL, params))
            c.execute(INSERT_SQL, params)
            c.commit()