Mercurial > hg > MPIWGWeb
annotate wikisqldumpToSQL.py @ 0:bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
author | casties |
---|---|
date | Thu, 10 Jan 2013 17:52:13 +0100 |
parents | |
children |
rev | line source |
---|---|
0
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
1 import psycopg |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
2 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
3 o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
4 c = o.cursor() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
5 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
6 import re |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
7 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
8 def quote(str): |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
9 str=str.replace("'","\\\'") |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
10 str=str.replace("@","\\\'") |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
11 return str#.encode('utf-8') |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
12 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
13 filename="/Users/dwinter/Desktop/dewiki-20060908-categorylinks.sql" |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
14 txt=file(filename).read() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
15 datas=re.findall("(VALUES.*)",txt) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
16 for data in datas: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
17 print data |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
18 txt=data.replace("VALUES","") |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
19 data=re.findall("\((.*?)\)",txt) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
20 print len(data) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
21 for result in data: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
22 result=result.replace("\\\'","@") |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
23 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
24 new=re.findall("\'(.*?)\'",result) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
25 if len(new)>1: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
26 if (len(new)>0) and ((new[0]=='Mann') or (new[0]=="Frau")): |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
27 try: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
28 firstname=new[1].split(",")[1].lstrip() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
29 except: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
30 firstname="" |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
31 lastname=new[1].split(",")[0].lstrip() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
32 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
33 print "INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
34 c.execute("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname))) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
35 c.commit() |