Mercurial > hg > MPIWGWeb
annotate wikisqldumpToSQL.py @ 0:bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
| author | casties |
|---|---|
| date | Thu, 10 Jan 2013 17:52:13 +0100 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
1 import psycopg |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
2 |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
3 o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
4 c = o.cursor() |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
5 |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
6 import re |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
7 |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
8 def quote(str): |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
9 str=str.replace("'","\\\'") |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
10 str=str.replace("@","\\\'") |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
11 return str#.encode('utf-8') |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
12 |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
13 filename="/Users/dwinter/Desktop/dewiki-20060908-categorylinks.sql" |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
14 txt=file(filename).read() |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
15 datas=re.findall("(VALUES.*)",txt) |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
16 for data in datas: |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
17 print data |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
18 txt=data.replace("VALUES","") |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
19 data=re.findall("\((.*?)\)",txt) |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
20 print len(data) |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
21 for result in data: |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
22 result=result.replace("\\\'","@") |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
23 |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
24 new=re.findall("\'(.*?)\'",result) |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
25 if len(new)>1: |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
26 if (len(new)>0) and ((new[0]=='Mann') or (new[0]=="Frau")): |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
27 try: |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
28 firstname=new[1].split(",")[1].lstrip() |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
29 except: |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
30 firstname="" |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
31 lastname=new[1].split(",")[0].lstrip() |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
32 |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
33 print "INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)) |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
34 c.execute("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname))) |
|
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
35 c.commit() |
