annotate wikisqldumpToSQL.py @ 106:7dd264f9b728

importOldSettingforSorting and Publish
author dwinter
date Mon, 27 May 2013 15:25:06 +0200
parents bca61e893fcc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
1 import psycopg
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
2
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
3 o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
4 c = o.cursor()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
5
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
6 import re
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
7
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
8 def quote(str):
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
9 str=str.replace("'","\\\'")
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
10 str=str.replace("@","\\\'")
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
11 return str#.encode('utf-8')
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
12
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
13 filename="/Users/dwinter/Desktop/dewiki-20060908-categorylinks.sql"
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
14 txt=file(filename).read()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
15 datas=re.findall("(VALUES.*)",txt)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
16 for data in datas:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
17 print data
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
18 txt=data.replace("VALUES","")
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
19 data=re.findall("\((.*?)\)",txt)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
20 print len(data)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
21 for result in data:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
22 result=result.replace("\\\'","@")
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
23
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
24 new=re.findall("\'(.*?)\'",result)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
25 if len(new)>1:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
26 if (len(new)>0) and ((new[0]=='Mann') or (new[0]=="Frau")):
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
27 try:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
28 firstname=new[1].split(",")[1].lstrip()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
29 except:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
30 firstname=""
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
31 lastname=new[1].split(",")[0].lstrip()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
32
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
33 print "INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname))
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
34 c.execute("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)))
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
35 c.commit()