Mercurial > hg > MPIWGWeb
annotate wikixml2sql.py @ 131:c5761a3f3c50
Merge with 741ddaf604b14515efacec8c3110ea04635c1b17
author | casties |
---|---|
date | Thu, 30 May 2013 22:14:00 +0200 |
parents | bca61e893fcc |
children |
rev | line source |
---|---|
0
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
1 import xml.parsers.expat |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
2 import psycopg |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
3 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
4 filename="mann.xml" |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
5 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
6 # 3 handler functions |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
7 global toggle |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
8 toggle=False |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
9 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
10 global c |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
11 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
12 def quote(str): |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
13 str=str.replace("'","\\\'") |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
14 return str.encode('utf-8') |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
15 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
16 def start_element(name, attrs): |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
17 global toggle |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
18 if name=="title": |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
19 toggle=True |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
20 def end_element(name): |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
21 global toggle |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
22 if name=="title": |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
23 toggle=False |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
24 def char_data(data): |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
25 global toggle |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
26 global c |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
27 if toggle: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
28 splitted=data.split() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
29 if splitted >1: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
30 lastname=splitted[-1] |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
31 firstname=" ".join(splitted[0:-1]) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
32 else: |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
33 lastname=splitted[0] |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
34 firstname="" |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
35 print "INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
36 c.execute("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname))) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
37 c.commit() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
38 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
39 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
40 o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
41 c = o.cursor() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
42 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
43 p = xml.parsers.expat.ParserCreate() |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
44 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
45 p.StartElementHandler = start_element |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
46 p.EndElementHandler = end_element |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
47 p.CharacterDataHandler = char_data |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
48 fh=file(filename) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
49 p.ParseFile(fh) |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
50 |
bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff
changeset
|
51 o.close() |