annotate wikixml2sql.py @ 131:c5761a3f3c50

Merge with 741ddaf604b14515efacec8c3110ea04635c1b17
author casties
date Thu, 30 May 2013 22:14:00 +0200
parents bca61e893fcc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
1 import xml.parsers.expat
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
2 import psycopg
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
3
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
4 filename="mann.xml"
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
5
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
6 # 3 handler functions
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
7 global toggle
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
8 toggle=False
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
9
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
10 global c
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
11
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
12 def quote(str):
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
13 str=str.replace("'","\\\'")
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
14 return str.encode('utf-8')
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
15
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
16 def start_element(name, attrs):
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
17 global toggle
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
18 if name=="title":
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
19 toggle=True
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
20 def end_element(name):
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
21 global toggle
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
22 if name=="title":
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
23 toggle=False
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
24 def char_data(data):
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
25 global toggle
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
26 global c
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
27 if toggle:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
28 splitted=data.split()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
29 if splitted >1:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
30 lastname=splitted[-1]
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
31 firstname=" ".join(splitted[0:-1])
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
32 else:
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
33 lastname=splitted[0]
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
34 firstname=""
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
35 print "INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname))
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
36 c.execute("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)))
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
37 c.commit()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
38
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
39
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
40 o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
41 c = o.cursor()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
42
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
43 p = xml.parsers.expat.ParserCreate()
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
44
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
45 p.StartElementHandler = start_element
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
46 p.EndElementHandler = end_element
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
47 p.CharacterDataHandler = char_data
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
48 fh=file(filename)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
49 p.ParseFile(fh)
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
50
bca61e893fcc first checkin of MPIWGWeb r2 branch from CVS into mercurial
casties
parents:
diff changeset
51 o.close()