comparison wikixml2sql.py @ 0:bca61e893fcc

first checkin of MPIWGWeb r2 branch from CVS into mercurial
author casties
date Thu, 10 Jan 2013 17:52:13 +0100
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:bca61e893fcc
1 import xml.parsers.expat
2 import psycopg
3
# XML input file that is parsed for <title> elements.
filename = "mann.xml"

# State shared by the three expat handler functions below:
# ``toggle`` is True while the parser is inside a <title> element.
# ``c`` (the database cursor) is assigned further down, once the
# connection has been opened.
toggle = False
11
def quote(str):
    """Escape *str* for use inside a single-quoted SQL literal.

    Backslashes are doubled and single quotes are prefixed with a
    backslash, then the result is encoded as UTF-8.

    Backslashes must be escaped before quotes: otherwise a value ending
    in a backslash would escape the closing quote of the SQL literal.

    NOTE(review): this assumes the server interprets backslash escapes in
    ordinary string literals -- confirm against the server configuration.
    Bound query parameters are the safer alternative.

    :param str: text to escape (the name shadows the builtin; it is kept
        so that keyword callers continue to work).
    :returns: the escaped text, UTF-8 encoded.
    """
    escaped = str.replace("\\", "\\\\").replace("'", "\\'")
    return escaped.encode('utf-8')
15
def start_element(name, attrs):
    """expat start-tag handler: raise the ``toggle`` flag on <title>.

    :param name: tag name of the element being opened.
    :param attrs: attributes of the element (not used).
    """
    global toggle
    if name != "title":
        return
    toggle = True
def end_element(name):
    """expat end-tag handler: clear the ``toggle`` flag on </title>.

    :param name: tag name of the element being closed.
    """
    global toggle
    if name != "title":
        return
    toggle = False
def char_data(data):
    """expat character-data handler: store a <title> text as a person.

    Does nothing unless ``toggle`` is set (i.e. the parser is inside a
    <title> element).  Otherwise the text is split on whitespace; the last
    word becomes ``lastname`` and everything before it ``firstname``
    (empty for a single word).  The row is inserted through the
    module-level cursor ``c`` and committed immediately.

    NOTE(review): every call is treated as one complete name.  expat may
    deliver a single text node in several calls unless the parser has
    ``buffer_text`` enabled -- confirm how the parser is configured.

    :param data: the text chunk reported by the parser.
    """
    # ``toggle`` and ``c`` are only read here, so no ``global`` is needed.
    if not toggle:
        return

    words = data.split()
    if not words:
        # Whitespace-only chunk: there is no name to store.  Indexing the
        # empty list here used to raise IndexError and abort the parse.
        return

    lastname = words[-1]
    firstname = " ".join(words[:-1])

    # Trace output only; the statement actually executed is parameterized.
    print("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)))

    # Let the driver quote the values instead of interpolating text from
    # the XML file into the SQL string.  The values are UTF-8 encoded, as
    # they were when the statement was built by hand.
    c.execute("INSERT into persons (firstname,lastname) VALUES (%s,%s)",
              (firstname.encode('utf-8'), lastname.encode('utf-8')))
    c.commit()
38
39
# Driver: open the database, wire the three handlers into an expat parser
# and feed it the XML file.
# NOTE(review): the credentials are hard-coded in the DSN below; consider
# moving them to configuration.
o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0)
try:
    c = o.cursor()

    p = xml.parsers.expat.ParserCreate()
    # Ask expat to hand over contiguous text in one piece where possible,
    # so that a title is not split across several char_data calls.
    p.buffer_text = True

    p.StartElementHandler = start_element
    p.EndElementHandler = end_element
    p.CharacterDataHandler = char_data

    # ParseFile reads raw bytes; open in binary mode and always close.
    fh = open(filename, "rb")
    try:
        p.ParseFile(fh)
    finally:
        fh.close()
finally:
    # Close the connection even if parsing or an insert fails.
    o.close()