Mercurial > hg > MPIWGWeb
diff wikixml2sql.py @ 0:bca61e893fcc
first checkin of MPIWGWeb r2 branch from CVS into mercurial
author | casties |
---|---|
date | Thu, 10 Jan 2013 17:52:13 +0100 |
parents | |
children |
line wrap: on
line diff
"""Import person names from the <title> elements of an XML file into PostgreSQL.

The file named by ``filename`` is parsed with expat.  The text of every
``title`` element is split on whitespace: the last word becomes the last
name, everything before it the first name, and one row per title is
inserted into the ``persons`` table.

NOTE(review): written for Python 2 and the legacy ``psycopg`` (version 1)
driver (``serialize=0`` connections, ``cursor.commit()``) -- confirm the
target environment before porting.
"""
import xml.parsers.expat

filename = "mann.xml"

# True while the parser is positioned inside a <title> element.
toggle = False

# Database cursor used by char_data(); assigned by main().
c = None

# Human-readable form of the statement, only used for the progress output.
_LOG_SQL = "INSERT into persons (firstname,lastname) VALUES ('%s','%s')"

# Statement actually executed; the values are bound by the driver.
_INSERT_SQL = "INSERT into persons (firstname,lastname) VALUES (%s,%s)"


def quote(str):
    """Return *str* with single quotes backslash-escaped, encoded as UTF-8.

    Only used to build the progress line printed by char_data(); the
    database statement itself uses bound parameters.  The parameter name
    shadows the builtin but is kept so existing callers are unaffected.
    """
    str = str.replace("'", "\\'")
    return str.encode('utf-8')


def start_element(name, attrs):
    """Expat start-tag handler: begin capturing text when a <title> opens."""
    global toggle
    if name == "title":
        toggle = True


def end_element(name):
    """Expat end-tag handler: stop capturing text when a <title> closes."""
    global toggle
    if name == "title":
        toggle = False


def char_data(data):
    """Expat character-data handler: store the name found in a <title>.

    Text outside <title> elements and whitespace-only text are ignored.
    Otherwise the last whitespace-separated word is the last name and the
    remaining words (possibly none) form the first name.  The row is
    inserted through the module-level cursor ``c`` and committed at once.
    """
    if not toggle:
        return

    parts = data.split()
    if not parts:
        # Whitespace-only text (e.g. a line break inside the element):
        # there is no name to store and parts[-1] would raise IndexError.
        return

    lastname = parts[-1]
    firstname = " ".join(parts[:-1])  # empty string for a single word

    print(_LOG_SQL % (quote(firstname), quote(lastname)))
    # Bound parameters instead of string interpolation: no SQL injection
    # and no dependence on the server's backslash-escape settings.
    c.execute(_INSERT_SQL,
              (firstname.encode('utf-8'), lastname.encode('utf-8')))
    c.commit()


def main():
    """Connect to the database, parse ``filename`` and insert every title."""
    global c
    # Imported here so the handlers above can be imported (and tested)
    # on machines without the database driver installed.
    import psycopg

    o = psycopg.connect('dbname=authorities user=dwinter password=3333',
                        serialize=0)
    try:
        c = o.cursor()

        p = xml.parsers.expat.ParserCreate()
        # Let expat coalesce adjacent text chunks so a title that is
        # delivered in several pieces still becomes a single row.
        p.buffer_text = True
        p.StartElementHandler = start_element
        p.EndElementHandler = end_element
        p.CharacterDataHandler = char_data

        fh = open(filename, "rb")
        try:
            p.ParseFile(fh)
        finally:
            fh.close()
    finally:
        o.close()


if __name__ == "__main__":
    main()