# Origin: MPIWGWeb/Attic/wikixml2sql.py, CVS revision 1.1.2.1 (2006/08/27 05:40:45).
"""Insert the text of every <title> element of an XML file into ``persons``.

The file named by ``filename`` is streamed through expat.  The text of each
<title> element is split on whitespace: the last token becomes ``lastname``
and everything before it becomes ``firstname``.  One row is inserted (and
committed) per title.

The code deliberately sticks to constructs that work on old Python 2
interpreters (no ``with`` statement, no conditional expressions), because the
psycopg 1 API used here (``serialize=0``, ``cursor.commit()``) is of that era.
"""
import xml.parsers.expat

filename = "mann.xml"

# Statement executed for every name; the values are bound by the driver
# instead of being pasted into the SQL text.
INSERT_SQL = "INSERT into persons (firstname,lastname) VALUES (%s,%s)"

# State shared by the three expat handler functions below.
toggle = False  # True while the parser is inside a <title> element
c = None        # database cursor; assigned by main() before parsing starts


def quote(str):
    """Backslash-escape single quotes in *str* and return it UTF-8 encoded.

    Only used to build the human-readable log line printed by char_data();
    the values actually sent to the database are bound as parameters.
    The parameter name (which shadows the builtin) is kept for
    backward compatibility with keyword callers.
    """
    str = str.replace("'", "\\'")
    return str.encode('utf-8')


def split_name(title):
    """Split *title* into a ``(firstname, lastname)`` tuple.

    The last whitespace-separated token is the last name; all preceding
    tokens, joined by single spaces, form the first name (``""`` when the
    title is a single word).  Returns ``None`` when *title* contains no
    non-whitespace characters, so callers can skip it.
    """
    parts = title.split()
    if not parts:
        return None
    return " ".join(parts[:-1]), parts[-1]


def start_element(name, attrs):
    """expat start-tag handler: start collecting text on <title>."""
    global toggle
    if name == "title":
        toggle = True


def end_element(name):
    """expat end-tag handler: stop collecting text on </title>."""
    global toggle
    if name == "title":
        toggle = False


def char_data(data):
    """expat character-data handler: store the name found inside <title>.

    Text outside a <title> element and whitespace-only text are ignored.
    For anything else the statement is echoed to stdout, executed with
    bound parameters and committed immediately.
    """
    if not toggle:
        return
    names = split_name(data)
    if names is None:
        # Whitespace-only chunk: the original crashed here with IndexError.
        return
    firstname, lastname = names

    # Log line kept in the original, human-readable form.
    print("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"
          % (quote(firstname), quote(lastname)))
    # Bind the values instead of interpolating them, so quotes or
    # backslashes in a name cannot alter the statement.  They are sent
    # UTF-8 encoded, as the original did.
    c.execute(INSERT_SQL,
              (firstname.encode('utf-8'), lastname.encode('utf-8')))
    # Cursor-level commit, unchanged from the original (presumably the
    # psycopg 1 per-cursor transaction API enabled by serialize=0).
    c.commit()


def main():
    """Connect to the database, parse ``filename`` and load all titles."""
    global c
    # Imported here so the helpers above stay importable (and testable) on
    # machines without the database driver installed.
    import psycopg

    # NOTE(review): credentials are hard-coded in the source; consider
    # moving them to configuration or the environment.
    o = psycopg.connect('dbname=authorities user=dwinter password=3333',
                        serialize=0)
    try:
        c = o.cursor()

        p = xml.parsers.expat.ParserCreate()
        # expat may report one text node in several chunks; buffering makes
        # it deliver contiguous text in a single char_data() call, so a
        # title is not split into several bogus rows.
        p.buffer_text = True
        p.StartElementHandler = start_element
        p.EndElementHandler = end_element
        p.CharacterDataHandler = char_data

        # ParseFile() reads bytes; binary mode leaves decoding to expat.
        fh = open(filename, "rb")
        try:
            p.ParseFile(fh)
        finally:
            fh.close()
    finally:
        o.close()


if __name__ == "__main__":
    main()