diff wikixml2sql.py @ 0:bca61e893fcc

first checkin of MPIWGWeb r2 branch from CVS into mercurial
author casties
date Thu, 10 Jan 2013 17:52:13 +0100
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/wikixml2sql.py	Thu Jan 10 17:52:13 2013 +0100
@@ -0,0 +1,51 @@
+import xml.parsers.expat
+import psycopg
+
+# Path of the wiki XML export that is parsed at the bottom of this script.
+filename = "mann.xml"
+
+# State shared by the three expat handler functions below:
+# toggle is True while the parser is inside a <title> element.
+# (A ``global`` statement at module level has no effect, so none is used
+# here; the handlers declare ``global toggle`` themselves.)
+toggle = False
+
+# The database cursor ``c`` used by char_data() is assigned further down,
+# after the database connection has been opened.
+
+def quote(str):
+    """Escape *str* for use inside a single-quoted SQL string literal.
+
+    Every backslash is doubled and every single quote is prefixed with a
+    backslash; the result is returned encoded as UTF-8.
+
+    The parameter keeps its original name (which shadows the ``str``
+    builtin inside this function) so that existing callers are unaffected.
+
+    NOTE(review): this escaping style assumes the server interprets
+    backslash escapes in ordinary string literals -- confirm against the
+    PostgreSQL ``standard_conforming_strings`` setting in use.
+    """
+    # Backslashes must be escaped first; otherwise the backslashes added in
+    # front of the quotes would be doubled as well, and an input backslash
+    # directly before a quote would neutralise the quote's escape.
+    escaped = str.replace("\\", "\\\\").replace("'", "\\'")
+    return escaped.encode('utf-8')
+
+def start_element(name, attrs):
+    """Expat start-tag handler: switch ``toggle`` on when a <title> opens.
+
+    ``attrs`` is accepted because expat passes it, but it is not used.
+    """
+    global toggle
+    if name != "title":
+        return
+    toggle = True
+def end_element(name):
+    """Expat end-tag handler: switch ``toggle`` off when a <title> closes."""
+    global toggle
+    if name != "title":
+        return
+    toggle = False
+def char_data(data):
+    """Expat character-data handler: store the text of a <title> as a person.
+
+    Text is only processed while ``toggle`` is set, i.e. inside a <title>
+    element.  The text is split on whitespace: the last word becomes the
+    last name and everything before it the first name (empty for a
+    single-word title).  The pair is inserted into the ``persons`` table
+    through the module-level cursor ``c`` and committed immediately.
+
+    Text that contains no words at all (empty or whitespace-only) is
+    skipped instead of raising IndexError.
+
+    NOTE: expat may deliver the text of one element in several calls; each
+    call is handled independently here, so a title split into chunks ends
+    up as several rows unless text buffering is enabled on the parser.
+    """
+    if not toggle:
+        return
+
+    words = data.split()
+    if not words:
+        # Nothing to store for an empty or whitespace-only chunk.
+        return
+
+    # The original test ``splitted > 1`` compared the list itself with an
+    # int; slicing handles the single-word case without any comparison.
+    lastname = words[-1]
+    firstname = " ".join(words[:-1])
+
+    # Log a readable rendering of the statement (same output as before).
+    print("INSERT into persons (firstname,lastname) VALUES ('%s','%s')"% (quote(firstname),quote(lastname)))
+
+    # Pass the values as DB-API parameters so the driver does the quoting;
+    # the names come straight from the XML document and must not be
+    # spliced into the SQL text.
+    c.execute("INSERT into persons (firstname,lastname) VALUES (%s,%s)",
+              (firstname.encode('utf-8'), lastname.encode('utf-8')))
+    # NOTE(review): commit() is called on the cursor, as in the original
+    # code -- confirm this is still available if the driver is upgraded.
+    c.commit()
+    
+    
+# Open the database, wire the handlers into an expat parser and feed it the
+# XML file.  The file and the connection are closed even when parsing or an
+# insert fails.
+# NOTE(review): the database credentials are hard-coded in the connect
+# string -- consider moving them out of the source.
+o = psycopg.connect('dbname=authorities user=dwinter password=3333',serialize=0)
+try:
+    c = o.cursor()
+
+    p = xml.parsers.expat.ParserCreate()
+    # Ask expat to coalesce adjacent text so that char_data() receives a
+    # title in one call whenever possible instead of in several chunks.
+    p.buffer_text = True
+
+    p.StartElementHandler = start_element
+    p.EndElementHandler = end_element
+    p.CharacterDataHandler = char_data
+
+    # ParseFile() reads raw bytes and handles the decoding itself.
+    fh = open(filename, "rb")
+    try:
+        p.ParseFile(fh)
+    finally:
+        fh.close()
+finally:
+    o.close()
\ No newline at end of file