1: import psycopg2 as psycopg
2: psyco = 2
3:
4: import logging
5: from MPIWGHelper import unicodify, utf8ify
6:
7: from xml import sax
8: from xml.sax.handler import ContentHandler
9: #from amara import saxtools
10:
11: # namespace for FileMaker8
12: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
13:
14: # list of fields that are taken from XML and checked against DB as conflicts
15: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
16: checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work']
17:
18:
19: def sql_quote(v):
20: # quote dictionary
21: quote_dict = {"\'": "''", "\\": "\\\\"}
22: for dkey in quote_dict.keys():
23: if v.find(dkey) >= 0:
24: v=quote_dict[dkey].join(v.split(dkey))
25: return "'%s'"%v
26:
27: def SimpleSearch(curs,query, args=None):
28: """execute sql query and return data"""
29: logging.debug("executing: "+query)
30: if psyco == 1:
31: query = query.encode("UTF-8")
32: #if args is not None:
33: # args = [ sql_quote(a) for a in args ]
34: #logging.debug(query)
35: #logging.debug(args)
36:
37: curs.execute(query, args)
38: logging.debug("sql done")
39: try:
40: return curs.fetchall()
41: except:
42: return None
43:
44: class xml_handler(ContentHandler):
45:
46: def __init__(self):
47: '''
48: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
49: @param dsn: database connection string
50: @param table: name of the table the xml shall be imported into
51: '''
52:
53: # set up parser
54: self.result={}
55: self.event = None
56: # self.top_dispatcher = {
57: # (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
58: # self.handle_meta_fields,
59: # (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
60: # self.handle_data,
61: # }
62:
63: # connect database
64:
65:
66:
67:
68: self.dbIDs = {}
69: self.rowcnt = 0
70:
71: self.currentName=None
72:
73: self.newDataset = []
74: self.conflicts = []
75: self.ok = []
76: self.fieldNames=[]
77: self.currentRow={}
78: self.currentTag=""
79: return
80:
81: def startElement(self, name, attrs):
82: logging.debug(name)
83: if (name.lower() == "field") :
84: self.handle_meta_fields(attrs)
85: if (name.lower() == "row") :
86: logging.debug("handleROW")
87: self.currentRow={} # new Row
88: self.currentData=0
89:
90:
91: if (name.lower()=="data"):
92:
93: self.currentName=self.fieldNames[self.currentData]
94: self.currentData+=1
95: self.currentTag="data"
96:
97: def endElement(self,name):
98: if (name.lower() == "data") :
99: self.currentTag=""
100: if (name.lower() == "row"):
101: self.handle_end_row()
102:
103: def characters(self,content):
104:
105: if self.currentName is not None:
106: logging.debug(self.currentName+" "+content)
107: self.currentRow[self.currentName]=content;
108:
109:
110: def handle_end_row(self):
111:
112: logging.debug("edd ROW")
113:
114: if self.result.has_key(self.currentRow['key']):
115: logging.error("Key %s not unique"%self.currentRow['key'])
116:
117: if self.currentName is not None:
118: self.result[self.currentRow['key']]=self.currentRow.copy()
119: #
120: #
121: # return
122:
123: def handle_meta_fields(self,attrs):
124:
125: #First round through the generator corresponds to the
126: #start element event
127: logging.debug("START -FIELD")
128: name = attrs.get('NAME')
129: name=name.replace(" ","_")# make sure no spaces
130: self.fieldNames.append(name)
131:
132: self.update_fields = self.fieldNames
133:
134: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
135: # get list of fields in db table
136:
137: #print "upQ: ", self.updQuery
138: #print "adQ: ", self.addQuery
139:
140: return
141:
142: # def handle_meta_field(self, end_condition):
143: # name = self.params.get((None, u'NAME'))
144: # yield None
145: # #Element closed. Wrap up
146: # name=name.replace(" ","_")# make sure no spaces
147: # self.fieldNames.append(name)
148: # logging.debug("FIELD name: "+name)
149: # return
150:
151: # def handle_data(self, end_condition):
152: # dispatcher = {
153: # (saxtools.START_ELEMENT, fm_ns, u'ROW'):
154: # self.handle_row,
155: # }
156: # #First round through the generator corresponds to the
157: # #start element event
158: # logging.debug("START RESULTSET")
159: # self.rowcnt = 0
160: # yield None
161: #
162: # #delegate is a generator that handles all the events "within"
163: # #this element
164: # delegate = None
165: # while not self.event == end_condition:
166: # delegate = saxtools.tenorsax.event_loop_body(
167: # dispatcher, delegate, self.event)
168: # yield None
169: #
170: # #Element closed. Wrap up
171: # logging.debug("END RESULTSET")
172: #
173: #
174: #
175: # return
176:
177: # def handle_row(self, end_condition):
178: # dispatcher = {
179: # (saxtools.START_ELEMENT, fm_ns, u'COL'):
180: # self.handle_col,
181: # }
182: # logging.debug("START ROW")
183: # self.dataSet = {}
184: # self.colIdx = 0
185: # yield None
186: #
187: # #delegate is a generator that handles all the events "within"
188: # #this element
189: # delegate = None
190: # while not self.event == end_condition:
191: # delegate = saxtools.tenorsax.event_loop_body(
192: # dispatcher, delegate, self.event)
193: # yield None
194: #
195: # #Element closed. Wrap up
196: # logging.debug("END ROW")
197: # self.rowcnt += 1
198: # # process collected row data
199: # update=False
200: # id_val=''
201: #
202: # if self.result.has_key(self.dataSet['key']):
203: # logging.error("Key %s not unique"%self.dataSet['key'])
204: #
205: # self.result[self.dataSet['key']]=self.dataSet
206: #
207: #
208: # return
209:
210: # def handle_col(self, end_condition):
211: # dispatcher = {
212: # (saxtools.START_ELEMENT, fm_ns, u'DATA'):
213: # self.handle_data_tag,
214: # }
215: # #print "START COL"
216: # yield None
217: # #delegate is a generator that handles all the events "within"
218: # #this element
219: # delegate = None
220: # while not self.event == end_condition:
221: # delegate = saxtools.tenorsax.event_loop_body(
222: # dispatcher, delegate, self.event)
223: # yield None
224: # #Element closed. Wrap up
225: # #print "END COL"
226: # self.colIdx += 1
227: # return
228: #
229: # def handle_data_tag(self, end_condition):
230: # #print "START DATA"
231: # content = u''
232: # yield None
233: # # gather child elements
234: # while not self.event == end_condition:
235: # if self.event[0] == saxtools.CHARACTER_DATA:
236: # content += self.params
237: # yield None
238: # #Element closed. Wrap up
239: # field = self.fieldNames[self.colIdx]
240: # self.dataSet[field.lower()] = content
241: # #print " DATA(", field, ") ", repr(content)
242: # return
243:
244:
245: def checkImport(dsn,resultSet):
246: #now connect to the database
247: logging.info("dsn: %s"%dsn)
248: dbCon = psycopg.connect(dsn)
249: db = dbCon.cursor()
250:
251:
252: qstr="select key from personal_www"
253:
254: results=SimpleSearch(db,qstr)
255:
256: keys=[]
257: for x in results:
258: if x[0]:
259: keys.append(unicodify(x[0]))
260:
261:
262: #first step detect new entries and conflicts
263: new=[]
264: conflicts={}
265:
266: for x in resultSet.iterkeys():
267:
268: if x not in keys:
269:
270: new.append(x)
271:
272: else:
273:
274: conflict,ret=checkForConflicts(db,resultSet[x],x)
275: if conflict:
276: conflicts[x]=ret
277:
278: return new,conflicts
279:
280: def importFMPXML(filename):
281: '''
282: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
283: @param filename: xmlfile filename
284:
285: '''
286:
287: parser = sax.make_parser()
288: #The "consumer" is our own handler
289: consumer = xml_handler()
290: #Initialize Tenorsax with handler
291: #handler = saxtools.tenorsax(consumer)
292: #Resulting tenorsax instance is the SAX handler
293: parser.setContentHandler(consumer)
294: #parser.setFeature(sax.handler.feature_namespaces, 1)
295: parser.parse(filename)
296: resultSet=consumer.result # xml now transformed into an dictionary
297:
298: return resultSet
299:
300:
301:
302: def checkForConflicts(cursor,dataSet,key):
303:
304: ret=[]
305: fields=",".join(checkFields)
306:
307: qstr="select %s from personal_www where key='%s'"%(fields,key)
308:
309:
310: sr=SimpleSearch(cursor,qstr)
311:
312: if not sr:
313: return True, None
314:
315: i=0
316: retValue=False
317:
318: for checkField in checkFields:
319: dbValueR=sr[0][i]
320: if dbValueR:
321: dbValue=unicodify(dbValueR)
322: else:
323: dbValue=""
324:
325: if checkField in dataSet:
326: setValue=dataSet[checkField]
327: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
328: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
329: ret.append((checkField,dbValue,setValue))
330: retValue=True
331:
332: else:
333: logging.warning("unknown field %s in data file!"%checkField)
334:
335: i+=1
336:
337: return retValue,ret
338:
339:
340: ##
341: ## public static int main()
342: ##
343:
344: if __name__ == "__main__":
345:
346:
347:
348: loglevel = logging.DEBUG
349:
350:
351: logging.basicConfig(level=loglevel,
352: format='%(asctime)s %(levelname)s %(message)s',
353: datefmt='%H:%M:%S')
354:
355: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
356: news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet)
357:
358:
359: print "new"
360: print len(news),news
361: print "-----------"
362: print "conflicts"
363: print conflicts
364:
365: # update_fields = None
366: #
367: # if options.update_fields:
368: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
369: #
370: # parser = sax.make_parser()
371: # #The "consumer" is our own handler
372: # consumer = xml_handler(dsn=options.dsn,table=options.table,
373: # update_fields=update_fields,id_field=options.id_field,
374: # sync_mode=options.sync_mode)
375: # #Initialize Tenorsax with handler
376: # handler = saxtools.tenorsax(consumer)
377: # #Resulting tenorsax instance is the SAX handler
378: # parser.setContentHandler(handler)
379: # parser.setFeature(sax.handler.feature_namespaces, 1)
380: # parser.parse(options.filename)
381: #
382: #
383: # print "DONE!"
384:
385:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>