1: import psycopg2 as psycopg
2: psyco = 2
3:
4: import logging
5: from MPIWGHelper import unicodify, utf8ify
6:
7: from xml import sax
8: from xml.sax.handler import ContentHandler
9:
10: # namespace for FileMaker8
11: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
12:
13: # list of fields that are taken from XML and checked against DB as conflicts
14: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
15: checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work']
16:
17:
18: def sql_quote(v):
19: # quote dictionary
20: quote_dict = {"\'": "''", "\\": "\\\\"}
21: for dkey in quote_dict.keys():
22: if v.find(dkey) >= 0:
23: v=quote_dict[dkey].join(v.split(dkey))
24: return "'%s'"%v
25:
26: def SimpleSearch(curs,query, args=None):
27: """execute sql query and return data"""
28: logging.debug("executing: "+query)
29: if psyco == 1:
30: query = query.encode("UTF-8")
31: #if args is not None:
32: # args = [ sql_quote(a) for a in args ]
33: #logging.debug(query)
34: #logging.debug(args)
35:
36: curs.execute(query, args)
37: logging.debug("sql done")
38: try:
39: return curs.fetchall()
40: except:
41: return None
42:
43: class xml_handler(ContentHandler):
44:
45: def __init__(self):
46: '''
47: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
48: @param dsn: database connection string
49: @param table: name of the table the xml shall be imported into
50: '''
51:
52: # set up parser
53: self.result={}
54: self.event = None
55: # self.top_dispatcher = {
56: # (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
57: # self.handle_meta_fields,
58: # (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
59: # self.handle_data,
60: # }
61:
62: # connect database
63:
64:
65:
66:
67: self.dbIDs = {}
68: self.rowcnt = 0
69:
70: self.currentName=None
71:
72: self.newDataset = []
73: self.conflicts = []
74: self.ok = []
75: self.fieldNames=[]
76: self.currentRow={}
77: self.currentTag=""
78: return
79:
80: def startElement(self, name, attrs):
81: logging.debug(name)
82: if (name.lower() == "field") :
83: self.handle_meta_fields(attrs)
84: if (name.lower() == "row") :
85: logging.debug("handleROW")
86: self.currentRow={} # new Row
87: self.currentData=0
88:
89:
90: if (name.lower()=="data"):
91:
92: self.currentName=self.fieldNames[self.currentData]
93: self.currentData+=1
94: self.currentTag="data"
95:
96: def endElement(self,name):
97: if (name.lower() == "data") :
98: self.currentTag=""
99: if (name.lower() == "row"):
100: self.handle_end_row()
101:
102: def characters(self,content):
103:
104: if self.currentName is not None:
105: logging.debug(self.currentName+" "+content)
106: self.currentRow[self.currentName]=content;
107:
108:
109: def handle_end_row(self):
110:
111: logging.debug("edd ROW")
112:
113: if self.result.has_key(self.currentRow['key']):
114: logging.error("Key %s not unique"%self.currentRow['key'])
115:
116: if self.currentName is not None:
117: self.result[self.currentRow['key']]=self.currentRow.copy()
118: #
119: #
120: # return
121:
122: def handle_meta_fields(self,attrs):
123:
124: #First round through the generator corresponds to the
125: #start element event
126: logging.debug("START -FIELD")
127: name = attrs.get('NAME')
128: name=name.replace(" ","_")# make sure no spaces
129: self.fieldNames.append(name)
130:
131: self.update_fields = self.fieldNames
132:
133: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
134: # get list of fields in db table
135:
136: #print "upQ: ", self.updQuery
137: #print "adQ: ", self.addQuery
138:
139: return
140:
141: # def handle_meta_field(self, end_condition):
142: # name = self.params.get((None, u'NAME'))
143: # yield None
144: # #Element closed. Wrap up
145: # name=name.replace(" ","_")# make sure no spaces
146: # self.fieldNames.append(name)
147: # logging.debug("FIELD name: "+name)
148: # return
149:
150: # def handle_data(self, end_condition):
151: # dispatcher = {
152: # (saxtools.START_ELEMENT, fm_ns, u'ROW'):
153: # self.handle_row,
154: # }
155: # #First round through the generator corresponds to the
156: # #start element event
157: # logging.debug("START RESULTSET")
158: # self.rowcnt = 0
159: # yield None
160: #
161: # #delegate is a generator that handles all the events "within"
162: # #this element
163: # delegate = None
164: # while not self.event == end_condition:
165: # delegate = saxtools.tenorsax.event_loop_body(
166: # dispatcher, delegate, self.event)
167: # yield None
168: #
169: # #Element closed. Wrap up
170: # logging.debug("END RESULTSET")
171: #
172: #
173: #
174: # return
175:
176: # def handle_row(self, end_condition):
177: # dispatcher = {
178: # (saxtools.START_ELEMENT, fm_ns, u'COL'):
179: # self.handle_col,
180: # }
181: # logging.debug("START ROW")
182: # self.dataSet = {}
183: # self.colIdx = 0
184: # yield None
185: #
186: # #delegate is a generator that handles all the events "within"
187: # #this element
188: # delegate = None
189: # while not self.event == end_condition:
190: # delegate = saxtools.tenorsax.event_loop_body(
191: # dispatcher, delegate, self.event)
192: # yield None
193: #
194: # #Element closed. Wrap up
195: # logging.debug("END ROW")
196: # self.rowcnt += 1
197: # # process collected row data
198: # update=False
199: # id_val=''
200: #
201: # if self.result.has_key(self.dataSet['key']):
202: # logging.error("Key %s not unique"%self.dataSet['key'])
203: #
204: # self.result[self.dataSet['key']]=self.dataSet
205: #
206: #
207: # return
208:
209: # def handle_col(self, end_condition):
210: # dispatcher = {
211: # (saxtools.START_ELEMENT, fm_ns, u'DATA'):
212: # self.handle_data_tag,
213: # }
214: # #print "START COL"
215: # yield None
216: # #delegate is a generator that handles all the events "within"
217: # #this element
218: # delegate = None
219: # while not self.event == end_condition:
220: # delegate = saxtools.tenorsax.event_loop_body(
221: # dispatcher, delegate, self.event)
222: # yield None
223: # #Element closed. Wrap up
224: # #print "END COL"
225: # self.colIdx += 1
226: # return
227: #
228: # def handle_data_tag(self, end_condition):
229: # #print "START DATA"
230: # content = u''
231: # yield None
232: # # gather child elements
233: # while not self.event == end_condition:
234: # if self.event[0] == saxtools.CHARACTER_DATA:
235: # content += self.params
236: # yield None
237: # #Element closed. Wrap up
238: # field = self.fieldNames[self.colIdx]
239: # self.dataSet[field.lower()] = content
240: # #print " DATA(", field, ") ", repr(content)
241: # return
242:
243:
244: def checkImport(dsn,resultSet):
245: #now connect to the database
246: logging.info("dsn: %s"%dsn)
247: dbCon = psycopg.connect(dsn)
248: db = dbCon.cursor()
249:
250:
251: qstr="select key from personal_www"
252:
253: results=SimpleSearch(db,qstr)
254:
255: keys=[]
256: for x in results:
257: if x[0]:
258: keys.append(unicodify(x[0]))
259:
260:
261: #first step detect new entries and conflicts
262: new=[]
263: conflicts={}
264:
265: for x in resultSet.iterkeys():
266:
267: if x not in keys:
268:
269: new.append(x)
270:
271: else:
272:
273: conflict,ret=checkForConflicts(db,resultSet[x],x)
274: if conflict:
275: conflicts[x]=ret
276:
277: return new,conflicts
278:
279: def importFMPXML(filename):
280: '''
281: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
282: @param filename: xmlfile filename
283:
284: '''
285:
286: parser = sax.make_parser()
287: #The "consumer" is our own handler
288: consumer = xml_handler()
289: #Initialize Tenorsax with handler
290: #handler = saxtools.tenorsax(consumer)
291: #Resulting tenorsax instance is the SAX handler
292: parser.setContentHandler(consumer)
293: #parser.setFeature(sax.handler.feature_namespaces, 1)
294: parser.parse(filename)
295: resultSet=consumer.result # xml now transformed into an dictionary
296:
297: return resultSet
298:
299:
300:
301: def checkForConflicts(cursor,dataSet,key):
302:
303: ret=[]
304: fields=",".join(checkFields)
305:
306: qstr="select %s from personal_www where key='%s'"%(fields,key)
307:
308:
309: sr=SimpleSearch(cursor,qstr)
310:
311: if not sr:
312: return True, None
313:
314: i=0
315: retValue=False
316:
317: for checkField in checkFields:
318: dbValueR=sr[0][i]
319: if dbValueR:
320: dbValue=unicodify(dbValueR)
321: else:
322: dbValue=""
323:
324: if checkField in dataSet:
325: setValue=dataSet[checkField]
326: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
327: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
328: ret.append((checkField,dbValue,setValue))
329: retValue=True
330:
331: else:
332: logging.warning("unknown field %s in data file!"%checkField)
333:
334: i+=1
335:
336: return retValue,ret
337:
338:
339: ##
340: ## public static int main()
341: ##
342:
343: if __name__ == "__main__":
344:
345:
346:
347: loglevel = logging.DEBUG
348:
349:
350: logging.basicConfig(level=loglevel,
351: format='%(asctime)s %(levelname)s %(message)s',
352: datefmt='%H:%M:%S')
353:
354: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
355: news,conflicts=checkImport(dsn="dbname=personalwww user=www password=e1nste1n", resultSet=resultSet)
356:
357:
358: print "new"
359: print len(news),news
360: print "-----------"
361: print "conflicts"
362: print conflicts
363:
364: # update_fields = None
365: #
366: # if options.update_fields:
367: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
368: #
369: # parser = sax.make_parser()
370: # #The "consumer" is our own handler
371: # consumer = xml_handler(dsn=options.dsn,table=options.table,
372: # update_fields=update_fields,id_field=options.id_field,
373: # sync_mode=options.sync_mode)
374: # #Initialize Tenorsax with handler
375: # handler = saxtools.tenorsax(consumer)
376: # #Resulting tenorsax instance is the SAX handler
377: # parser.setContentHandler(handler)
378: # parser.setFeature(sax.handler.feature_namespaces, 1)
379: # parser.parse(options.filename)
380: #
381: #
382: # print "DONE!"
383:
384:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>