1: try:
2: import psycopg2 as psycopg
3: psyco = 2
4: except:
5: import psycopg
6: psyco = 1
7:
8: import logging
9: from MPIWGHelper import unicodify, utf8ify
10:
11: from xml import sax
12: from amara import saxtools
13:
14: # namespace for FileMaker8
15: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
16:
17: # list of fields that are taken from XML and checked against DB as conflicts
18: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
19: checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work']
20:
21:
22: def sql_quote(v):
23: # quote dictionary
24: quote_dict = {"\'": "''", "\\": "\\\\"}
25: for dkey in quote_dict.keys():
26: if v.find(dkey) >= 0:
27: v=quote_dict[dkey].join(v.split(dkey))
28: return "'%s'"%v
29:
30: def SimpleSearch(curs,query, args=None):
31: """execute sql query and return data"""
32: logging.debug("executing: "+query)
33: if psyco == 1:
34: query = query.encode("UTF-8")
35: #if args is not None:
36: # args = [ sql_quote(a) for a in args ]
37: #logging.debug(query)
38: #logging.debug(args)
39:
40: curs.execute(query, args)
41: logging.debug("sql done")
42: try:
43: return curs.fetchall()
44: except:
45: return None
46:
47: class xml_handler:
48:
49: def __init__(self):
50: '''
51: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
52: @param dsn: database connection string
53: @param table: name of the table the xml shall be imported into
54: '''
55:
56: # set up parser
57: self.result={}
58: self.event = None
59: self.top_dispatcher = {
60: (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
61: self.handle_meta_fields,
62: (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
63: self.handle_data,
64: }
65:
66: # connect database
67:
68:
69:
70:
71: self.dbIDs = {}
72: self.rowcnt = 0
73:
74:
75:
76: self.newDataset = []
77: self.conflicts = []
78: self.ok = []
79: self.fieldNames=[]
80: return
81:
82: def handle_meta_fields(self, end_condition):
83: dispatcher = {
84: (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
85: self.handle_meta_field,
86: }
87: #First round through the generator corresponds to the
88: #start element event
89: logging.debug("START METADATA")
90: yield None
91:
92: #delegate is a generator that handles all the events "within"
93: #this element
94: delegate = None
95: while not self.event == end_condition:
96: delegate = saxtools.tenorsax.event_loop_body(
97: dispatcher, delegate, self.event)
98: yield None
99:
100: #Element closed. Wrap up
101: logging.debug("END METADATA")
102:
103: self.update_fields = self.fieldNames
104:
105: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
106: # get list of fields in db table
107:
108: #print "upQ: ", self.updQuery
109: #print "adQ: ", self.addQuery
110:
111: return
112:
113: def handle_meta_field(self, end_condition):
114: name = self.params.get((None, u'NAME'))
115: yield None
116: #Element closed. Wrap up
117: name=name.replace(" ","_")# make sure no spaces
118: self.fieldNames.append(name)
119: logging.debug("FIELD name: "+name)
120: return
121:
122: def handle_data(self, end_condition):
123: dispatcher = {
124: (saxtools.START_ELEMENT, fm_ns, u'ROW'):
125: self.handle_row,
126: }
127: #First round through the generator corresponds to the
128: #start element event
129: logging.debug("START RESULTSET")
130: self.rowcnt = 0
131: yield None
132:
133: #delegate is a generator that handles all the events "within"
134: #this element
135: delegate = None
136: while not self.event == end_condition:
137: delegate = saxtools.tenorsax.event_loop_body(
138: dispatcher, delegate, self.event)
139: yield None
140:
141: #Element closed. Wrap up
142: logging.debug("END RESULTSET")
143:
144:
145:
146: return
147:
148: def handle_row(self, end_condition):
149: dispatcher = {
150: (saxtools.START_ELEMENT, fm_ns, u'COL'):
151: self.handle_col,
152: }
153: logging.debug("START ROW")
154: self.dataSet = {}
155: self.colIdx = 0
156: yield None
157:
158: #delegate is a generator that handles all the events "within"
159: #this element
160: delegate = None
161: while not self.event == end_condition:
162: delegate = saxtools.tenorsax.event_loop_body(
163: dispatcher, delegate, self.event)
164: yield None
165:
166: #Element closed. Wrap up
167: logging.debug("END ROW")
168: self.rowcnt += 1
169: # process collected row data
170: update=False
171: id_val=''
172:
173: if self.result.has_key(self.dataSet['key']):
174: logging.error("Key %s not unique"%self.dataSet['key'])
175:
176: self.result[self.dataSet['key']]=self.dataSet
177:
178:
179: return
180:
181: def handle_col(self, end_condition):
182: dispatcher = {
183: (saxtools.START_ELEMENT, fm_ns, u'DATA'):
184: self.handle_data_tag,
185: }
186: #print "START COL"
187: yield None
188: #delegate is a generator that handles all the events "within"
189: #this element
190: delegate = None
191: while not self.event == end_condition:
192: delegate = saxtools.tenorsax.event_loop_body(
193: dispatcher, delegate, self.event)
194: yield None
195: #Element closed. Wrap up
196: #print "END COL"
197: self.colIdx += 1
198: return
199:
200: def handle_data_tag(self, end_condition):
201: #print "START DATA"
202: content = u''
203: yield None
204: # gather child elements
205: while not self.event == end_condition:
206: if self.event[0] == saxtools.CHARACTER_DATA:
207: content += self.params
208: yield None
209: #Element closed. Wrap up
210: field = self.fieldNames[self.colIdx]
211: self.dataSet[field.lower()] = content
212: #print " DATA(", field, ") ", repr(content)
213: return
214:
215:
216: def checkImport(dsn,resultSet):
217: #now connect to the database
218: logging.info("dsn: %s"%dsn)
219: dbCon = psycopg.connect(dsn)
220: db = dbCon.cursor()
221:
222:
223: qstr="select key from personal_www"
224:
225: results=SimpleSearch(db,qstr)
226:
227: keys=[]
228: for x in results:
229: if x[0]:
230: keys.append(unicodify(x[0]))
231:
232:
233: #first step detect new entries and conflicts
234: new=[]
235: conflicts={}
236:
237: for x in resultSet.iterkeys():
238:
239: if x not in keys:
240:
241: new.append(x)
242:
243: else:
244:
245: conflict,ret=checkForConflicts(db,resultSet[x],x)
246: if conflict:
247: conflicts[x]=ret
248:
249: return new,conflicts
250:
251: def importFMPXML(filename):
252: '''
253: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
254: @param filename: xmlfile filename
255:
256: '''
257:
258: parser = sax.make_parser()
259: #The "consumer" is our own handler
260: consumer = xml_handler()
261: #Initialize Tenorsax with handler
262: handler = saxtools.tenorsax(consumer)
263: #Resulting tenorsax instance is the SAX handler
264: parser.setContentHandler(handler)
265: parser.setFeature(sax.handler.feature_namespaces, 1)
266: parser.parse(filename)
267: resultSet=consumer.result # xml now transformed into an dictionary
268:
269: return resultSet
270:
271:
272:
273: def checkForConflicts(cursor,dataSet,key):
274:
275: ret=[]
276: fields=",".join(checkFields)
277:
278: qstr="select %s from personal_www where key='%s'"%(fields,key)
279:
280:
281: sr=SimpleSearch(cursor,qstr)
282:
283: if not sr:
284: return True, None
285:
286: i=0
287: retValue=False
288:
289: for checkField in checkFields:
290: dbValueR=sr[0][i]
291: if dbValueR:
292: dbValue=unicodify(dbValueR)
293: else:
294: dbValue=""
295:
296: if checkField in dataSet:
297: setValue=dataSet[checkField]
298: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
299: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
300: ret.append((checkField,dbValue,setValue))
301: retValue=True
302:
303: else:
304: logging.warning("unknown field %s in data file!"%checkField)
305:
306: i+=1
307:
308: return retValue,ret
309:
310:
311: ##
312: ## public static int main()
313: ##
314:
315: if __name__ == "__main__":
316:
317:
318:
319: loglevel = logging.DEBUG
320:
321:
322: logging.basicConfig(level=loglevel,
323: format='%(asctime)s %(levelname)s %(message)s',
324: datefmt='%H:%M:%S')
325:
326: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
327: news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet)
328:
329:
330: print "new"
331: print len(news),news
332: print "-----------"
333: print "conflicts"
334: print conflicts
335:
336: # update_fields = None
337: #
338: # if options.update_fields:
339: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
340: #
341: # parser = sax.make_parser()
342: # #The "consumer" is our own handler
343: # consumer = xml_handler(dsn=options.dsn,table=options.table,
344: # update_fields=update_fields,id_field=options.id_field,
345: # sync_mode=options.sync_mode)
346: # #Initialize Tenorsax with handler
347: # handler = saxtools.tenorsax(consumer)
348: # #Resulting tenorsax instance is the SAX handler
349: # parser.setContentHandler(handler)
350: # parser.setFeature(sax.handler.feature_namespaces, 1)
351: # parser.parse(options.filename)
352: #
353: #
354: # print "DONE!"
355:
356:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>