1: import psycopg2 as psycopg
2: psyco = 2
3:
4: import logging
5: from MPIWGHelper import unicodify, utf8ify
6:
7: from xml import sax
8: from amara import saxtools
9:
10: # namespace for FileMaker8
11: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
12:
13: # list of fields that are taken from XML and checked against DB as conflicts
14: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
15: checkFields=['key','first_name','last_name','title','titles_new','home_inst','current_work']
16:
17:
18: def sql_quote(v):
19: # quote dictionary
20: quote_dict = {"\'": "''", "\\": "\\\\"}
21: for dkey in quote_dict.keys():
22: if v.find(dkey) >= 0:
23: v=quote_dict[dkey].join(v.split(dkey))
24: return "'%s'"%v
25:
26: def SimpleSearch(curs,query, args=None):
27: """execute sql query and return data"""
28: logging.debug("executing: "+query)
29: if psyco == 1:
30: query = query.encode("UTF-8")
31: #if args is not None:
32: # args = [ sql_quote(a) for a in args ]
33: #logging.debug(query)
34: #logging.debug(args)
35:
36: curs.execute(query, args)
37: logging.debug("sql done")
38: try:
39: return curs.fetchall()
40: except:
41: return None
42:
43: class xml_handler:
44:
45: def __init__(self):
46: '''
47: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
48: @param dsn: database connection string
49: @param table: name of the table the xml shall be imported into
50: '''
51:
52: # set up parser
53: self.result={}
54: self.event = None
55: self.top_dispatcher = {
56: (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
57: self.handle_meta_fields,
58: (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
59: self.handle_data,
60: }
61:
62: # connect database
63:
64:
65:
66:
67: self.dbIDs = {}
68: self.rowcnt = 0
69:
70:
71:
72: self.newDataset = []
73: self.conflicts = []
74: self.ok = []
75: self.fieldNames=[]
76: return
77:
78: def handle_meta_fields(self, end_condition):
79: dispatcher = {
80: (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
81: self.handle_meta_field,
82: }
83: #First round through the generator corresponds to the
84: #start element event
85: logging.debug("START METADATA")
86: yield None
87:
88: #delegate is a generator that handles all the events "within"
89: #this element
90: delegate = None
91: while not self.event == end_condition:
92: delegate = saxtools.tenorsax.event_loop_body(
93: dispatcher, delegate, self.event)
94: yield None
95:
96: #Element closed. Wrap up
97: logging.debug("END METADATA")
98:
99: self.update_fields = self.fieldNames
100:
101: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
102: # get list of fields in db table
103:
104: #print "upQ: ", self.updQuery
105: #print "adQ: ", self.addQuery
106:
107: return
108:
109: def handle_meta_field(self, end_condition):
110: name = self.params.get((None, u'NAME'))
111: yield None
112: #Element closed. Wrap up
113: name=name.replace(" ","_")# make sure no spaces
114: self.fieldNames.append(name)
115: logging.debug("FIELD name: "+name)
116: return
117:
118: def handle_data(self, end_condition):
119: dispatcher = {
120: (saxtools.START_ELEMENT, fm_ns, u'ROW'):
121: self.handle_row,
122: }
123: #First round through the generator corresponds to the
124: #start element event
125: logging.debug("START RESULTSET")
126: self.rowcnt = 0
127: yield None
128:
129: #delegate is a generator that handles all the events "within"
130: #this element
131: delegate = None
132: while not self.event == end_condition:
133: delegate = saxtools.tenorsax.event_loop_body(
134: dispatcher, delegate, self.event)
135: yield None
136:
137: #Element closed. Wrap up
138: logging.debug("END RESULTSET")
139:
140:
141:
142: return
143:
144: def handle_row(self, end_condition):
145: dispatcher = {
146: (saxtools.START_ELEMENT, fm_ns, u'COL'):
147: self.handle_col,
148: }
149: logging.debug("START ROW")
150: self.dataSet = {}
151: self.colIdx = 0
152: yield None
153:
154: #delegate is a generator that handles all the events "within"
155: #this element
156: delegate = None
157: while not self.event == end_condition:
158: delegate = saxtools.tenorsax.event_loop_body(
159: dispatcher, delegate, self.event)
160: yield None
161:
162: #Element closed. Wrap up
163: logging.debug("END ROW")
164: self.rowcnt += 1
165: # process collected row data
166: update=False
167: id_val=''
168:
169: if self.result.has_key(self.dataSet['key']):
170: logging.error("Key %s not unique"%self.dataSet['key'])
171:
172: self.result[self.dataSet['key']]=self.dataSet
173:
174:
175: return
176:
177: def handle_col(self, end_condition):
178: dispatcher = {
179: (saxtools.START_ELEMENT, fm_ns, u'DATA'):
180: self.handle_data_tag,
181: }
182: #print "START COL"
183: yield None
184: #delegate is a generator that handles all the events "within"
185: #this element
186: delegate = None
187: while not self.event == end_condition:
188: delegate = saxtools.tenorsax.event_loop_body(
189: dispatcher, delegate, self.event)
190: yield None
191: #Element closed. Wrap up
192: #print "END COL"
193: self.colIdx += 1
194: return
195:
196: def handle_data_tag(self, end_condition):
197: #print "START DATA"
198: content = u''
199: yield None
200: # gather child elements
201: while not self.event == end_condition:
202: if self.event[0] == saxtools.CHARACTER_DATA:
203: content += self.params
204: yield None
205: #Element closed. Wrap up
206: field = self.fieldNames[self.colIdx]
207: self.dataSet[field.lower()] = content
208: #print " DATA(", field, ") ", repr(content)
209: return
210:
211:
212: def checkImport(dsn,resultSet):
213: #now connect to the database
214: logging.info("dsn: %s"%dsn)
215: dbCon = psycopg.connect(dsn)
216: db = dbCon.cursor()
217:
218:
219: qstr="select key from personal_www"
220:
221: results=SimpleSearch(db,qstr)
222:
223: keys=[]
224: for x in results:
225: if x[0]:
226: keys.append(unicodify(x[0]))
227:
228:
229: #first step detect new entries and conflicts
230: new=[]
231: conflicts={}
232:
233: for x in resultSet.iterkeys():
234:
235: if x not in keys:
236:
237: new.append(x)
238:
239: else:
240:
241: conflict,ret=checkForConflicts(db,resultSet[x],x)
242: if conflict:
243: conflicts[x]=ret
244:
245: return new,conflicts
246:
247: def importFMPXML(filename):
248: '''
249: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
250: @param filename: xmlfile filename
251:
252: '''
253:
254: parser = sax.make_parser()
255: #The "consumer" is our own handler
256: consumer = xml_handler()
257: #Initialize Tenorsax with handler
258: handler = saxtools.tenorsax(consumer)
259: #Resulting tenorsax instance is the SAX handler
260: parser.setContentHandler(handler)
261: parser.setFeature(sax.handler.feature_namespaces, 1)
262: parser.parse(filename)
263: resultSet=consumer.result # xml now transformed into an dictionary
264:
265: return resultSet
266:
267:
268:
269: def checkForConflicts(cursor,dataSet,key):
270:
271: ret=[]
272: fields=",".join(checkFields)
273:
274: qstr="select %s from personal_www where key='%s'"%(fields,key)
275:
276:
277: sr=SimpleSearch(cursor,qstr)
278:
279: if not sr:
280: return True, None
281:
282: i=0
283: retValue=False
284:
285: for checkField in checkFields:
286: dbValueR=sr[0][i]
287: if dbValueR:
288: dbValue=unicodify(dbValueR)
289: else:
290: dbValue=""
291:
292: if checkField in dataSet:
293: setValue=dataSet[checkField]
294: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
295: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
296: ret.append((checkField,dbValue,setValue))
297: retValue=True
298:
299: else:
300: logging.warning("unknown field %s in data file!"%checkField)
301:
302: i+=1
303:
304: return retValue,ret
305:
306:
307: ##
308: ## public static int main()
309: ##
310:
311: if __name__ == "__main__":
312:
313:
314:
315: loglevel = logging.DEBUG
316:
317:
318: logging.basicConfig(level=loglevel,
319: format='%(asctime)s %(levelname)s %(message)s',
320: datefmt='%H:%M:%S')
321:
322: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
323: news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet)
324:
325:
326: print "new"
327: print len(news),news
328: print "-----------"
329: print "conflicts"
330: print conflicts
331:
332: # update_fields = None
333: #
334: # if options.update_fields:
335: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
336: #
337: # parser = sax.make_parser()
338: # #The "consumer" is our own handler
339: # consumer = xml_handler(dsn=options.dsn,table=options.table,
340: # update_fields=update_fields,id_field=options.id_field,
341: # sync_mode=options.sync_mode)
342: # #Initialize Tenorsax with handler
343: # handler = saxtools.tenorsax(consumer)
344: # #Resulting tenorsax instance is the SAX handler
345: # parser.setContentHandler(handler)
346: # parser.setFeature(sax.handler.feature_namespaces, 1)
347: # parser.parse(options.filename)
348: #
349: #
350: # print "DONE!"
351:
352:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>