Return to updatePersonalWWW.py CVS log | Up to [Repository] / MPIWGWeb |
1.1.2.1 dwinter 1: try:
2: import psycopg2 as psycopg
3: psyco = 2
4: except:
5: import psycopg
6: psyco = 1
7:
8: import logging
9:
10: from xml import sax
11: from amara import saxtools
12:
13:
14: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
15: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
16: checkFields=['key','first_name','last_name','title','home_inst','e_mail2']
17:
18: def sql_quote(v):
19: # quote dictionary
20: quote_dict = {"\'": "''", "\\": "\\\\"}
21: for dkey in quote_dict.keys():
22: if v.find(dkey) >= 0:
23: v=quote_dict[dkey].join(v.split(dkey))
24: return "'%s'"%v
25:
26: def SimpleSearch(curs,query, args=None):
27: """execute sql query and return data"""
28: logging.debug("executing: "+query)
29: if psyco == 1:
30: query = query.encode("UTF-8")
31: #if args is not None:
32: # args = [ sql_quote(a) for a in args ]
33: #logging.debug(query)
34: #logging.debug(args)
35:
36: curs.execute(query, args)
37: logging.debug("sql done")
38: try:
39: return curs.fetchall()
40: except:
41: return None
42:
43: class xml_handler:
44:
45: def __init__(self):
46: '''
47: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
48: @param dsn: database connection string
49: @param table: name of the table the xml shall be imported into
50: '''
51:
52: # set up parser
53: self.result={}
54: self.event = None
55: self.top_dispatcher = {
56: (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
57: self.handle_meta_fields,
58: (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
59: self.handle_data,
60: }
61:
62: # connect database
63:
64:
65:
66:
67: self.dbIDs = {}
68: self.rowcnt = 0
69:
70:
71:
72: self.newDataset = []
73: self.conflicts = []
74: self.ok = []
75: self.fieldNames=[]
76: return
77:
78: def handle_meta_fields(self, end_condition):
79: dispatcher = {
80: (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
81: self.handle_meta_field,
82: }
83: #First round through the generator corresponds to the
84: #start element event
85: logging.debug("START METADATA")
86: yield None
87:
88: #delegate is a generator that handles all the events "within"
89: #this element
90: delegate = None
91: while not self.event == end_condition:
92: delegate = saxtools.tenorsax.event_loop_body(
93: dispatcher, delegate, self.event)
94: yield None
95:
96: #Element closed. Wrap up
97: logging.debug("END METADATA")
98:
99: self.update_fields = self.fieldNames
100:
101: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
102: # get list of fields in db table
103:
104: #print "upQ: ", self.updQuery
105: #print "adQ: ", self.addQuery
106:
107: return
108:
109: def handle_meta_field(self, end_condition):
110: name = self.params.get((None, u'NAME'))
111: yield None
112: #Element closed. Wrap up
113: name=name.replace(" ","_")# make sure no spaces
114: self.fieldNames.append(name)
115: logging.debug("FIELD name: "+name)
116: return
117:
118: def handle_data(self, end_condition):
119: dispatcher = {
120: (saxtools.START_ELEMENT, fm_ns, u'ROW'):
121: self.handle_row,
122: }
123: #First round through the generator corresponds to the
124: #start element event
125: logging.debug("START RESULTSET")
126: self.rowcnt = 0
127: yield None
128:
129: #delegate is a generator that handles all the events "within"
130: #this element
131: delegate = None
132: while not self.event == end_condition:
133: delegate = saxtools.tenorsax.event_loop_body(
134: dispatcher, delegate, self.event)
135: yield None
136:
137: #Element closed. Wrap up
138: logging.debug("END RESULTSET")
139:
140:
141:
142: return
143:
144: def handle_row(self, end_condition):
145: dispatcher = {
146: (saxtools.START_ELEMENT, fm_ns, u'COL'):
147: self.handle_col,
148: }
149: logging.debug("START ROW")
150: self.dataSet = {}
151: self.colIdx = 0
152: yield None
153:
154: #delegate is a generator that handles all the events "within"
155: #this element
156: delegate = None
157: while not self.event == end_condition:
158: delegate = saxtools.tenorsax.event_loop_body(
159: dispatcher, delegate, self.event)
160: yield None
161:
162: #Element closed. Wrap up
163: logging.debug("END ROW")
164: self.rowcnt += 1
165: # process collected row data
166: update=False
167: id_val=''
168:
169: if self.result.has_key(self.dataSet['key']):
170: logging.error("Key %s not unique"%self.dataSet['key'])
171:
172: self.result[self.dataSet['key']]=self.dataSet
173:
174:
175: return
176:
177: def handle_col(self, end_condition):
178: dispatcher = {
179: (saxtools.START_ELEMENT, fm_ns, u'DATA'):
180: self.handle_data_tag,
181: }
182: #print "START COL"
183: yield None
184: #delegate is a generator that handles all the events "within"
185: #this element
186: delegate = None
187: while not self.event == end_condition:
188: delegate = saxtools.tenorsax.event_loop_body(
189: dispatcher, delegate, self.event)
190: yield None
191: #Element closed. Wrap up
192: #print "END COL"
193: self.colIdx += 1
194: return
195:
196: def handle_data_tag(self, end_condition):
197: #print "START DATA"
198: content = u''
199: yield None
200: # gather child elements
201: while not self.event == end_condition:
202: if self.event[0] == saxtools.CHARACTER_DATA:
203: content += self.params
204: yield None
205: #Element closed. Wrap up
206: field = self.fieldNames[self.colIdx]
207: self.dataSet[field.lower()] = content
208: #print " DATA(", field, ") ", repr(content)
209: return
210:
211:
212: def checkImport(dsn,resultSet):
213: #now connect to the database
214: dbCon = psycopg.connect(dsn)
215: db = dbCon.cursor()
216:
217:
218: qstr="select key from personal_www"
219:
220: results=SimpleSearch(db,qstr)
221:
222: keys=[]
223: for x in results:
224: if x[0]:
225: keys.append(x[0].decode('utf-8'))
226:
227:
228: #first step detect new entries and conflicts
229: new=[]
230: conflicts={}
231:
232: for x in resultSet.iterkeys():
233:
234: if x not in keys:
235:
236: new.append(x)
237:
238: else:
239:
240: conflict,ret=checkForConflicts(db,resultSet[x],x)
241: if conflict:
242: conflicts[x]=ret
243:
244: return new,conflicts
245:
246: def importFMPXML(filename):
247: '''
248: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
249: @param filename: xmlfile filename
250:
251: '''
252:
253: parser = sax.make_parser()
254: #The "consumer" is our own handler
255: consumer = xml_handler()
256: #Initialize Tenorsax with handler
257: handler = saxtools.tenorsax(consumer)
258: #Resulting tenorsax instance is the SAX handler
259: parser.setContentHandler(handler)
260: parser.setFeature(sax.handler.feature_namespaces, 1)
261: parser.parse(filename)
262: resultSet=consumer.result # xml now transformed into an dictionary
263:
264: return resultSet
265:
266:
267:
268: def checkForConflicts(cursor,dataSet,key):
269:
270: ret=[]
271: fields=",".join(checkFields)
272:
273: qstr="select %s from personal_www where key='%s'"%(fields,key)
274:
275:
276: sr=SimpleSearch(cursor,qstr)
277:
278: if not sr:
279: return True, None
280:
281: i=0
282: retValue=False
283:
284: for checkField in checkFields:
285: dbValueR=sr[0][i]
286: if dbValueR:
287: dbValue=dbValueR.decode('utf-8')
288: else:
289: dbValue=""
290:
291: setValue=dataSet[checkField]
292: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
293: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
294: ret.append((checkField,dbValue,setValue))
295: retValue=True
296: i+=1
297:
298: return retValue,ret
299:
300:
301: ##
302: ## public static int main()
303: ##
304:
305: if __name__ == "__main__":
306:
307:
308:
309: loglevel = logging.DEBUG
310:
311:
312: logging.basicConfig(level=loglevel,
313: format='%(asctime)s %(levelname)s %(message)s',
314: datefmt='%H:%M:%S')
315:
316: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
1.1.2.2 ! dwinter 317: news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet)
1.1.2.1 dwinter 318:
319:
320: print "new"
321: print len(news),news
322: print "-----------"
323: print "conflicts"
324: print conflicts
325:
326: # update_fields = None
327: #
328: # if options.update_fields:
329: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
330: #
331: # parser = sax.make_parser()
332: # #The "consumer" is our own handler
333: # consumer = xml_handler(dsn=options.dsn,table=options.table,
334: # update_fields=update_fields,id_field=options.id_field,
335: # sync_mode=options.sync_mode)
336: # #Initialize Tenorsax with handler
337: # handler = saxtools.tenorsax(consumer)
338: # #Resulting tenorsax instance is the SAX handler
339: # parser.setContentHandler(handler)
340: # parser.setFeature(sax.handler.feature_namespaces, 1)
341: # parser.parse(options.filename)
342: #
343: #
344: # print "DONE!"
345:
346: