Annotation of MPIWGWeb/updatePersonalWWW.py, revision 1.1.2.6
1.1.2.1 dwinter 1: try:
2: import psycopg2 as psycopg
3: psyco = 2
4: except:
5: import psycopg
6: psyco = 1
7:
8: import logging
9:
10: from xml import sax
11: from amara import saxtools
12:
# XML namespace of FileMaker's FMPXMLRESULT export format
fm_ns = 'http://www.filemaker.com/fmpxmlresult'

# Fields that are taken from the XML and compared against the DB; any
# difference is reported as a conflict.
# (An earlier revision also listed 'title' and 'current_work' instead of
# 'titles_new'.)
checkFields = [
    'key',
    'first_name',
    'last_name',
    'titles_new',
    'home_inst',
    'e_mail2',
]
1.1.2.1 dwinter 19:
def sql_quote(v):
    """Return v as a single-quoted SQL string literal.

    Single quotes inside v are doubled and backslashes are doubled before
    the value is wrapped in quotes.
    """
    # The two replacements never produce each other's character, so the
    # order in which they are applied does not matter.
    escaped = v.replace("\\", "\\\\").replace("'", "''")
    return "'" + escaped + "'"
27:
def SimpleSearch(curs, query, args=None):
    """Execute an SQL query and return all result rows.

    @param curs: database cursor the query is executed on
    @param query: SQL text; may contain placeholders that are filled from args
    @param args: optional query parameters, handed unchanged to curs.execute()
    @return: whatever curs.fetchall() returns, or None if fetching raises a
        database error
    """
    logging.debug("executing: %s", query)
    if psyco == 1:
        # the legacy psycopg 1 driver is given the query as UTF-8 bytes
        query = query.encode("UTF-8")

    curs.execute(query, args)
    logging.debug("sql done")
    try:
        return curs.fetchall()
    except psycopg.Error:
        # Only database errors are mapped to "no rows" (presumably raised
        # for statements that produce no result set); anything else, e.g.
        # KeyboardInterrupt, must not be swallowed here.
        return None
44:
class xml_handler:
    """Consumer for saxtools.tenorsax that collects FMPXMLRESULT rows.

    Every handle_* method is a generator that is advanced once per SAX
    event.  The handlers read self.event and self.params, which are never
    assigned in this class -- presumably tenorsax sets them on the consumer
    before each step (TODO confirm against the amara documentation).

    After parsing, self.result maps the value of each row's 'key' field to
    a dict of {lower-cased field name: text content}.
    """

    def __init__(self):
        '''
        Set up the top-level dispatch table and the containers that are
        filled while parsing.
        '''
        # set up parser
        self.result = {}
        self.event = None
        self.top_dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
                self.handle_meta_fields,
            (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
                self.handle_data,
        }

        # not used by any method of this class
        self.dbIDs = {}
        # number of ROW elements completed so far
        self.rowcnt = 0

        # not used by any method of this class
        self.newDataset = []
        self.conflicts = []
        self.ok = []
        # field names from METADATA, in column order
        self.fieldNames = []

    def _dispatch_children(self, dispatcher, end_condition):
        """Feed events to child handlers until end_condition is seen.

        Yields once per consumed event so the calling handler can pass the
        yield on to its own caller.
        """
        # delegate is the generator handling the events "within" the
        # current element
        delegate = None
        while not self.event == end_condition:
            delegate = saxtools.tenorsax.event_loop_body(
                dispatcher, delegate, self.event)
            yield None

    def handle_meta_fields(self, end_condition):
        """Handle a METADATA element: collect the names of its FIELDs."""
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
                self.handle_meta_field,
        }
        # First round through the generator corresponds to the
        # start element event
        logging.debug("START METADATA")
        yield None

        for _ in self._dispatch_children(dispatcher, end_condition):
            yield None

        # Element closed. Wrap up
        logging.debug("END METADATA")
        self.update_fields = self.fieldNames
        logging.debug("xml-fieldnames:%r", self.fieldNames)

    def handle_meta_field(self, end_condition):
        """Handle a FIELD element: remember its NAME attribute."""
        name = self.params.get((None, u'NAME'))
        yield None
        # Element closed. Wrap up
        name = name.replace(" ", "_")  # make sure no spaces
        self.fieldNames.append(name)
        logging.debug("FIELD name: %s", name)

    def handle_data(self, end_condition):
        """Handle a RESULTSET element: process all contained ROWs."""
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'ROW'):
                self.handle_row,
        }
        # First round through the generator corresponds to the
        # start element event
        logging.debug("START RESULTSET")
        self.rowcnt = 0
        yield None

        for _ in self._dispatch_children(dispatcher, end_condition):
            yield None

        # Element closed. Wrap up
        logging.debug("END RESULTSET")

    def handle_row(self, end_condition):
        """Handle a ROW element: collect its COLs and store the row.

        The finished row is stored in self.result under the value of its
        'key' field.  A duplicate key is logged and the later row wins; a
        row without a 'key' value is logged and skipped.
        """
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'COL'):
                self.handle_col,
        }
        logging.debug("START ROW")
        self.dataSet = {}
        self.colIdx = 0
        yield None

        for _ in self._dispatch_children(dispatcher, end_condition):
            yield None

        # Element closed. Wrap up
        logging.debug("END ROW")
        self.rowcnt += 1

        # process collected row data
        if 'key' not in self.dataSet:
            # without a key the row cannot be stored; do not abort the
            # whole import because of one incomplete row
            logging.error("Row %s has no key, skipped", self.rowcnt)
            return

        key = self.dataSet['key']
        if key in self.result:
            logging.error("Key %s not unique", key)

        self.result[key] = self.dataSet

    def handle_col(self, end_condition):
        """Handle a COL element: process its DATA, then advance colIdx."""
        dispatcher = {
            (saxtools.START_ELEMENT, fm_ns, u'DATA'):
                self.handle_data_tag,
        }
        yield None

        for _ in self._dispatch_children(dispatcher, end_condition):
            yield None

        # Element closed. Wrap up
        self.colIdx += 1

    def handle_data_tag(self, end_condition):
        """Handle a DATA element: store its text under the column's field.

        The field name is looked up by the current column index and
        lower-cased before it is used as key in self.dataSet.
        """
        parts = []
        yield None
        # gather character data
        while not self.event == end_condition:
            if self.event[0] == saxtools.CHARACTER_DATA:
                parts.append(self.params)
            yield None

        # Element closed. Wrap up
        if self.colIdx >= len(self.fieldNames):
            # more COLs in the row than FIELDs were declared in METADATA
            logging.error("Column %s has no field name, data ignored",
                          self.colIdx)
            return

        field = self.fieldNames[self.colIdx]
        self.dataSet[field.lower()] = u''.join(parts)
212:
213:
def checkImport(dsn, resultSet):
    """Compare an imported data set with the personal_www table.

    @param dsn: database connection string handed to psycopg.connect()
    @param resultSet: dictionary {key: row dictionary} of imported rows
    @return: tuple (new, conflicts).  new is the list of keys from
        resultSet that do not exist in the table; conflicts maps every
        existing key for which checkForConflicts() reports a conflict to
        the details returned by it.
    """
    import re

    # never write the password into the log
    safe_dsn = re.sub(r"(password\s*=\s*)('(?:[^'\\]|\\.)*'|\S+)",
                      r"\1***", dsn)
    logging.info("dsn: %s", safe_dsn)

    # now connect to the database
    dbCon = psycopg.connect(dsn)
    try:
        db = dbCon.cursor()

        # SimpleSearch returns None when nothing could be fetched
        rows = SimpleSearch(db, "select key from personal_www") or []

        # type(u'') is unicode on Python 2 and str on Python 3
        text_type = type(u'')
        keys = set()
        for row in rows:
            value = row[0]
            if not value:
                continue
            if not isinstance(value, text_type):
                # byte strings from the driver are UTF-8 encoded
                value = value.decode('utf-8')
            keys.add(value)

        # detect new entries and conflicts
        new = []
        conflicts = {}
        for key in resultSet:
            if key not in keys:
                new.append(key)
                continue
            conflict, ret = checkForConflicts(db, resultSet[key], key)
            if conflict:
                conflicts[key] = ret

        return new, conflicts
    finally:
        # release the connection even if a query fails
        dbCon.close()
248:
def importFMPXML(filename):
    '''
    Parse a FileMaker XML file (FMPXMLRESULT format) into a dictionary.

    @param filename: xmlfile filename
    @return: the result dictionary collected by the xml_handler consumer
    '''
    # our own handler is the "consumer"; the tenorsax instance wrapped
    # around it is the actual SAX content handler
    consumer = xml_handler()
    sax_handler = saxtools.tenorsax(consumer)

    xml_parser = sax.make_parser()
    xml_parser.setContentHandler(sax_handler)
    xml_parser.setFeature(sax.handler.feature_namespaces, 1)
    xml_parser.parse(filename)

    # xml now transformed into a dictionary
    return consumer.result
268:
269:
270:
def checkForConflicts(cursor, dataSet, key):
    """Compare one imported row with its row in the personal_www table.

    @param cursor: database cursor used for the lookup
    @param dataSet: dictionary {field name: value} of the imported row
    @param key: value of the key column that identifies the row
    @return: tuple (conflict, details).  conflict is True if no row is
        found or if any field listed in checkFields differs (surrounding
        whitespace is ignored, NULL and missing values count as empty).
        details is a list of (field, dbValue, xmlValue) tuples, or None if
        no row is found.
    """
    fields = ",".join(checkFields)

    # The key is handed over as a bound parameter so that quotes in it can
    # neither break nor alter the statement.
    qstr = "select %s from personal_www where key=%%s" % fields

    key_param = key
    if psyco == 1 and not isinstance(key, str):
        # SimpleSearch sends the query UTF-8 encoded to the legacy driver;
        # give it the key in the same encoding
        key_param = key.encode("UTF-8")

    sr = SimpleSearch(cursor, qstr, (key_param,))
    if not sr:
        return True, None

    # type(u'') is unicode on Python 2 and str on Python 3
    text_type = type(u'')
    ret = []
    for checkField, dbValueR in zip(checkFields, sr[0]):
        if not dbValueR:
            dbValue = u""
        elif isinstance(dbValueR, text_type):
            dbValue = dbValueR
        else:
            # byte strings from the driver are UTF-8 encoded
            dbValue = dbValueR.decode('utf-8')

        setValue = dataSet.get(checkField) or u""
        logging.debug(" %r %s %r %r", key, checkField, dbValue, setValue)
        if dbValue.strip() != setValue.strip():
            ret.append((checkField, dbValue, setValue))

    return bool(ret), ret
302:
303:
304: ##
305: ## public static int main()
306: ##
307:
if __name__ == "__main__":
    # Usage: updatePersonalWWW.py [xmlfile]
    # The database connection string can be overridden with the
    # environment variable PERSONALWWW_DSN.
    import os
    import sys

    logging.basicConfig(level=logging.DEBUG,
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S')

    if len(sys.argv) > 1:
        filename = sys.argv[1]
    else:
        filename = "/Users/dwinter/Desktop/personalwww.xml"

    # NOTE(review): the fallback below keeps a password in the source for
    # backward compatibility only -- it should be removed and the
    # credentials rotated.
    dsn = os.environ.get(
        "PERSONALWWW_DSN",
        "dbname=personalwww host=xserve02a user=mysql password=e1nste1n")

    resultSet = importFMPXML(filename=filename)
    news, conflicts = checkImport(dsn=dsn, resultSet=resultSet)

    # single-argument print calls give the same output on Python 2 and 3
    print("new")
    print("%d %s" % (len(news), news))
    print("-----------")
    print("conflicts")
    print(conflicts)
348:
349:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>