Return to updatePersonalWWW.py CVS log | Up to [Repository] / MPIWGWeb |
1.1.2.1 dwinter 1: try:
2: import psycopg2 as psycopg
3: psyco = 2
4: except:
5: import psycopg
6: psyco = 1
7:
8: import logging
9:
10: from xml import sax
11: from amara import saxtools
12:
13:
14: fm_ns = 'http://www.filemaker.com/fmpxmlresult'
15: #checkFields=['key','first_name','last_name','title','home_inst','current_work','e_mail2']
16: checkFields=['key','first_name','last_name','title','home_inst','e_mail2']
17:
18: def sql_quote(v):
19: # quote dictionary
20: quote_dict = {"\'": "''", "\\": "\\\\"}
21: for dkey in quote_dict.keys():
22: if v.find(dkey) >= 0:
23: v=quote_dict[dkey].join(v.split(dkey))
24: return "'%s'"%v
25:
26: def SimpleSearch(curs,query, args=None):
27: """execute sql query and return data"""
28: logging.debug("executing: "+query)
29: if psyco == 1:
30: query = query.encode("UTF-8")
31: #if args is not None:
32: # args = [ sql_quote(a) for a in args ]
33: #logging.debug(query)
34: #logging.debug(args)
35:
36: curs.execute(query, args)
37: logging.debug("sql done")
38: try:
39: return curs.fetchall()
40: except:
41: return None
42:
43: class xml_handler:
44:
45: def __init__(self):
46: '''
47: SAX handler to import FileMaker XML file (FMPXMLRESULT format) into the table.
48: @param dsn: database connection string
49: @param table: name of the table the xml shall be imported into
50: '''
51:
52: # set up parser
53: self.result={}
54: self.event = None
55: self.top_dispatcher = {
56: (saxtools.START_ELEMENT, fm_ns, u'METADATA'):
57: self.handle_meta_fields,
58: (saxtools.START_ELEMENT, fm_ns, u'RESULTSET'):
59: self.handle_data,
60: }
61:
62: # connect database
63:
64:
65:
66:
67: self.dbIDs = {}
68: self.rowcnt = 0
69:
70:
71:
72: self.newDataset = []
73: self.conflicts = []
74: self.ok = []
75: self.fieldNames=[]
76: return
77:
78: def handle_meta_fields(self, end_condition):
79: dispatcher = {
80: (saxtools.START_ELEMENT, fm_ns, u'FIELD'):
81: self.handle_meta_field,
82: }
83: #First round through the generator corresponds to the
84: #start element event
85: logging.debug("START METADATA")
86: yield None
87:
88: #delegate is a generator that handles all the events "within"
89: #this element
90: delegate = None
91: while not self.event == end_condition:
92: delegate = saxtools.tenorsax.event_loop_body(
93: dispatcher, delegate, self.event)
94: yield None
95:
96: #Element closed. Wrap up
97: logging.debug("END METADATA")
98:
99: self.update_fields = self.fieldNames
100:
101: logging.debug("xml-fieldnames:"+repr(self.fieldNames))
102: # get list of fields in db table
103:
104: #print "upQ: ", self.updQuery
105: #print "adQ: ", self.addQuery
106:
107: return
108:
109: def handle_meta_field(self, end_condition):
110: name = self.params.get((None, u'NAME'))
111: yield None
112: #Element closed. Wrap up
113: name=name.replace(" ","_")# make sure no spaces
114: self.fieldNames.append(name)
115: logging.debug("FIELD name: "+name)
116: return
117:
118: def handle_data(self, end_condition):
119: dispatcher = {
120: (saxtools.START_ELEMENT, fm_ns, u'ROW'):
121: self.handle_row,
122: }
123: #First round through the generator corresponds to the
124: #start element event
125: logging.debug("START RESULTSET")
126: self.rowcnt = 0
127: yield None
128:
129: #delegate is a generator that handles all the events "within"
130: #this element
131: delegate = None
132: while not self.event == end_condition:
133: delegate = saxtools.tenorsax.event_loop_body(
134: dispatcher, delegate, self.event)
135: yield None
136:
137: #Element closed. Wrap up
138: logging.debug("END RESULTSET")
139:
140:
141:
142: return
143:
144: def handle_row(self, end_condition):
145: dispatcher = {
146: (saxtools.START_ELEMENT, fm_ns, u'COL'):
147: self.handle_col,
148: }
149: logging.debug("START ROW")
150: self.dataSet = {}
151: self.colIdx = 0
152: yield None
153:
154: #delegate is a generator that handles all the events "within"
155: #this element
156: delegate = None
157: while not self.event == end_condition:
158: delegate = saxtools.tenorsax.event_loop_body(
159: dispatcher, delegate, self.event)
160: yield None
161:
162: #Element closed. Wrap up
163: logging.debug("END ROW")
164: self.rowcnt += 1
165: # process collected row data
166: update=False
167: id_val=''
168:
169: if self.result.has_key(self.dataSet['key']):
170: logging.error("Key %s not unique"%self.dataSet['key'])
171:
172: self.result[self.dataSet['key']]=self.dataSet
173:
174:
175: return
176:
177: def handle_col(self, end_condition):
178: dispatcher = {
179: (saxtools.START_ELEMENT, fm_ns, u'DATA'):
180: self.handle_data_tag,
181: }
182: #print "START COL"
183: yield None
184: #delegate is a generator that handles all the events "within"
185: #this element
186: delegate = None
187: while not self.event == end_condition:
188: delegate = saxtools.tenorsax.event_loop_body(
189: dispatcher, delegate, self.event)
190: yield None
191: #Element closed. Wrap up
192: #print "END COL"
193: self.colIdx += 1
194: return
195:
196: def handle_data_tag(self, end_condition):
197: #print "START DATA"
198: content = u''
199: yield None
200: # gather child elements
201: while not self.event == end_condition:
202: if self.event[0] == saxtools.CHARACTER_DATA:
203: content += self.params
204: yield None
205: #Element closed. Wrap up
206: field = self.fieldNames[self.colIdx]
207: self.dataSet[field.lower()] = content
208: #print " DATA(", field, ") ", repr(content)
209: return
210:
211:
212: def checkImport(dsn,resultSet):
213: #now connect to the database
1.1.2.3 ! casties 214: logging.info("dsn: %s"%dsn)
1.1.2.1 dwinter 215: dbCon = psycopg.connect(dsn)
216: db = dbCon.cursor()
217:
218:
219: qstr="select key from personal_www"
220:
221: results=SimpleSearch(db,qstr)
222:
223: keys=[]
224: for x in results:
225: if x[0]:
226: keys.append(x[0].decode('utf-8'))
227:
228:
229: #first step detect new entries and conflicts
230: new=[]
231: conflicts={}
232:
233: for x in resultSet.iterkeys():
234:
235: if x not in keys:
236:
237: new.append(x)
238:
239: else:
240:
241: conflict,ret=checkForConflicts(db,resultSet[x],x)
242: if conflict:
243: conflicts[x]=ret
244:
245: return new,conflicts
246:
247: def importFMPXML(filename):
248: '''
249: method to import FileMaker XML file (FMPXMLRESULT format) into the table.
250: @param filename: xmlfile filename
251:
252: '''
253:
254: parser = sax.make_parser()
255: #The "consumer" is our own handler
256: consumer = xml_handler()
257: #Initialize Tenorsax with handler
258: handler = saxtools.tenorsax(consumer)
259: #Resulting tenorsax instance is the SAX handler
260: parser.setContentHandler(handler)
261: parser.setFeature(sax.handler.feature_namespaces, 1)
262: parser.parse(filename)
263: resultSet=consumer.result # xml now transformed into an dictionary
264:
265: return resultSet
266:
267:
268:
269: def checkForConflicts(cursor,dataSet,key):
270:
271: ret=[]
272: fields=",".join(checkFields)
273:
274: qstr="select %s from personal_www where key='%s'"%(fields,key)
275:
276:
277: sr=SimpleSearch(cursor,qstr)
278:
279: if not sr:
280: return True, None
281:
282: i=0
283: retValue=False
284:
285: for checkField in checkFields:
286: dbValueR=sr[0][i]
287: if dbValueR:
288: dbValue=dbValueR.decode('utf-8')
289: else:
290: dbValue=""
291:
292: setValue=dataSet[checkField]
293: logging.debug( " %s %s %s %s"%(repr(key),checkField,repr(dbValue),repr(setValue)))
294: if dbValue.strip().rstrip()!=setValue.lstrip().rstrip():
295: ret.append((checkField,dbValue,setValue))
296: retValue=True
297: i+=1
298:
299: return retValue,ret
300:
301:
302: ##
303: ## public static int main()
304: ##
305:
306: if __name__ == "__main__":
307:
308:
309:
310: loglevel = logging.DEBUG
311:
312:
313: logging.basicConfig(level=loglevel,
314: format='%(asctime)s %(levelname)s %(message)s',
315: datefmt='%H:%M:%S')
316:
317: resultSet=importFMPXML(filename="/Users/dwinter/Desktop/personalwww.xml")
1.1.2.2 dwinter 318: news,conflicts=checkImport(dsn="dbname=personalwww host=xserve02a user=mysql password=e1nste1n", resultSet=resultSet)
1.1.2.1 dwinter 319:
320:
321: print "new"
322: print len(news),news
323: print "-----------"
324: print "conflicts"
325: print conflicts
326:
327: # update_fields = None
328: #
329: # if options.update_fields:
330: # update_fields = [string.strip(s) for s in options.update_fields.split(',')]
331: #
332: # parser = sax.make_parser()
333: # #The "consumer" is our own handler
334: # consumer = xml_handler(dsn=options.dsn,table=options.table,
335: # update_fields=update_fields,id_field=options.id_field,
336: # sync_mode=options.sync_mode)
337: # #Initialize Tenorsax with handler
338: # handler = saxtools.tenorsax(consumer)
339: # #Resulting tenorsax instance is the SAX handler
340: # parser.setContentHandler(handler)
341: # parser.setFeature(sax.handler.feature_namespaces, 1)
342: # parser.parse(options.filename)
343: #
344: #
345: # print "DONE!"
346:
347: