Mercurial > hg > foxridge-archiver
comparison archiver.pl @ 18:fdf4ceb36db1
fixed problem with dir names in metacheck
new version of metacheck defaults to not change index file
new version of archiver uses new version of metacheck
author | casties |
---|---|
date | Tue, 20 Sep 2005 19:24:57 +0200 |
parents | b19df18aa19a |
children | a3c35eae25dc |
comparison
equal
deleted
inserted
replaced
17:6c5c7743acb1 | 18:fdf4ceb36db1 |
---|---|
14 ####################################################### | 14 ####################################################### |
15 # internal parameters | 15 # internal parameters |
16 # | 16 # |
17 | 17 |
18 # program version | 18 # program version |
19 my $version = "0.6 (ROC 23.3.2005)"; | 19 my $version = "0.7 (ROC 20.9.2005)"; |
20 | 20 |
21 # short help | 21 # short help |
22 my $shorthelp = "MPIWG archiver $version\nuse:\n archiver [options] docpath\noptions:\n -premigrate don't delete archived files\n"; | 22 my $help = "MPIWG archiver $version |
23 use: archiver [options] docpath | |
24 options: | |
25 -debug show debugging info | |
26 -premigrate don't delete archived files | |
27 -force archive even if already archived | |
28 "; | |
23 | 29 |
24 # read command line parameters | 30 # read command line parameters |
25 my $args = MPIWGStor::parseargs; | 31 my $args = MPIWGStor::parseargs; |
32 if (! scalar(%$args)) { | |
33 print $help, "\n"; | |
34 exit 1; | |
35 } | |
26 | 36 |
27 # debug level | 37 # debug level |
28 my $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; | 38 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; |
39 | |
40 # force archiving | |
41 my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0; | |
29 | 42 |
30 # rewrite XML file (necessary for archive date!) | 43 # rewrite XML file (necessary for archive date!) |
31 my $fix_xml = 1; | 44 my $fix_xml = 1; |
32 my $xml_changed = 0; | 45 my $xml_changed = 0; |
46 | |
33 # XML namespace (not really implemented!) | 47 # XML namespace (not really implemented!) |
34 my $namespace = ""; | 48 my $namespace = ""; |
35 | 49 |
36 # archive name (archive-path element, usually == $docdir) | 50 # archive name (archive-path element, usually == $docdir) |
37 my $archname; | 51 my $archname; |
52 | |
38 # archive storage date (now) | 53 # archive storage date (now) |
39 my $archdate = stime(time); | 54 my $archdate = stime(time); |
40 | 55 |
41 # delete "junk" files before archiving | 56 # delete "junk" files before archiving |
42 my $delete_junk_files = 1; | 57 my $delete_junk_files = 1; |
43 | 58 |
44 # delete data files after archiving | 59 # delete data files after archiving |
45 my $delete_data_files = 1; | 60 my $delete_data_files = 1; |
46 | 61 |
62 # don't delete archived files with "-premigrate" | |
63 if (exists $$args{'premigrate'}) { | |
64 $delete_data_files = not $$args{'premigrate'}; | |
65 } | |
66 if ($delete_data_files) { | |
67 logger('INFO', "going to remove successfully archived files from disk"); | |
68 } | |
69 | |
47 | 70 |
48 ####################################################### | 71 ####################################################### |
49 # external programs | 72 # external programs |
50 # | 73 # |
51 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | 74 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; |
52 if (! -x $archprog) { | 75 if (! -x $archprog) { |
53 logger('ABORT', "TSM client program '$archprog' missing!!\n"); | 76 logger('ABORT', "TSM client program '$archprog' missing!"); |
54 exit 1; | 77 exit 1; |
55 } | 78 } |
56 my $checkprog = "/usr/local/mpiwg/archive/metacheck"; | 79 my $checkprog = "/usr/local/mpiwg/archive/metacheck"; |
57 if (! -x $checkprog) { | 80 if (! -x $checkprog) { |
58 logger('ABORT', "meta data checking program '$checkprog' missing!!\n"); | 81 logger('ABORT', "meta data checking program '$checkprog' missing!"); |
59 exit 1; | 82 exit 1; |
60 } | 83 } |
61 # log file for archiver | 84 # log file for archiver |
62 my $log_file = "/var/log/mpiwg-archiver.log"; | 85 my $log_file = "/var/log/mpiwg-archiver.log"; |
63 if (! open LOG, ">>$log_file") { | 86 if (! open LOG, ">>$log_file") { |
64 logger('ABORT', "unable to write log file '$log_file'!!\n"); | 87 logger('ABORT', "unable to write log file '$log_file'!"); |
65 exit 1; | 88 exit 1; |
66 } | 89 } |
67 | 90 |
68 ####################################################### | 91 ####################################################### |
69 # check parameters that were passed to the program | 92 # check parameters that were passed to the program |
70 # | 93 # |
71 if ($#ARGV < 0) { | 94 |
72 print $shorthelp; | |
73 exit 1; | |
74 } | |
75 my $docdir = $$args{'path'}; | 95 my $docdir = $$args{'path'}; |
76 # strip double slashes | 96 # strip double slashes |
77 $docdir =~ s/\/\//\//; | 97 $docdir =~ s/\/\//\//; |
78 # strip trailing slashes | 98 # strip trailing slashes |
79 $docdir =~ s/\/+$//; | 99 $docdir =~ s/\/+$//; |
80 if (! -d $docdir) { | 100 if (! -d $docdir) { |
81 logger('ABORT', "document directory \'$docdir\' doesn't exist!\n"); | 101 logger('ABORT', "document directory \'$docdir\' doesn't exist!"); |
82 exit 1; | 102 exit 1; |
83 } | |
84 # don't delete archived files with "-premigrate" | |
85 if (exists $$args{'premigrate'}) { | |
86 $delete_data_files = not $$args{'premigrate'}; | |
87 } | |
88 if ($delete_data_files) { | |
89 logger('INFO', "going to remove successfully archived files from disk"); | |
90 } | 103 } |
91 | 104 |
92 my $metafile = "$docdir/index.meta"; | 105 my $metafile = "$docdir/index.meta"; |
93 if (! -f $metafile) { | 106 if (! -f $metafile) { |
94 logger('ABORT', "metadata index file \'$metafile\' doesn't exist!\n"); | 107 logger('ABORT', "metadata index file \'$metafile\' doesn't exist!"); |
95 exit 1; | 108 exit 1; |
96 } | 109 } |
97 | 110 |
98 ####################################################### | 111 ####################################################### |
99 # internal variables | 112 # internal variables |
120 # archive path | 133 # archive path |
121 # | 134 # |
122 # get archive-path | 135 # get archive-path |
123 $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path')); | 136 $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path')); |
124 if (! $archname) { | 137 if (! $archname) { |
125 logger('ABORT', "archive-name element missing!!\n"); | 138 logger('ABORT', "archive-name element missing!"); |
126 exit 1; | 139 exit 1; |
127 } | 140 } |
128 | 141 |
129 # | 142 # |
130 # files | 143 # files |
131 # | 144 # |
132 my @filenodes = $rootnode->findnodes('child::file'); | 145 my @filenodes = $rootnode->findnodes('child::file'); |
133 foreach my $fn (@filenodes) { | 146 foreach my $fn (@filenodes) { |
134 my $name = MPIWGStor::sstrip($fn->findvalue('child::name')); | 147 my $name = MPIWGStor::sstrip($fn->findvalue('child::name')); |
135 my $path = MPIWGStor::sstrip($fn->findvalue('child::path')); | 148 my $path = MPIWGStor::sstrip($fn->findvalue('child::path')); |
136 logger('DEBUG', "FILE ($path)$name\n"); | 149 logger('DEBUG', "FILE ($path)$name"); |
137 my $f = ($path) ? "$path/$name" : "$name"; | 150 my $f = ($path) ? "$path/$name" : "$name"; |
138 $files{$f} = $name; | 151 $files{$f} = $name; |
139 } | 152 } |
140 | 153 |
141 # | 154 # |
142 # archive-storage-date | 155 # archive-storage-date |
143 # | 156 # |
144 my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1); | 157 my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1); |
145 if ($stordatenode) { | 158 if ($stordatenode) { |
146 logger('WARNING', "archive storage date exists! Resource already archived?\n"); | 159 logger('WARNING', "archive storage date exists! Resource already archived?"); |
147 $warncnt++; | 160 $warncnt++; |
148 # delete old date | 161 # delete old date |
149 $stordatenode->removeChildNodes; | 162 $stordatenode->removeChildNodes; |
150 } else { | 163 } else { |
151 # create new storage date node | 164 # create new storage date node |
184 if (/^Archive processing of .* finished without failure./) { | 197 if (/^Archive processing of .* finished without failure./) { |
185 print " ARCH: OK\n"; | 198 print " ARCH: OK\n"; |
186 } | 199 } |
187 } | 200 } |
188 } else { | 201 } else { |
189 logger('ABORT', "unable to start archive command '$archcmd'!!\n"); | 202 logger('ABORT', "unable to start archive command '$archcmd'!!"); |
190 exit 1; | 203 exit 1; |
191 } | 204 } |
192 | 205 |
193 return \%files; | 206 return \%files; |
194 } | 207 } |
207 | 220 |
208 foreach my $ft (sort keys %$to_archive) { | 221 foreach my $ft (sort keys %$to_archive) { |
209 my $fp = "$docdir/$ft"; | 222 my $fp = "$docdir/$ft"; |
210 #print " fp: $fp\n"; | 223 #print " fp: $fp\n"; |
211 if ($$archived{$fp}) { | 224 if ($$archived{$fp}) { |
212 logger('DEBUG', "$ft archived OK\n"); | 225 logger('DEBUG', "$ft archived OK"); |
213 $$archived{$fp} = "OK"; | 226 $$archived{$fp} = "OK"; |
214 } else { | 227 } else { |
215 logger('ERROR', "file '$ft' missing from archive!\n"); | 228 logger('ERROR', "file '$ft' missing from archive!"); |
216 $errcnt++; | 229 $errcnt++; |
217 } | 230 } |
218 } | 231 } |
219 | 232 |
220 foreach my $fa (sort keys %$archived) { | 233 foreach my $fa (sort keys %$archived) { |
221 if ($$archived{$fa} ne "OK") { | 234 if ($$archived{$fa} ne "OK") { |
222 my ($fn, $fp) = MPIWGStor::split_file_path($fa); | 235 my ($fn, $fp) = MPIWGStor::split_file_path($fa); |
223 if ($MPIWGStor::index_files{$fn}) { | 236 if ($MPIWGStor::index_files{$fn}) { |
224 logger('DEBUG', "$fa ignored\n"); | 237 logger('DEBUG', "$fa ignored"); |
225 $na--; | 238 $na--; |
226 } else { | 239 } else { |
227 logger('WARNING', "$fa archived but not in list!\n"); | 240 logger('WARNING', "$fa archived but not in list!"); |
228 $warncnt++; | 241 $warncnt++; |
229 } | 242 } |
230 } | 243 } |
231 } | 244 } |
232 | 245 |
233 if ($nt > $na) { | 246 if ($nt > $na) { |
234 logger('WARNING', "less files were archived ($na vs. $nt)!\n"); | 247 logger('WARNING', "less files were archived ($na vs. $nt)!"); |
235 } elsif ($na > $nt) { | 248 } elsif ($na > $nt) { |
236 logger('WARNING', "more files were archived ($na vs. $nt)!\n"); | 249 logger('WARNING', "more files were archived ($na vs. $nt)!"); |
237 } | 250 } |
238 | 251 |
239 } | 252 } |
240 | 253 |
241 | 254 |
259 next if ($MPIWGStor::index_files{$fn}); | 272 next if ($MPIWGStor::index_files{$fn}); |
260 # no file no delete | 273 # no file no delete |
261 next unless (-f $f); | 274 next unless (-f $f); |
262 # delete files | 275 # delete files |
263 if (unlink $f) { | 276 if (unlink $f) { |
264 logger('INFO', "remove $f ($fn)\n"); | 277 logger('INFO', "remove $f ($fn)"); |
265 } else { | 278 } else { |
266 logger('ERROR', "unable to delete $f!\n"); | 279 logger('ERROR', "unable to delete $f!"); |
267 $errcnt++; | 280 $errcnt++; |
268 } | 281 } |
269 } | 282 } |
270 # try to delete all empty directories | 283 # try to delete all empty directories |
271 my @dirkeys = sort keys %dirs; | 284 my @dirkeys = sort keys %dirs; |
273 for (my $i = $#dirkeys; $i >= 0; $i--) { | 286 for (my $i = $#dirkeys; $i >= 0; $i--) { |
274 my $d = $dirkeys[$i]; | 287 my $d = $dirkeys[$i]; |
275 # don't remove document dir (shouldn't be empty anyway) | 288 # don't remove document dir (shouldn't be empty anyway) |
276 next if ($d eq $docdir); | 289 next if ($d eq $docdir); |
277 if (-d $d) { | 290 if (-d $d) { |
278 logger('INFO', "remove dir $d\n"); | 291 logger('INFO', "remove dir $d"); |
279 rmdir $d; | 292 rmdir $d; |
280 } | 293 } |
281 } | 294 } |
282 } | 295 } |
283 | 296 |
290 # | 303 # |
291 sub delete_all_files { | 304 sub delete_all_files { |
292 my ($files, $dir) = @_; | 305 my ($files, $dir) = @_; |
293 | 306 |
294 if (! opendir DIR, $dir) { | 307 if (! opendir DIR, $dir) { |
295 logger('ERROR', "unable to read directory $dir!\n"); | 308 logger('ERROR', "unable to read directory $dir!"); |
296 $errcnt++; | 309 $errcnt++; |
297 return; | 310 return; |
298 } | 311 } |
299 my @fl = readdir DIR; | 312 my @fl = readdir DIR; |
300 closedir DIR; | 313 closedir DIR; |
304 if ($$files{$f}) { | 317 if ($$files{$f}) { |
305 # $f is in the file list | 318 # $f is in the file list |
306 if (-f "$dir/$f") { | 319 if (-f "$dir/$f") { |
307 # $f is a file | 320 # $f is a file |
308 if (unlink "$dir/$f") { | 321 if (unlink "$dir/$f") { |
309 logger('INFO', "removed $f\n"); | 322 logger('INFO', "removed $f"); |
310 } else { | 323 } else { |
311 logger('ERROR', "unable to delete $f!\n"); | 324 logger('ERROR', "unable to delete $f!"); |
312 $errcnt++; | 325 $errcnt++; |
313 } | 326 } |
314 } elsif (-d _) { | 327 } elsif (-d _) { |
315 # $f is a directory (unlink won't work) | 328 # $f is a directory (unlink won't work) |
316 if ((system 'rm', '-r', "$dir/$f") == 0) { | 329 if ((system 'rm', '-r', "$dir/$f") == 0) { |
317 logger('INFO', "removed directory $f\n"); | 330 logger('INFO', "removed directory $f"); |
318 } else { | 331 } else { |
319 logger('ERROR', "unable to delete directory $f!\n"); | 332 logger('ERROR', "unable to delete directory $f!"); |
320 $errcnt++; | 333 $errcnt++; |
321 } | 334 } |
322 } else { | 335 } else { |
323 logger('ERROR', "funny object $dir/$f!\n"); | 336 logger('ERROR', "funny object $dir/$f!"); |
324 $errcnt++; | 337 $errcnt++; |
325 } | 338 } |
326 } else { | 339 } else { |
327 # $f is not in the list | 340 # $f is not in the list |
328 if (-d "$dir/$f") { | 341 if (-d "$dir/$f") { |
329 # recurse into directories | 342 # recurse into directories |
330 logger('DEBUG', "enter $dir/$f\n"); | 343 logger('DEBUG', "enter $dir/$f"); |
331 delete_all_files($files, "$dir/$f"); | 344 delete_all_files($files, "$dir/$f"); |
332 } | 345 } |
333 } | 346 } |
334 } | 347 } |
335 } | 348 } |
337 | 350 |
338 ####################################################### | 351 ####################################################### |
339 # main | 352 # main |
340 # | 353 # |
341 | 354 |
342 logger('START', "archiver $version at $archdate\n"); | 355 logger('START', "archiver $version at $archdate"); |
343 | 356 |
344 # make sure the right user is running this program | 357 # make sure the right user is running this program |
345 my $user = getlogin; | 358 my $user = getlogin; |
346 #if (($user ne "archive")||($user ne "root")) { | 359 if (($user ne "archive")&&($user ne "root")) { |
347 # logger("ABORT", "you ($user) must be archive or root user to run this program!"); | 360 logger("ABORT", "you ($user) must be archive or root user to run this program!"); |
348 # exit 1; | 361 exit 1; |
349 #} | 362 } |
363 | |
364 # check for .archived file | |
365 if (-f "$docdir/.archived") { | |
366 if (not $force_archive) { | |
367 logger('ABORT', "already archived! (.archived file exists)"); | |
368 exit 1; | |
369 } else { | |
370 logger('WARNING', "resource already archived? (.archived file exists)"); | |
371 $warncnt++; | |
372 } | |
373 } | |
350 | 374 |
351 # use metacheck first | 375 # use metacheck first |
352 if (system("$checkprog $docdir >/dev/null") == 0) { | 376 if (open CHECK, "$checkprog -add-files $docdir |") { |
353 logger('INFO', "resource '$docdir' check OK\n"); | 377 my @errors; |
378 my $msg; | |
379 while (<CHECK>) { | |
380 chomp; | |
381 if (/^ERROR/) { | |
382 push @errors, $_; | |
383 } | |
384 $msg = $_; | |
385 } | |
386 if ($msg =~ /^DONE/) { | |
387 logger('DEBUG', "checking index file: $msg"); | |
388 logger('INFO', "resource '$docdir' check OK"); | |
389 } else { | |
390 logger('DEBUG', "errors checking index file:\n " . join("\n ", @errors) . "\n $msg"); | |
391 logger('ABORT', "resource '$docdir' check failed!"); | |
392 exit 1; | |
393 } | |
354 } else { | 394 } else { |
355 logger('ABORT', "resource '$docdir' check failed!!\n"); | 395 logger('ABORT', "unable to run $checkprog"); |
356 exit 1; | 396 exit 1; |
357 } | 397 } |
398 # if (system("$checkprog $docdir >/dev/null") == 0) { | |
399 # logger('INFO', "resource '$docdir' check OK"); | |
400 # } else { | |
401 # logger('ABORT', "resource '$docdir' check failed!!"); | |
402 # exit 1; | |
403 # } | |
358 | 404 |
359 # read index.meta file | 405 # read index.meta file |
360 my ($document, $rootnode) = MPIWGStor::read_xml($metafile); | 406 my ($document, $rootnode) = MPIWGStor::read_xml($metafile); |
361 | 407 |
362 # check file and add archive date | 408 # check file and add archive date |
363 my $files_to_archive = read_resource_meta($rootnode); | 409 my $files_to_archive = read_resource_meta($rootnode); |
364 | 410 |
365 print "INFO: ", scalar keys %$files_to_archive, " files to archive\n"; | 411 logger('INFO', (scalar keys %$files_to_archive) . " files to archive"); |
366 | 412 |
367 # check for .archived file | 413 # remove .archived file |
368 if (-f "$docdir/.archived") { | 414 if (-f "$docdir/.archived") { |
369 if (unlink "$docdir/.archived") { | 415 if (unlink "$docdir/.archived") { |
370 logger('WARNING', "existing .archived file has been removed! Resource already archived?\n"); | 416 logger('WARNING', "existing .archived file has been removed!"); |
371 $warncnt++; | 417 $warncnt++; |
372 } else { | 418 } else { |
373 logger('ERROR', "unable to remove existing .archived file!\n"); | 419 logger('ERROR', "unable to remove existing .archived file!"); |
374 $errcnt++; | 420 $errcnt++; |
375 } | 421 } |
376 } | 422 } |
377 | 423 |
378 # remove junk files | 424 # remove junk files |
380 delete_all_files(\%MPIWGStor::junk_files, $docdir); | 426 delete_all_files(\%MPIWGStor::junk_files, $docdir); |
381 } | 427 } |
382 | 428 |
383 # write new index.meta | 429 # write new index.meta |
384 if ($errcnt > 0) { | 430 if ($errcnt > 0) { |
385 logger('ABORT', "there were errors!\n"); | 431 logger('ABORT', "there were errors!"); |
386 exit 1; | 432 exit 1; |
387 } else { | 433 } else { |
388 if ($fix_xml) { | 434 if ($fix_xml) { |
389 MPIWGStor::write_xml($document, $metafile); | 435 MPIWGStor::write_xml($document, $metafile); |
390 } | 436 } |
392 | 438 |
393 # start archiving | 439 # start archiving |
394 my $archived_files = run_archive(); | 440 my $archived_files = run_archive(); |
395 my $num_archfiles = scalar keys %$archived_files; | 441 my $num_archfiles = scalar keys %$archived_files; |
396 | 442 |
397 logger('INFO', "$num_archfiles files archived\n"); | 443 logger('INFO', "$num_archfiles files archived"); |
398 | 444 |
399 # check list of archived files | 445 # check list of archived files |
400 check_files($files_to_archive, $archived_files); | 446 check_files($files_to_archive, $archived_files); |
401 | 447 |
402 # delete files if all went OK | 448 # delete files if all went OK |
410 if ($delete_data_files) { | 456 if ($delete_data_files) { |
411 delete_files($archived_files); | 457 delete_files($archived_files); |
412 } | 458 } |
413 } | 459 } |
414 | 460 |
415 logger('INFO', "$warncnt warnings\n"); | 461 logger('INFO', "$warncnt warnings"); |
416 logger('INFO', "$errcnt errors\n"); | 462 logger('INFO', "$errcnt errors"); |
417 if ($errcnt > 0) { | 463 if ($errcnt > 0) { |
418 logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time)); | 464 logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time)); |
419 exit 1; | 465 exit 1; |
420 } else { | 466 } else { |
421 logger('DONE', "$num_archfiles files archived at " . stime(time)); | 467 logger('DONE', "$num_archfiles files archived at " . stime(time)); |