comparison archiver.pl @ 18:fdf4ceb36db1

Fixed problem with dir names in metacheck. New version of metacheck defaults to not changing the index file. New version of archiver uses the new version of metacheck.
author casties
date Tue, 20 Sep 2005 19:24:57 +0200
parents b19df18aa19a
children a3c35eae25dc
comparison
equal deleted inserted replaced
17:6c5c7743acb1 18:fdf4ceb36db1
14 ####################################################### 14 #######################################################
15 # internal parameters 15 # internal parameters
16 # 16 #
17 17
18 # program version 18 # program version
19 my $version = "0.6 (ROC 23.3.2005)"; 19 my $version = "0.7 (ROC 20.9.2005)";
20 20
21 # short help 21 # short help
22 my $shorthelp = "MPIWG archiver $version\nuse:\n archiver [options] docpath\noptions:\n -premigrate don't delete archived files\n"; 22 my $help = "MPIWG archiver $version
23 use: archiver [options] docpath
24 options:
25 -debug show debugging info
26 -premigrate don't delete archived files
27 -force archive even if already archived
28 ";
23 29
24 # read command line parameters 30 # read command line parameters
25 my $args = MPIWGStor::parseargs; 31 my $args = MPIWGStor::parseargs;
32 if (! scalar(%$args)) {
33 print $help, "\n";
34 exit 1;
35 }
26 36
27 # debug level 37 # debug level
28 my $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; 38 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
39
40 # force archiving
41 my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
29 42
30 # rewrite XML file (necessary for archive date!) 43 # rewrite XML file (necessary for archive date!)
31 my $fix_xml = 1; 44 my $fix_xml = 1;
32 my $xml_changed = 0; 45 my $xml_changed = 0;
46
33 # XML namespace (not really implemented!) 47 # XML namespace (not really implemented!)
34 my $namespace = ""; 48 my $namespace = "";
35 49
36 # archive name (archive-path element, usually == $docdir) 50 # archive name (archive-path element, usually == $docdir)
37 my $archname; 51 my $archname;
52
38 # archive storage date (now) 53 # archive storage date (now)
39 my $archdate = stime(time); 54 my $archdate = stime(time);
40 55
41 # delete "junk" files before archiving 56 # delete "junk" files before archiving
42 my $delete_junk_files = 1; 57 my $delete_junk_files = 1;
43 58
44 # delete data files after archiving 59 # delete data files after archiving
45 my $delete_data_files = 1; 60 my $delete_data_files = 1;
46 61
62 # don't delete archived files with "-premigrate"
63 if (exists $$args{'premigrate'}) {
64 $delete_data_files = not $$args{'premigrate'};
65 }
66 if ($delete_data_files) {
67 logger('INFO', "going to remove successfully archived files from disk");
68 }
69
47 70
48 ####################################################### 71 #######################################################
49 # external programs 72 # external programs
50 # 73 #
51 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; 74 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
52 if (! -x $archprog) { 75 if (! -x $archprog) {
53 logger('ABORT', "TSM client program '$archprog' missing!!\n"); 76 logger('ABORT', "TSM client program '$archprog' missing!");
54 exit 1; 77 exit 1;
55 } 78 }
56 my $checkprog = "/usr/local/mpiwg/archive/metacheck"; 79 my $checkprog = "/usr/local/mpiwg/archive/metacheck";
57 if (! -x $checkprog) { 80 if (! -x $checkprog) {
58 logger('ABORT', "meta data checking program '$checkprog' missing!!\n"); 81 logger('ABORT', "meta data checking program '$checkprog' missing!");
59 exit 1; 82 exit 1;
60 } 83 }
61 # log file for archiver 84 # log file for archiver
62 my $log_file = "/var/log/mpiwg-archiver.log"; 85 my $log_file = "/var/log/mpiwg-archiver.log";
63 if (! open LOG, ">>$log_file") { 86 if (! open LOG, ">>$log_file") {
64 logger('ABORT', "unable to write log file '$log_file'!!\n"); 87 logger('ABORT', "unable to write log file '$log_file'!");
65 exit 1; 88 exit 1;
66 } 89 }
67 90
68 ####################################################### 91 #######################################################
69 # check parameters that were passed to the program 92 # check parameters that were passed to the program
70 # 93 #
71 if ($#ARGV < 0) { 94
72 print $shorthelp;
73 exit 1;
74 }
75 my $docdir = $$args{'path'}; 95 my $docdir = $$args{'path'};
76 # strip double slashes 96 # strip double slashes
77 $docdir =~ s/\/\//\//; 97 $docdir =~ s/\/\//\//;
78 # strip trailing slashes 98 # strip trailing slashes
79 $docdir =~ s/\/+$//; 99 $docdir =~ s/\/+$//;
80 if (! -d $docdir) { 100 if (! -d $docdir) {
81 logger('ABORT', "document directory \'$docdir\' doesn't exist!\n"); 101 logger('ABORT', "document directory \'$docdir\' doesn't exist!");
82 exit 1; 102 exit 1;
83 }
84 # don't delete archived files with "-premigrate"
85 if (exists $$args{'premigrate'}) {
86 $delete_data_files = not $$args{'premigrate'};
87 }
88 if ($delete_data_files) {
89 logger('INFO', "going to remove successfully archived files from disk");
90 } 103 }
91 104
92 my $metafile = "$docdir/index.meta"; 105 my $metafile = "$docdir/index.meta";
93 if (! -f $metafile) { 106 if (! -f $metafile) {
94 logger('ABORT', "metadata index file \'$metafile\' doesn't exist!\n"); 107 logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
95 exit 1; 108 exit 1;
96 } 109 }
97 110
98 ####################################################### 111 #######################################################
99 # internal variables 112 # internal variables
120 # archive path 133 # archive path
121 # 134 #
122 # get archive-path 135 # get archive-path
123 $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path')); 136 $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
124 if (! $archname) { 137 if (! $archname) {
125 logger('ABORT', "archive-name element missing!!\n"); 138 logger('ABORT', "archive-name element missing!");
126 exit 1; 139 exit 1;
127 } 140 }
128 141
129 # 142 #
130 # files 143 # files
131 # 144 #
132 my @filenodes = $rootnode->findnodes('child::file'); 145 my @filenodes = $rootnode->findnodes('child::file');
133 foreach my $fn (@filenodes) { 146 foreach my $fn (@filenodes) {
134 my $name = MPIWGStor::sstrip($fn->findvalue('child::name')); 147 my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
135 my $path = MPIWGStor::sstrip($fn->findvalue('child::path')); 148 my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
136 logger('DEBUG', "FILE ($path)$name\n"); 149 logger('DEBUG', "FILE ($path)$name");
137 my $f = ($path) ? "$path/$name" : "$name"; 150 my $f = ($path) ? "$path/$name" : "$name";
138 $files{$f} = $name; 151 $files{$f} = $name;
139 } 152 }
140 153
141 # 154 #
142 # archive-storage-date 155 # archive-storage-date
143 # 156 #
144 my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1); 157 my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
145 if ($stordatenode) { 158 if ($stordatenode) {
146 logger('WARNING', "archive storage date exists! Resource already archived?\n"); 159 logger('WARNING', "archive storage date exists! Resource already archived?");
147 $warncnt++; 160 $warncnt++;
148 # delete old date 161 # delete old date
149 $stordatenode->removeChildNodes; 162 $stordatenode->removeChildNodes;
150 } else { 163 } else {
151 # create new storage date node 164 # create new storage date node
184 if (/^Archive processing of .* finished without failure./) { 197 if (/^Archive processing of .* finished without failure./) {
185 print " ARCH: OK\n"; 198 print " ARCH: OK\n";
186 } 199 }
187 } 200 }
188 } else { 201 } else {
189 logger('ABORT', "unable to start archive command '$archcmd'!!\n"); 202 logger('ABORT', "unable to start archive command '$archcmd'!!");
190 exit 1; 203 exit 1;
191 } 204 }
192 205
193 return \%files; 206 return \%files;
194 } 207 }
207 220
208 foreach my $ft (sort keys %$to_archive) { 221 foreach my $ft (sort keys %$to_archive) {
209 my $fp = "$docdir/$ft"; 222 my $fp = "$docdir/$ft";
210 #print " fp: $fp\n"; 223 #print " fp: $fp\n";
211 if ($$archived{$fp}) { 224 if ($$archived{$fp}) {
212 logger('DEBUG', "$ft archived OK\n"); 225 logger('DEBUG', "$ft archived OK");
213 $$archived{$fp} = "OK"; 226 $$archived{$fp} = "OK";
214 } else { 227 } else {
215 logger('ERROR', "file '$ft' missing from archive!\n"); 228 logger('ERROR', "file '$ft' missing from archive!");
216 $errcnt++; 229 $errcnt++;
217 } 230 }
218 } 231 }
219 232
220 foreach my $fa (sort keys %$archived) { 233 foreach my $fa (sort keys %$archived) {
221 if ($$archived{$fa} ne "OK") { 234 if ($$archived{$fa} ne "OK") {
222 my ($fn, $fp) = MPIWGStor::split_file_path($fa); 235 my ($fn, $fp) = MPIWGStor::split_file_path($fa);
223 if ($MPIWGStor::index_files{$fn}) { 236 if ($MPIWGStor::index_files{$fn}) {
224 logger('DEBUG', "$fa ignored\n"); 237 logger('DEBUG', "$fa ignored");
225 $na--; 238 $na--;
226 } else { 239 } else {
227 logger('WARNING', "$fa archived but not in list!\n"); 240 logger('WARNING', "$fa archived but not in list!");
228 $warncnt++; 241 $warncnt++;
229 } 242 }
230 } 243 }
231 } 244 }
232 245
233 if ($nt > $na) { 246 if ($nt > $na) {
234 logger('WARNING', "less files were archived ($na vs. $nt)!\n"); 247 logger('WARNING', "less files were archived ($na vs. $nt)!");
235 } elsif ($na > $nt) { 248 } elsif ($na > $nt) {
236 logger('WARNING', "more files were archived ($na vs. $nt)!\n"); 249 logger('WARNING', "more files were archived ($na vs. $nt)!");
237 } 250 }
238 251
239 } 252 }
240 253
241 254
259 next if ($MPIWGStor::index_files{$fn}); 272 next if ($MPIWGStor::index_files{$fn});
260 # no file no delete 273 # no file no delete
261 next unless (-f $f); 274 next unless (-f $f);
262 # delete files 275 # delete files
263 if (unlink $f) { 276 if (unlink $f) {
264 logger('INFO', "remove $f ($fn)\n"); 277 logger('INFO', "remove $f ($fn)");
265 } else { 278 } else {
266 logger('ERROR', "unable to delete $f!\n"); 279 logger('ERROR', "unable to delete $f!");
267 $errcnt++; 280 $errcnt++;
268 } 281 }
269 } 282 }
270 # try to delete all empty directories 283 # try to delete all empty directories
271 my @dirkeys = sort keys %dirs; 284 my @dirkeys = sort keys %dirs;
273 for (my $i = $#dirkeys; $i >= 0; $i--) { 286 for (my $i = $#dirkeys; $i >= 0; $i--) {
274 my $d = $dirkeys[$i]; 287 my $d = $dirkeys[$i];
275 # don't remove document dir (shouldn't be empty anyway) 288 # don't remove document dir (shouldn't be empty anyway)
276 next if ($d eq $docdir); 289 next if ($d eq $docdir);
277 if (-d $d) { 290 if (-d $d) {
278 logger('INFO', "remove dir $d\n"); 291 logger('INFO', "remove dir $d");
279 rmdir $d; 292 rmdir $d;
280 } 293 }
281 } 294 }
282 } 295 }
283 296
290 # 303 #
291 sub delete_all_files { 304 sub delete_all_files {
292 my ($files, $dir) = @_; 305 my ($files, $dir) = @_;
293 306
294 if (! opendir DIR, $dir) { 307 if (! opendir DIR, $dir) {
295 logger('ERROR', "unable to read directory $dir!\n"); 308 logger('ERROR', "unable to read directory $dir!");
296 $errcnt++; 309 $errcnt++;
297 return; 310 return;
298 } 311 }
299 my @fl = readdir DIR; 312 my @fl = readdir DIR;
300 closedir DIR; 313 closedir DIR;
304 if ($$files{$f}) { 317 if ($$files{$f}) {
305 # $f is in the file list 318 # $f is in the file list
306 if (-f "$dir/$f") { 319 if (-f "$dir/$f") {
307 # $f is a file 320 # $f is a file
308 if (unlink "$dir/$f") { 321 if (unlink "$dir/$f") {
309 logger('INFO', "removed $f\n"); 322 logger('INFO', "removed $f");
310 } else { 323 } else {
311 logger('ERROR', "unable to delete $f!\n"); 324 logger('ERROR', "unable to delete $f!");
312 $errcnt++; 325 $errcnt++;
313 } 326 }
314 } elsif (-d _) { 327 } elsif (-d _) {
315 # $f is a directory (unlink won't work) 328 # $f is a directory (unlink won't work)
316 if ((system 'rm', '-r', "$dir/$f") == 0) { 329 if ((system 'rm', '-r', "$dir/$f") == 0) {
317 logger('INFO', "removed directory $f\n"); 330 logger('INFO', "removed directory $f");
318 } else { 331 } else {
319 logger('ERROR', "unable to delete directory $f!\n"); 332 logger('ERROR', "unable to delete directory $f!");
320 $errcnt++; 333 $errcnt++;
321 } 334 }
322 } else { 335 } else {
323 logger('ERROR', "funny object $dir/$f!\n"); 336 logger('ERROR', "funny object $dir/$f!");
324 $errcnt++; 337 $errcnt++;
325 } 338 }
326 } else { 339 } else {
327 # $f is not in the list 340 # $f is not in the list
328 if (-d "$dir/$f") { 341 if (-d "$dir/$f") {
329 # recurse into directories 342 # recurse into directories
330 logger('DEBUG', "enter $dir/$f\n"); 343 logger('DEBUG', "enter $dir/$f");
331 delete_all_files($files, "$dir/$f"); 344 delete_all_files($files, "$dir/$f");
332 } 345 }
333 } 346 }
334 } 347 }
335 } 348 }
337 350
338 ####################################################### 351 #######################################################
339 # main 352 # main
340 # 353 #
341 354
342 logger('START', "archiver $version at $archdate\n"); 355 logger('START', "archiver $version at $archdate");
343 356
344 # make sure the right user is running this program 357 # make sure the right user is running this program
345 my $user = getlogin; 358 my $user = getlogin;
346 #if (($user ne "archive")||($user ne "root")) { 359 if (($user ne "archive")&&($user ne "root")) {
347 # logger("ABORT", "you ($user) must be archive or root user to run this program!"); 360 logger("ABORT", "you ($user) must be archive or root user to run this program!");
348 # exit 1; 361 exit 1;
349 #} 362 }
363
364 # check for .archived file
365 if (-f "$docdir/.archived") {
366 if (not $force_archive) {
367 logger('ABORT', "already archived! (.archived file exists)");
368 exit 1;
369 } else {
370 logger('WARNING', "resource already archived? (.archived file exists)");
371 $warncnt++;
372 }
373 }
350 374
351 # use metacheck first 375 # use metacheck first
352 if (system("$checkprog $docdir >/dev/null") == 0) { 376 if (open CHECK, "$checkprog -add-files $docdir |") {
353 logger('INFO', "resource '$docdir' check OK\n"); 377 my @errors;
378 my $msg;
379 while (<CHECK>) {
380 chomp;
381 if (/^ERROR/) {
382 push @errors, $_;
383 }
384 $msg = $_;
385 }
386 if ($msg =~ /^DONE/) {
387 logger('DEBUG', "checking index file: $msg");
388 logger('INFO', "resource '$docdir' check OK");
389 } else {
390 logger('DEBUG', "errors checking index file:\n " . join("\n ", @errors) . "\n $msg");
391 logger('ABORT', "resource '$docdir' check failed!");
392 exit 1;
393 }
354 } else { 394 } else {
355 logger('ABORT', "resource '$docdir' check failed!!\n"); 395 logger('ABORT', "unable to run $checkprog");
356 exit 1; 396 exit 1;
357 } 397 }
398 # if (system("$checkprog $docdir >/dev/null") == 0) {
399 # logger('INFO', "resource '$docdir' check OK");
400 # } else {
401 # logger('ABORT', "resource '$docdir' check failed!!");
402 # exit 1;
403 # }
358 404
359 # read index.meta file 405 # read index.meta file
360 my ($document, $rootnode) = MPIWGStor::read_xml($metafile); 406 my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
361 407
362 # check file and add archive date 408 # check file and add archive date
363 my $files_to_archive = read_resource_meta($rootnode); 409 my $files_to_archive = read_resource_meta($rootnode);
364 410
365 print "INFO: ", scalar keys %$files_to_archive, " files to archive\n"; 411 logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
366 412
367 # check for .archived file 413 # remove .archived file
368 if (-f "$docdir/.archived") { 414 if (-f "$docdir/.archived") {
369 if (unlink "$docdir/.archived") { 415 if (unlink "$docdir/.archived") {
370 logger('WARNING', "existing .archived file has been removed! Resource already archived?\n"); 416 logger('WARNING', "existing .archived file has been removed!");
371 $warncnt++; 417 $warncnt++;
372 } else { 418 } else {
373 logger('ERROR', "unable to remove existing .archived file!\n"); 419 logger('ERROR', "unable to remove existing .archived file!");
374 $errcnt++; 420 $errcnt++;
375 } 421 }
376 } 422 }
377 423
378 # remove junk files 424 # remove junk files
380 delete_all_files(\%MPIWGStor::junk_files, $docdir); 426 delete_all_files(\%MPIWGStor::junk_files, $docdir);
381 } 427 }
382 428
383 # write new index.meta 429 # write new index.meta
384 if ($errcnt > 0) { 430 if ($errcnt > 0) {
385 logger('ABORT', "there were errors!\n"); 431 logger('ABORT', "there were errors!");
386 exit 1; 432 exit 1;
387 } else { 433 } else {
388 if ($fix_xml) { 434 if ($fix_xml) {
389 MPIWGStor::write_xml($document, $metafile); 435 MPIWGStor::write_xml($document, $metafile);
390 } 436 }
392 438
393 # start archiving 439 # start archiving
394 my $archived_files = run_archive(); 440 my $archived_files = run_archive();
395 my $num_archfiles = scalar keys %$archived_files; 441 my $num_archfiles = scalar keys %$archived_files;
396 442
397 logger('INFO', "$num_archfiles files archived\n"); 443 logger('INFO', "$num_archfiles files archived");
398 444
399 # check list of archived files 445 # check list of archived files
400 check_files($files_to_archive, $archived_files); 446 check_files($files_to_archive, $archived_files);
401 447
402 # delete files if all went OK 448 # delete files if all went OK
410 if ($delete_data_files) { 456 if ($delete_data_files) {
411 delete_files($archived_files); 457 delete_files($archived_files);
412 } 458 }
413 } 459 }
414 460
415 logger('INFO', "$warncnt warnings\n"); 461 logger('INFO', "$warncnt warnings");
416 logger('INFO', "$errcnt errors\n"); 462 logger('INFO', "$errcnt errors");
417 if ($errcnt > 0) { 463 if ($errcnt > 0) {
418 logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time)); 464 logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
419 exit 1; 465 exit 1;
420 } else { 466 } else {
421 logger('DONE', "$num_archfiles files archived at " . stime(time)); 467 logger('DONE', "$num_archfiles files archived at " . stime(time));