Annotation of foxridge-archiver/archiver.pl, revision 1.7
1.1 casties 1: #!/usr/local/bin/perl -w
2:
3: use strict;
4:
5: use XML::LibXML;
6:
7: # MPIWG libraries
8: use lib '/usr/local/mpiwg/archive';
9: use MPIWGStor;
10:
11: # make output unbuffered
12: $|=1;
13:
14: #######################################################
15: # internal parameters
16: #
17:
18: # program version
1.7 ! casties 19: my $version = "0.7.2 (ROC 5.10.2005)";
1.4 casties 20:
21: # short help
1.5 casties 22: my $help = "MPIWG archiver $version
23: use: archiver [options] docpath
24: options:
25: -debug show debugging info
26: -premigrate don't delete archived files
27: -force archive even if already archived
1.7 ! casties 28: -replace rewrite index file
1.5 casties 29: ";
1.4 casties 30:
31: # read command line parameters
32: my $args = MPIWGStor::parseargs;
1.5 casties 33: if (! scalar(%$args)) {
34: print $help, "\n";
35: exit 1;
36: }
1.4 casties 37:
38: # debug level
1.5 casties 39: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
40:
41: # force archiving
42: my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
1.1 casties 43:
1.7 ! casties 44: # rewrite index file
! 45: my $rewrite_index = (exists $$args{'replace'}) ? $$args{'replace'} : 0;
! 46:
1.1 casties 47: # rewrite XML file (necessary for archive date!)
48: my $fix_xml = 1;
49: my $xml_changed = 0;
1.5 casties 50:
1.1 casties 51: # XML namespace (not really implemented!)
52: my $namespace = "";
53:
54: # archive name (archive-path element, usually == $docdir)
55: my $archname;
1.5 casties 56:
1.1 casties 57: # archive storage date (now)
58: my $archdate = stime(time);
59:
60: # delete "junk" files before archiving
61: my $delete_junk_files = 1;
62:
63: # delete data files after archiving
64: my $delete_data_files = 1;
65:
1.5 casties 66: # don't delete archived files with "-premigrate"
67: if (exists $$args{'premigrate'}) {
68: $delete_data_files = not $$args{'premigrate'};
69: }
70: if ($delete_data_files) {
71: logger('INFO', "going to remove successfully archived files from disk");
72: }
73:
1.1 casties 74:
75: #######################################################
76: # external programs
77: #
78: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
79: if (! -x $archprog) {
1.5 casties 80: logger('ABORT', "TSM client program '$archprog' missing!");
1.1 casties 81: exit 1;
82: }
83: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
84: if (! -x $checkprog) {
1.5 casties 85: logger('ABORT', "meta data checking program '$checkprog' missing!");
1.1 casties 86: exit 1;
87: }
88: # log file for archiver
89: my $log_file = "/var/log/mpiwg-archiver.log";
90: if (! open LOG, ">>$log_file") {
1.5 casties 91: logger('ABORT', "unable to write log file '$log_file'!");
1.1 casties 92: exit 1;
93: }
94:
95: #######################################################
96: # check parameters that were passed to the program
97: #
1.5 casties 98:
1.4 casties 99: my $docdir = $$args{'path'};
1.2 casties 100: # strip double slashes
101: $docdir =~ s/\/\//\//;
1.1 casties 102: # strip trailing slashes
1.2 casties 103: $docdir =~ s/\/+$//;
1.1 casties 104: if (! -d $docdir) {
1.5 casties 105: logger('ABORT', "document directory \'$docdir\' doesn't exist!");
1.1 casties 106: exit 1;
107: }
108:
109: my $metafile = "$docdir/index.meta";
110: if (! -f $metafile) {
1.5 casties 111: logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
1.1 casties 112: exit 1;
113: }
114:
115: #######################################################
116: # internal variables
117: #
118:
119: # number of errors
120: my $errcnt = 0;
121: # number of warnings
122: my $warncnt = 0;
123:
124: #######################################################
125: # subroutines
126: #
127:
128: #
129: # $files = read_resource_meta($rootnode)
130: #
131: # checks general resource meta information and reads the list of files
132: #
133: sub read_resource_meta {
134: my ($rootnode) = @_;
135: my %files;
136: #
137: # archive path
138: #
139: # get archive-path
140: $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
141: if (! $archname) {
1.5 casties 142: logger('ABORT', "archive-name element missing!");
1.1 casties 143: exit 1;
144: }
145:
146: #
147: # files
148: #
149: my @filenodes = $rootnode->findnodes('child::file');
150: foreach my $fn (@filenodes) {
151: my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
152: my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
1.5 casties 153: logger('DEBUG', "FILE ($path)$name");
1.1 casties 154: my $f = ($path) ? "$path/$name" : "$name";
155: $files{$f} = $name;
156: }
157:
158: #
159: # archive-storage-date
160: #
161: my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
162: if ($stordatenode) {
1.5 casties 163: logger('WARNING', "archive storage date exists! Resource already archived?");
1.1 casties 164: $warncnt++;
165: # delete old date
166: $stordatenode->removeChildNodes;
167: } else {
168: # create new storage date node
169: $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
170: # move after archive-path
171: $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
172: }
173: $stordatenode->appendTextNode($archdate);
174: $xml_changed++;
175: return \%files;
176: }
177:
178:
179: #
180: # $%files = run_archive
181: #
182: # runs the archiver program on $docdir and returns a list of archived files
183: #
184: sub run_archive {
185: my %files;
186: print LOG "START archiver $version $archdate\n";
187: my $archcmd = $archprog;
188: $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
189: $archcmd .= " -description='$archname'";
190: $archcmd .= " '$docdir/'";
191:
192: print LOG "CMD: $archcmd\n";
193: if (open ARCH, "$archcmd 2>&1 |") {
194: while (<ARCH>) {
195: chomp;
196: print LOG "ARCH: $_\n";
197: if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
198: print " ARCH: file '$1'\n";
199: $files{$1} = "ok";
200: }
201: if (/^Archive processing of .* finished without failure./) {
202: print " ARCH: OK\n";
203: }
204: }
205: } else {
1.5 casties 206: logger('ABORT', "unable to start archive command '$archcmd'!!");
1.1 casties 207: exit 1;
208: }
209:
210: return \%files;
211: }
212:
213:
214: #
215: # check_files(\%files_to_archive, \%archived_files)
216: #
217: # compares the list of archived and to be archived files
218: #
219: sub check_files {
220: my ($to_archive, $archived) = @_;
221:
222: my $nt = scalar keys %$to_archive;
223: my $na = scalar keys %$archived;
224:
225: foreach my $ft (sort keys %$to_archive) {
226: my $fp = "$docdir/$ft";
227: #print " fp: $fp\n";
228: if ($$archived{$fp}) {
1.5 casties 229: logger('DEBUG', "$ft archived OK");
1.1 casties 230: $$archived{$fp} = "OK";
231: } else {
1.5 casties 232: logger('ERROR', "file '$ft' missing from archive!");
1.1 casties 233: $errcnt++;
234: }
235: }
236:
237: foreach my $fa (sort keys %$archived) {
238: if ($$archived{$fa} ne "OK") {
239: my ($fn, $fp) = MPIWGStor::split_file_path($fa);
240: if ($MPIWGStor::index_files{$fn}) {
1.5 casties 241: logger('DEBUG', "$fa ignored");
1.1 casties 242: $na--;
243: } else {
1.5 casties 244: logger('WARNING', "$fa archived but not in list!");
1.1 casties 245: $warncnt++;
246: }
247: }
248: }
249:
250: if ($nt > $na) {
1.5 casties 251: logger('WARNING', "less files were archived ($na vs. $nt)!");
1.1 casties 252: } elsif ($na > $nt) {
1.5 casties 253: logger('WARNING', "more files were archived ($na vs. $nt)!");
1.1 casties 254: }
255:
256: }
257:
258:
259: #
260: # delete_files(\%files)
261: #
262: # deletes the files from the list (of absolute files) and their directories
263: # if they are empty
264: #
265: sub delete_files {
266: my ($files) = @_;
267: my %dirs;
268:
269: foreach my $f (sort keys %$files) {
270: my ($fn, $fp) = MPIWGStor::split_file_path($f);
271: # collect all unique directories
272: if ($fp && (! $dirs{$fp})) {
273: $dirs{$fp} = $fp;
274: }
275: # don't delete index files
276: next if ($MPIWGStor::index_files{$fn});
277: # no file no delete
278: next unless (-f $f);
279: # delete files
280: if (unlink $f) {
1.6 casties 281: logger('DEBUG', "remove $f ($fn)");
1.1 casties 282: } else {
1.5 casties 283: logger('ERROR', "unable to delete $f!");
1.1 casties 284: $errcnt++;
285: }
286: }
287: # try to delete all empty directories
288: my @dirkeys = sort keys %dirs;
289: # starting at the end to get to the subdirectories first
290: for (my $i = $#dirkeys; $i >= 0; $i--) {
291: my $d = $dirkeys[$i];
292: # dont't remove document dir (shouldn't be empty anyway)
293: next if ($d eq $docdir);
294: if (-d $d) {
1.6 casties 295: logger('DEBUG', "remove dir $d");
1.1 casties 296: rmdir $d;
297: }
298: }
299: }
300:
301:
302: #
303: # delete_all_files(\%files, $dir)
304: #
305: # deletes all files with names from the list %files
306: # in the directory $dir and its subdirectories
307: #
308: sub delete_all_files {
309: my ($files, $dir) = @_;
310:
311: if (! opendir DIR, $dir) {
1.5 casties 312: logger('ERROR', "unable to read directory $dir!");
1.1 casties 313: $errcnt++;
314: return;
315: }
316: my @fl = readdir DIR;
317: closedir DIR;
318:
319: foreach my $f (@fl) {
320: next if ($f =~ /^\.{1,2}$/);
321: if ($$files{$f}) {
322: # $f is in the file list
323: if (-f "$dir/$f") {
324: # $f is a file
325: if (unlink "$dir/$f") {
1.6 casties 326: logger('DEBUG', "removed $f");
1.1 casties 327: } else {
1.5 casties 328: logger('ERROR', "unable to delete $f!");
1.1 casties 329: $errcnt++;
330: }
331: } elsif (-d _) {
332: # $f is a directory (unlink won't work)
333: if ((system 'rm', '-r', "$dir/$f") == 0) {
1.6 casties 334: logger('DEBUG', "removed directory $f");
1.1 casties 335: } else {
1.5 casties 336: logger('ERROR', "unable to delete directory $f!");
1.1 casties 337: $errcnt++;
338: }
339: } else {
1.5 casties 340: logger('ERROR', "funny object $dir/$f!");
1.1 casties 341: $errcnt++;
342: }
343: } else {
344: # $f is not in the list
345: if (-d "$dir/$f") {
346: # recurse into directories
1.5 casties 347: logger('DEBUG', "enter $dir/$f");
1.1 casties 348: delete_all_files($files, "$dir/$f");
349: }
350: }
351: }
352: }
353:
354:
355: #######################################################
356: # main
357: #
358:
1.5 casties 359: logger('START', "archiver $version at $archdate");
1.1 casties 360:
361: # make shure the right user is running this program
362: my $user = getlogin;
1.5 casties 363: if (($user ne "archive")&&($user ne "root")) {
1.6 casties 364: logger("WARNING", "you ($user) should be archive or root user to run this program!");
1.5 casties 365: }
366:
367: # check for .archived file
368: if (-f "$docdir/.archived") {
369: if (not $force_archive) {
370: logger('ABORT', "already archived! (.archived file exists)");
371: exit 1;
372: } else {
373: logger('WARNING', "resource already archived? (.archived file exists)");
374: $warncnt++;
375: }
376: }
1.1 casties 377:
378: # use metacheck first
1.7 ! casties 379: my $check_opts="-add-files";
! 380: if ($rewrite_index) {
! 381: $check_opts = "-replace";
! 382: }
! 383: if (open CHECK, "$checkprog $check_opts $docdir |") {
1.5 casties 384: my @errors;
385: my $msg;
386: while (<CHECK>) {
387: chomp;
388: if (/^ERROR/) {
389: push @errors, $_;
390: }
391: $msg = $_;
392: }
393: if ($msg =~ /^DONE/) {
394: logger('DEBUG', "checking index file: $msg");
395: logger('INFO', "resource '$docdir' check OK");
396: } else {
397: logger('DEBUG', "errors checking index file:\n " . join("\n ", @errors) . "\n $msg");
398: logger('ABORT', "resource '$docdir' check failed!");
399: exit 1;
400: }
1.1 casties 401: } else {
1.5 casties 402: logger('ABORT', "unable to run $checkprog");
1.1 casties 403: exit 1;
404: }
1.5 casties 405: # if (system("$checkprog $docdir >/dev/null") == 0) {
406: # logger('INFO', "resource '$docdir' check OK");
407: # } else {
408: # logger('ABORT', "resource '$docdir' check failed!!");
409: # exit 1;
410: # }
1.1 casties 411:
412: # read index.meta file
413: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
414:
415: # check file and add archive date
416: my $files_to_archive = read_resource_meta($rootnode);
417:
1.5 casties 418: logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
1.1 casties 419:
1.5 casties 420: # remove .archived file
1.1 casties 421: if (-f "$docdir/.archived") {
422: if (unlink "$docdir/.archived") {
1.5 casties 423: logger('WARNING', "existing .archived file has been removed!");
1.1 casties 424: $warncnt++;
425: } else {
1.5 casties 426: logger('ERROR', "unable to remove existing .archived file!");
1.1 casties 427: $errcnt++;
428: }
429: }
430:
431: # remove junk files
432: if ($delete_junk_files) {
433: delete_all_files(\%MPIWGStor::junk_files, $docdir);
434: }
435:
436: # write new index.meta
437: if ($errcnt > 0) {
1.5 casties 438: logger('ABORT', "there were errors!");
1.1 casties 439: exit 1;
440: } else {
441: if ($fix_xml) {
442: MPIWGStor::write_xml($document, $metafile);
443: }
444: }
445:
446: # start archiving
1.3 casties 447: my $archived_files = run_archive();
448: my $num_archfiles = scalar keys %$archived_files;
1.1 casties 449:
1.5 casties 450: logger('INFO', "$num_archfiles files archived");
1.1 casties 451:
452: # check list of archived files
453: check_files($files_to_archive, $archived_files);
454:
455: # delete files if all went OK
456: if ($errcnt == 0) {
457: system("touch", "$docdir/.archived");
458: # remove junk files (again)
459: if ($delete_junk_files) {
460: delete_all_files(\%MPIWGStor::junk_files, $docdir);
461: }
462: # remove archived files
463: if ($delete_data_files) {
464: delete_files($archived_files);
465: }
466: }
467:
1.5 casties 468: logger('INFO', "$warncnt warnings");
469: logger('INFO', "$errcnt errors");
1.1 casties 470: if ($errcnt > 0) {
1.4 casties 471: logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
1.1 casties 472: exit 1;
473: } else {
1.4 casties 474: logger('DONE', "$num_archfiles files archived at " . stime(time));
1.1 casties 475: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>