#!/usr/local/bin/perl -w use strict; use XML::LibXML; # MPIWG libraries use lib '/usr/local/mpiwg/archive'; use MPIWGStor; # make output unbuffered $|=1; ####################################################### # internal parameters # # program version my $version = "0.7.1 (ROC 23.9.2005)"; # short help my $help = "MPIWG archiver $version use: archiver [options] docpath options: -debug show debugging info -premigrate don't delete archived files -force archive even if already archived "; # read command line parameters my $args = MPIWGStor::parseargs; if (! scalar(%$args)) { print $help, "\n"; exit 1; } # debug level $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; # force archiving my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0; # rewrite XML file (necessary for archive date!) my $fix_xml = 1; my $xml_changed = 0; # XML namespace (not really implemented!) my $namespace = ""; # archive name (archive-path element, usually == $docdir) my $archname; # archive storage date (now) my $archdate = stime(time); # delete "junk" files before archiving my $delete_junk_files = 1; # delete data files after archiving my $delete_data_files = 1; # don't delete archived files with "-premigrate" if (exists $$args{'premigrate'}) { $delete_data_files = not $$args{'premigrate'}; } if ($delete_data_files) { logger('INFO', "going to remove successfully archived files from disk"); } ####################################################### # external programs # my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; if (! -x $archprog) { logger('ABORT', "TSM client program '$archprog' missing!"); exit 1; } my $checkprog = "/usr/local/mpiwg/archive/metacheck"; if (! -x $checkprog) { logger('ABORT', "meta data checking program '$checkprog' missing!"); exit 1; } # log file for archiver my $log_file = "/var/log/mpiwg-archiver.log"; if (! open LOG, ">>$log_file") { logger('ABORT', "unable to write log file '$log_file'!"); exit 1; } ####################################################### # check parameters that were passed to the program # my $docdir = $$args{'path'}; # strip double slashes $docdir =~ s/\/\//\//; # strip trailing slashes $docdir =~ s/\/+$//; if (! -d $docdir) { logger('ABORT', "document directory \'$docdir\' doesn't exist!"); exit 1; } my $metafile = "$docdir/index.meta"; if (! -f $metafile) { logger('ABORT', "metadata index file \'$metafile\' doesn't exist!"); exit 1; } ####################################################### # internal variables # # number of errors my $errcnt = 0; # number of warnings my $warncnt = 0; ####################################################### # subroutines # # # $files = read_resource_meta($rootnode) # # checks general resource meta information and reads the list of files # sub read_resource_meta { my ($rootnode) = @_; my %files; # # archive path # # get archive-path $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path')); if (! $archname) { logger('ABORT', "archive-name element missing!"); exit 1; } # # files # my @filenodes = $rootnode->findnodes('child::file'); foreach my $fn (@filenodes) { my $name = MPIWGStor::sstrip($fn->findvalue('child::name')); my $path = MPIWGStor::sstrip($fn->findvalue('child::path')); logger('DEBUG', "FILE ($path)$name"); my $f = ($path) ? "$path/$name" : "$name"; $files{$f} = $name; } # # archive-storage-date # my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1); if ($stordatenode) { logger('WARNING', "archive storage date exists! Resource already archived?"); $warncnt++; # delete old date $stordatenode->removeChildNodes; } else { # create new storage date node $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date"); # move after archive-path $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1)); } $stordatenode->appendTextNode($archdate); $xml_changed++; return \%files; } # # $%files = run_archive # # runs the archiver program on $docdir and returns a list of archived files # sub run_archive { my %files; print LOG "START archiver $version $archdate\n"; my $archcmd = $archprog; $archcmd .= " archive -archsymlinkasfile=no -subdir=yes"; $archcmd .= " -description='$archname'"; $archcmd .= " '$docdir/'"; print LOG "CMD: $archcmd\n"; if (open ARCH, "$archcmd 2>&1 |") { while () { chomp; print LOG "ARCH: $_\n"; if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) { print " ARCH: file '$1'\n"; $files{$1} = "ok"; } if (/^Archive processing of .* finished without failure./) { print " ARCH: OK\n"; } } } else { logger('ABORT', "unable to start archive command '$archcmd'!!"); exit 1; } return \%files; } # # check_files(\%files_to_archive, \%archived_files) # # compares the list of archived and to be archived files # sub check_files { my ($to_archive, $archived) = @_; my $nt = scalar keys %$to_archive; my $na = scalar keys %$archived; foreach my $ft (sort keys %$to_archive) { my $fp = "$docdir/$ft"; #print " fp: $fp\n"; if ($$archived{$fp}) { logger('DEBUG', "$ft archived OK"); $$archived{$fp} = "OK"; } else { logger('ERROR', "file '$ft' missing from archive!"); $errcnt++; } } foreach my $fa (sort keys %$archived) { if ($$archived{$fa} ne "OK") { my ($fn, $fp) = MPIWGStor::split_file_path($fa); if ($MPIWGStor::index_files{$fn}) { logger('DEBUG', "$fa ignored"); $na--; } else { logger('WARNING', "$fa archived but not in list!"); $warncnt++; } } } if ($nt > $na) { logger('WARNING', "less files were archived ($na vs. $nt)!"); } elsif ($na > $nt) { logger('WARNING', "more files were archived ($na vs. $nt)!"); } } # # delete_files(\%files) # # deletes the files from the list (of absolute files) and their directories # if they are empty # sub delete_files { my ($files) = @_; my %dirs; foreach my $f (sort keys %$files) { my ($fn, $fp) = MPIWGStor::split_file_path($f); # collect all unique directories if ($fp && (! $dirs{$fp})) { $dirs{$fp} = $fp; } # don't delete index files next if ($MPIWGStor::index_files{$fn}); # no file no delete next unless (-f $f); # delete files if (unlink $f) { logger('DEBUG', "remove $f ($fn)"); } else { logger('ERROR', "unable to delete $f!"); $errcnt++; } } # try to delete all empty directories my @dirkeys = sort keys %dirs; # starting at the end to get to the subdirectories first for (my $i = $#dirkeys; $i >= 0; $i--) { my $d = $dirkeys[$i]; # dont't remove document dir (shouldn't be empty anyway) next if ($d eq $docdir); if (-d $d) { logger('DEBUG', "remove dir $d"); rmdir $d; } } } # # delete_all_files(\%files, $dir) # # deletes all files with names from the list %files # in the directory $dir and its subdirectories # sub delete_all_files { my ($files, $dir) = @_; if (! opendir DIR, $dir) { logger('ERROR', "unable to read directory $dir!"); $errcnt++; return; } my @fl = readdir DIR; closedir DIR; foreach my $f (@fl) { next if ($f =~ /^\.{1,2}$/); if ($$files{$f}) { # $f is in the file list if (-f "$dir/$f") { # $f is a file if (unlink "$dir/$f") { logger('DEBUG', "removed $f"); } else { logger('ERROR', "unable to delete $f!"); $errcnt++; } } elsif (-d _) { # $f is a directory (unlink won't work) if ((system 'rm', '-r', "$dir/$f") == 0) { logger('DEBUG', "removed directory $f"); } else { logger('ERROR', "unable to delete directory $f!"); $errcnt++; } } else { logger('ERROR', "funny object $dir/$f!"); $errcnt++; } } else { # $f is not in the list if (-d "$dir/$f") { # recurse into directories logger('DEBUG', "enter $dir/$f"); delete_all_files($files, "$dir/$f"); } } } } ####################################################### # main # logger('START', "archiver $version at $archdate"); # make shure the right user is running this program my $user = getlogin; if (($user ne "archive")&&($user ne "root")) { logger("WARNING", "you ($user) should be archive or root user to run this program!"); } # check for .archived file if (-f "$docdir/.archived") { if (not $force_archive) { logger('ABORT', "already archived! (.archived file exists)"); exit 1; } else { logger('WARNING', "resource already archived? (.archived file exists)"); $warncnt++; } } # use metacheck first if (open CHECK, "$checkprog -add-files $docdir |") { my @errors; my $msg; while () { chomp; if (/^ERROR/) { push @errors, $_; } $msg = $_; } if ($msg =~ /^DONE/) { logger('DEBUG', "checking index file: $msg"); logger('INFO', "resource '$docdir' check OK"); } else { logger('DEBUG', "errors checking index file:\n " . join("\n ", @errors) . "\n $msg"); logger('ABORT', "resource '$docdir' check failed!"); exit 1; } } else { logger('ABORT', "unable to run $checkprog"); exit 1; } # if (system("$checkprog $docdir >/dev/null") == 0) { # logger('INFO', "resource '$docdir' check OK"); # } else { # logger('ABORT', "resource '$docdir' check failed!!"); # exit 1; # } # read index.meta file my ($document, $rootnode) = MPIWGStor::read_xml($metafile); # check file and add archive date my $files_to_archive = read_resource_meta($rootnode); logger('INFO', (scalar keys %$files_to_archive) . " files to archive"); # remove .archived file if (-f "$docdir/.archived") { if (unlink "$docdir/.archived") { logger('WARNING', "existing .archived file has been removed!"); $warncnt++; } else { logger('ERROR', "unable to remove existing .archived file!"); $errcnt++; } } # remove junk files if ($delete_junk_files) { delete_all_files(\%MPIWGStor::junk_files, $docdir); } # write new index.meta if ($errcnt > 0) { logger('ABORT', "there were errors!"); exit 1; } else { if ($fix_xml) { MPIWGStor::write_xml($document, $metafile); } } # start archiving my $archived_files = run_archive(); my $num_archfiles = scalar keys %$archived_files; logger('INFO', "$num_archfiles files archived"); # check list of archived files check_files($files_to_archive, $archived_files); # delete files if all went OK if ($errcnt == 0) { system("touch", "$docdir/.archived"); # remove junk files (again) if ($delete_junk_files) { delete_all_files(\%MPIWGStor::junk_files, $docdir); } # remove archived files if ($delete_data_files) { delete_files($archived_files); } } logger('INFO', "$warncnt warnings"); logger('INFO', "$errcnt errors"); if ($errcnt > 0) { logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time)); exit 1; } else { logger('DONE', "$num_archfiles files archived at " . stime(time)); }