#!/usr/local/bin/perl -w use strict; use XML::LibXML; # MPIWG libraries use lib '/usr/local/mpiwg/archive'; use MPIWGStor; # make output unbuffered $|=1; ####################################################### # internal parameters # # program version my $version = "0.5 (15.1.2004)"; # rewrite XML file (necessary for archive date!) my $fix_xml = 1; my $xml_changed = 0; # XML namespace (not really implemented!) my $namespace = ""; # archive name (archive-path element, usually == $docdir) my $archname; # archive storage date (now) my $archdate = stime(time); # delete "junk" files before archiving my $delete_junk_files = 1; # delete data files after archiving my $delete_data_files = 1; ####################################################### # external programs # my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; if (! -x $archprog) { print "ABORT: TSM client program '$archprog' missing!!\n"; exit 1; } my $checkprog = "/usr/local/mpiwg/archive/metacheck"; if (! -x $checkprog) { print "ABORT: meta data checking program '$checkprog' missing!!\n"; exit 1; } # log file for archiver my $log_file = "/var/log/mpiwg-archiver.log"; if (! open LOG, ">>$log_file") { print "ABORT: unable to write log file '$log_file'!!\n"; exit 1; } ####################################################### # check parameters that were passed to the program # if ($#ARGV < 0) { print "ABORT: no document directory given!\n"; exit 1; } my $docdir = $ARGV[0]; # strip trailing slashes $docdir =~ s/\/$//; if (! -d $docdir) { print "ABORT: document directory \'$docdir\' doesn't exist!\n"; exit 1; } if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) { $delete_data_files = 0; } my $metafile = "$docdir/index.meta"; if (! -f $metafile) { print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; exit 1; } ####################################################### # internal variables # # number of errors my $errcnt = 0; # number of warnings my $warncnt = 0; ####################################################### # subroutines # # # $files = read_resource_meta($rootnode) # # checks general resource meta information and reads the list of files # sub read_resource_meta { my ($rootnode) = @_; my %files; # # archive path # # get archive-path $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path')); if (! $archname) { print "ABORT: archive-name element missing!!\n"; exit 1; } # # files # my @filenodes = $rootnode->findnodes('child::file'); foreach my $fn (@filenodes) { my $name = MPIWGStor::sstrip($fn->findvalue('child::name')); my $path = MPIWGStor::sstrip($fn->findvalue('child::path')); print "FILE: ($path)$name\n"; my $f = ($path) ? "$path/$name" : "$name"; $files{$f} = $name; } # # archive-storage-date # my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1); if ($stordatenode) { print "WARNING: archive storage date exists! Resource already archived?\n"; $warncnt++; # delete old date $stordatenode->removeChildNodes; } else { # create new storage date node $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date"); # move after archive-path $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1)); } $stordatenode->appendTextNode($archdate); $xml_changed++; return \%files; } # # $%files = run_archive # # runs the archiver program on $docdir and returns a list of archived files # sub run_archive { my %files; print LOG "START archiver $version $archdate\n"; my $archcmd = $archprog; $archcmd .= " archive -archsymlinkasfile=no -subdir=yes"; $archcmd .= " -description='$archname'"; $archcmd .= " '$docdir/'"; print LOG "CMD: $archcmd\n"; if (open ARCH, "$archcmd 2>&1 |") { while () { chomp; print LOG "ARCH: $_\n"; if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) { print " ARCH: file '$1'\n"; $files{$1} = "ok"; } if (/^Archive processing of .* finished without failure./) { print " ARCH: OK\n"; } } } else { print "ABORT: unable to start archive command '$archcmd'!!\n"; exit 1; } return \%files; } # # check_files(\%files_to_archive, \%archived_files) # # compares the list of archived and to be archived files # sub check_files { my ($to_archive, $archived) = @_; my $nt = scalar keys %$to_archive; my $na = scalar keys %$archived; foreach my $ft (sort keys %$to_archive) { my $fp = "$docdir/$ft"; #print " fp: $fp\n"; if ($$archived{$fp}) { print "DEBUG: $ft archived OK\n"; $$archived{$fp} = "OK"; } else { print "ERROR: file '$ft' missing from archive!\n"; $errcnt++; } } foreach my $fa (sort keys %$archived) { if ($$archived{$fa} ne "OK") { my ($fn, $fp) = MPIWGStor::split_file_path($fa); if ($MPIWGStor::index_files{$fn}) { print "DEBUG: $fa ignored\n"; $na--; } else { print "WARNING: $fa archived but not in list!\n"; $warncnt++; } } } if ($nt > $na) { print "WARNING: less files were archived ($na vs. $nt)!\n"; } elsif ($na > $nt) { print "WARNING: more files were archived ($na vs. $nt)!\n"; } } # # delete_files(\%files) # # deletes the files from the list (of absolute files) and their directories # if they are empty # sub delete_files { my ($files) = @_; my %dirs; foreach my $f (sort keys %$files) { my ($fn, $fp) = MPIWGStor::split_file_path($f); # collect all unique directories if ($fp && (! $dirs{$fp})) { $dirs{$fp} = $fp; } # don't delete index files next if ($MPIWGStor::index_files{$fn}); # no file no delete next unless (-f $f); # delete files if (unlink $f) { print "INFO: remove $f ($fn)\n"; } else { print "ERROR: unable to delete $f!\n"; $errcnt++; } } # try to delete all empty directories my @dirkeys = sort keys %dirs; # starting at the end to get to the subdirectories first for (my $i = $#dirkeys; $i >= 0; $i--) { my $d = $dirkeys[$i]; # dont't remove document dir (shouldn't be empty anyway) next if ($d eq $docdir); if (-d $d) { print "INFO: remove dir $d\n"; rmdir $d; } } } # # delete_all_files(\%files, $dir) # # deletes all files with names from the list %files # in the directory $dir and its subdirectories # sub delete_all_files { my ($files, $dir) = @_; if (! opendir DIR, $dir) { print "ERROR: unable to read directory $dir!\n"; $errcnt++; return; } my @fl = readdir DIR; closedir DIR; foreach my $f (@fl) { next if ($f =~ /^\.{1,2}$/); if ($$files{$f}) { # $f is in the file list if (-f "$dir/$f") { # $f is a file if (unlink "$dir/$f") { print "INFO: removed $f\n"; } else { print "ERROR: unable to delete $f!\n"; $errcnt++; } } elsif (-d _) { # $f is a directory (unlink won't work) if ((system 'rm', '-r', "$dir/$f") == 0) { print "INFO: removed directory $f\n"; } else { print "ERROR: unable to delete directory $f!\n"; $errcnt++; } } else { print "ERROR: funny object $dir/$f!\n"; $errcnt++; } } else { # $f is not in the list if (-d "$dir/$f") { # recurse into directories print "DEBUG: enter $dir/$f\n"; delete_all_files($files, "$dir/$f"); } } } } ####################################################### # main # print "START: archiver $version at $archdate\n"; # make shure the right user is running this program my $user = getlogin; #if (($user ne "archive")||($user ne "root")) { # logger("ABORT", "you ($user) must be archive or root user to run this program!"); # exit 1; #} # use metacheck first if (system("$checkprog $docdir >/dev/null") == 0) { print "INFO: resource '$docdir' check OK\n"; } else { print "ABORT: resource '$docdir' check failed!!\n"; exit 1; } # read index.meta file my ($document, $rootnode) = MPIWGStor::read_xml($metafile); # check file and add archive date my $files_to_archive = read_resource_meta($rootnode); print "INFO: ", scalar keys %$files_to_archive, " files to archive\n"; # check for .archived file if (-f "$docdir/.archived") { if (unlink "$docdir/.archived") { print "WARNING: existing .archived file has been removed! Resource already archived?\n"; $warncnt++; } else { print "ERROR: unable to remove existing .archived file!\n"; $errcnt++; } } # remove junk files if ($delete_junk_files) { delete_all_files(\%MPIWGStor::junk_files, $docdir); } # write new index.meta if ($errcnt > 0) { print "ABORT: there were errors!\n"; exit 1; } else { if ($fix_xml) { MPIWGStor::write_xml($document, $metafile); } } # start archiving my $archived_files = run_archive; print "INFO: ", scalar keys %$archived_files, " files archived\n"; # check list of archived files check_files($files_to_archive, $archived_files); # delete files if all went OK if ($errcnt == 0) { system("touch", "$docdir/.archived"); # remove junk files (again) if ($delete_junk_files) { delete_all_files(\%MPIWGStor::junk_files, $docdir); } # remove archived files if ($delete_data_files) { delete_files($archived_files); } } print "INFO: $warncnt warnings\n"; print "INFO: $errcnt errors\n"; my $num_archfiles = %$archived_files + 1; if ($errcnt > 0) { print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n"; exit 1; } else { print "DONE: $num_archfiles files archived at ", stime(time), "\n"; }