#!/usr/bin/perl -w
use strict;
use XML::LibXML;
# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;
# make output unbuffered
$|=1;
#######################################################
# internal parameters
#
# program version
my $version = "0.7.3 (ROC 10.9.2008)";
# short help
my $help = "MPIWG archiver $version
use: archiver [options] docpath
options:
-debug show debugging info
-premigrate don't delete archived files
-force archive even if already archived
-replace rewrite index file
";
# read command line parameters
my $args = MPIWGStor::parseargs;
if (! scalar(%$args)) {
print $help, "\n";
exit 1;
}
# debug level
$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
# force archiving
my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
# rewrite index file
my $rewrite_index = (exists $$args{'replace'}) ? $$args{'replace'} : 0;
# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";
# archive name (archive-path element, usually == $docdir)
my $archname;
# archive storage date (now)
my $archdate = stime(time);
# delete "junk" files before archiving
my $delete_junk_files = 1;
# delete data files after archiving
my $delete_data_files = 1;
# don't delete archived files with "-premigrate"
if (exists $$args{'premigrate'}) {
$delete_data_files = not $$args{'premigrate'};
}
if ($delete_data_files) {
logger('INFO', "going to remove successfully archived files from disk");
}
#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
logger('ABORT', "TSM client program '$archprog' missing!");
exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/metacheck";
if (! -x $checkprog) {
logger('ABORT', "meta data checking program '$checkprog' missing!");
exit 1;
}
# log file for archiver
my $log_file = "/var/log/mpiwg-archiver.log";
if (! open LOG, ">>$log_file") {
logger('ABORT', "unable to write log file '$log_file'!");
exit 1;
}
#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
# strip double slashes
$docdir =~ s/\/\//\//;
# strip trailing slashes
$docdir =~ s/\/+$//;
if (! -d $docdir) {
logger('ABORT', "document directory \'$docdir\' doesn't exist!");
exit 1;
}
my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
exit 1;
}
#######################################################
# internal variables
#
# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;
#######################################################
# subroutines
#
#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
my ($rootnode) = @_;
my %files;
#
# archive path
#
# get archive-path
$archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
if (! $archname) {
logger('ABORT', "archive-name element missing!");
exit 1;
}
#
# files
#
my @filenodes = $rootnode->findnodes('child::file');
foreach my $fn (@filenodes) {
my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
logger('DEBUG', "FILE ($path)$name");
my $f = ($path) ? "$path/$name" : "$name";
$files{$f} = $name;
}
#
# archive-storage-date
#
my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
if ($stordatenode) {
logger('WARNING', "archive storage date exists! Resource already archived?");
$warncnt++;
# delete old date
$stordatenode->removeChildNodes;
} else {
# create new storage date node
$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
# move after archive-path
$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
}
$stordatenode->appendTextNode($archdate);
$xml_changed++;
return \%files;
}
#
# $%files = run_archive
#
# runs the archiver program on $docdir and returns a list of archived files
#
sub run_archive {
my %files;
print LOG "START archiver $version $archdate\n";
my $archcmd = $archprog;
$archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
$archcmd .= " -description='$archname'";
$archcmd .= " '$docdir/'";
print LOG "CMD: $archcmd\n";
if (open ARCH, "$archcmd 2>&1 |") {
while (<ARCH>) {
chomp;
print LOG "ARCH: $_\n";
if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
print " ARCH: file '$1'\n";
$files{$1} = "ok";
}
if (/^Archive processing of .* finished without failure./) {
print " ARCH: OK\n";
}
}
} else {
logger('ABORT', "unable to start archive command '$archcmd'!!");
exit 1;
}
return \%files;
}
#
# check_files(\%files_to_archive, \%archived_files)
#
# compares the list of archived and to be archived files
#
sub check_files {
my ($to_archive, $archived) = @_;
my $nt = scalar keys %$to_archive;
my $na = scalar keys %$archived;
foreach my $ft (sort keys %$to_archive) {
my $fp = "$docdir/$ft";
#print " fp: $fp\n";
if ($$archived{$fp}) {
logger('DEBUG', "$ft archived OK");
$$archived{$fp} = "OK";
} else {
logger('ERROR', "file '$ft' missing from archive!");
$errcnt++;
}
}
foreach my $fa (sort keys %$archived) {
if ($$archived{$fa} ne "OK") {
my ($fn, $fp) = MPIWGStor::split_file_path($fa);
if ($MPIWGStor::index_files{$fn}) {
logger('DEBUG', "$fa ignored");
$na--;
} else {
logger('WARNING', "$fa archived but not in list!");
$warncnt++;
}
}
}
if ($nt > $na) {
logger('WARNING', "less files were archived ($na vs. $nt)!");
} elsif ($na > $nt) {
logger('WARNING', "more files were archived ($na vs. $nt)!");
}
}
#
# delete_files(\%files)
#
# deletes the files from the list (of absolute files) and their directories
# if they are empty
#
sub delete_files {
my ($files) = @_;
my %dirs;
foreach my $f (sort keys %$files) {
my ($fn, $fp) = MPIWGStor::split_file_path($f);
# collect all unique directories
if ($fp && (! $dirs{$fp})) {
$dirs{$fp} = $fp;
}
# don't delete index files
next if ($MPIWGStor::index_files{$fn});
# no file no delete
next unless (-f $f);
# delete files
if (unlink $f) {
logger('DEBUG', "remove $f ($fn)");
} else {
logger('ERROR', "unable to delete $f!");
$errcnt++;
}
}
# try to delete all empty directories
my @dirkeys = sort keys %dirs;
# starting at the end to get to the subdirectories first
for (my $i = $#dirkeys; $i >= 0; $i--) {
my $d = $dirkeys[$i];
# dont't remove document dir (shouldn't be empty anyway)
next if ($d eq $docdir);
if (-d $d) {
logger('DEBUG', "remove dir $d");
rmdir $d;
}
}
}
#
# delete_all_files(\%files, $dir)
#
# deletes all files with names from the list %files
# in the directory $dir and its subdirectories
#
sub delete_all_files {
my ($files, $dir) = @_;
if (! opendir DIR, $dir) {
logger('ERROR', "unable to read directory $dir!");
$errcnt++;
return;
}
my @fl = readdir DIR;
closedir DIR;
foreach my $f (@fl) {
next if ($f =~ /^\.{1,2}$/);
if ($$files{$f}) {
# $f is in the file list
if (-f "$dir/$f") {
# $f is a file
if (unlink "$dir/$f") {
logger('DEBUG', "removed $f");
} else {
logger('ERROR', "unable to delete $f!");
$errcnt++;
}
} elsif (-d _) {
# $f is a directory (unlink won't work)
if ((system 'rm', '-r', "$dir/$f") == 0) {
logger('DEBUG', "removed directory $f");
} else {
logger('ERROR', "unable to delete directory $f!");
$errcnt++;
}
} else {
logger('ERROR', "funny object $dir/$f!");
$errcnt++;
}
} else {
# $f is not in the list
if (-d "$dir/$f") {
# recurse into directories
logger('DEBUG', "enter $dir/$f");
delete_all_files($files, "$dir/$f");
}
}
}
}
#######################################################
# main
#
logger('START', "archiver $version at $archdate");
# make shure the right user is running this program
my $user = getlogin || getpwuid($<);
if (($user ne "archive")&&($user ne "root")) {
logger("WARNING", "you ($user) should be archive or root user to run this program!");
}
# check for .archived file
if (-f "$docdir/.archived") {
if (not $force_archive) {
logger('ABORT', "already archived! (.archived file exists)");
exit 1;
} else {
logger('WARNING', "resource already archived? (.archived file exists)");
$warncnt++;
}
}
# use metacheck first
my $check_opts="-add-files";
if ($rewrite_index) {
$check_opts = "-replace";
}
if (open CHECK, "$checkprog $check_opts $docdir |") {
my @errors;
my $msg;
while (<CHECK>) {
chomp;
if (/^ERROR/) {
push @errors, $_;
}
$msg = $_;
}
if ($msg =~ /^DONE/) {
logger('DEBUG', "checking index file: $msg");
logger('INFO', "resource '$docdir' check OK");
} else {
logger('DEBUG', "errors checking index file:\n " . join("\n ", @errors) . "\n $msg");
logger('ABORT', "resource '$docdir' check failed!");
exit 1;
}
} else {
logger('ABORT', "unable to run $checkprog");
exit 1;
}
# if (system("$checkprog $docdir >/dev/null") == 0) {
# logger('INFO', "resource '$docdir' check OK");
# } else {
# logger('ABORT', "resource '$docdir' check failed!!");
# exit 1;
# }
# read index.meta file
my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
# check file and add archive date
my $files_to_archive = read_resource_meta($rootnode);
logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
# remove .archived file
if (-f "$docdir/.archived") {
if (unlink "$docdir/.archived") {
logger('WARNING', "existing .archived file has been removed!");
$warncnt++;
} else {
logger('ERROR', "unable to remove existing .archived file!");
$errcnt++;
}
}
# remove junk files
if ($delete_junk_files) {
delete_all_files(\%MPIWGStor::junk_files, $docdir);
}
# write new index.meta
if ($errcnt > 0) {
logger('ABORT', "there were errors!");
exit 1;
} else {
if ($fix_xml) {
MPIWGStor::write_xml($document, $metafile);
}
}
# start archiving
my $archived_files = run_archive();
my $num_archfiles = scalar keys %$archived_files;
logger('INFO', "$num_archfiles files archived");
# check list of archived files
check_files($files_to_archive, $archived_files);
# delete files if all went OK
if ($errcnt == 0) {
system("touch", "$docdir/.archived");
# remove junk files (again)
if ($delete_junk_files) {
delete_all_files(\%MPIWGStor::junk_files, $docdir);
}
# remove archived files
if ($delete_data_files) {
delete_files($archived_files);
}
}
logger('INFO', "$warncnt warnings");
logger('INFO', "$errcnt errors");
if ($errcnt > 0) {
logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
exit 1;
} else {
logger('DONE', "$num_archfiles files archived at " . stime(time));
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>