#!/usr/local/bin/perl -w
use strict;
use XML::LibXML;
# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;
# make output unbuffered
$|=1;
#######################################################
# internal parameters
#
# program version
my $version = "0.5.1 (8.7.2004)";
# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";
# archive name (archive-path element, usually == $docdir)
my $archname;
# archive storage date (now)
my $archdate = stime(time);
# delete "junk" files before archiving
my $delete_junk_files = 1;
# delete data files after archiving
my $delete_data_files = 1;
#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
print "ABORT: TSM client program '$archprog' missing!!\n";
exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/metacheck";
if (! -x $checkprog) {
print "ABORT: meta data checking program '$checkprog' missing!!\n";
exit 1;
}
# log file for archiver
my $log_file = "/var/log/mpiwg-archiver.log";
if (! open LOG, ">>$log_file") {
print "ABORT: unable to write log file '$log_file'!!\n";
exit 1;
}
#######################################################
# check parameters that were passed to the program
#
if ($#ARGV < 0) {
print "ABORT: no document directory given!\n";
exit 1;
}
my $docdir = $ARGV[0];
# strip double slashes
$docdir =~ s/\/\//\//;
# strip trailing slashes
$docdir =~ s/\/+$//;
if (! -d $docdir) {
print "ABORT: document directory \'$docdir\' doesn't exist!\n";
exit 1;
}
if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) {
$delete_data_files = 0;
}
my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
exit 1;
}
#######################################################
# internal variables
#
# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;
#######################################################
# subroutines
#
#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
my ($rootnode) = @_;
my %files;
#
# archive path
#
# get archive-path
$archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
if (! $archname) {
print "ABORT: archive-name element missing!!\n";
exit 1;
}
#
# files
#
my @filenodes = $rootnode->findnodes('child::file');
foreach my $fn (@filenodes) {
my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
print "FILE: ($path)$name\n";
my $f = ($path) ? "$path/$name" : "$name";
$files{$f} = $name;
}
#
# archive-storage-date
#
my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
if ($stordatenode) {
print "WARNING: archive storage date exists! Resource already archived?\n";
$warncnt++;
# delete old date
$stordatenode->removeChildNodes;
} else {
# create new storage date node
$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
# move after archive-path
$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
}
$stordatenode->appendTextNode($archdate);
$xml_changed++;
return \%files;
}
#
# $%files = run_archive
#
# runs the archiver program on $docdir and returns a list of archived files
#
sub run_archive {
my %files;
print LOG "START archiver $version $archdate\n";
my $archcmd = $archprog;
$archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
$archcmd .= " -description='$archname'";
$archcmd .= " '$docdir/'";
print LOG "CMD: $archcmd\n";
if (open ARCH, "$archcmd 2>&1 |") {
while (<ARCH>) {
chomp;
print LOG "ARCH: $_\n";
if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
print " ARCH: file '$1'\n";
$files{$1} = "ok";
}
if (/^Archive processing of .* finished without failure./) {
print " ARCH: OK\n";
}
}
} else {
print "ABORT: unable to start archive command '$archcmd'!!\n";
exit 1;
}
return \%files;
}
#
# check_files(\%files_to_archive, \%archived_files)
#
# compares the list of archived and to be archived files
#
sub check_files {
my ($to_archive, $archived) = @_;
my $nt = scalar keys %$to_archive;
my $na = scalar keys %$archived;
foreach my $ft (sort keys %$to_archive) {
my $fp = "$docdir/$ft";
#print " fp: $fp\n";
if ($$archived{$fp}) {
print "DEBUG: $ft archived OK\n";
$$archived{$fp} = "OK";
} else {
print "ERROR: file '$ft' missing from archive!\n";
$errcnt++;
}
}
foreach my $fa (sort keys %$archived) {
if ($$archived{$fa} ne "OK") {
my ($fn, $fp) = MPIWGStor::split_file_path($fa);
if ($MPIWGStor::index_files{$fn}) {
print "DEBUG: $fa ignored\n";
$na--;
} else {
print "WARNING: $fa archived but not in list!\n";
$warncnt++;
}
}
}
if ($nt > $na) {
print "WARNING: less files were archived ($na vs. $nt)!\n";
} elsif ($na > $nt) {
print "WARNING: more files were archived ($na vs. $nt)!\n";
}
}
#
# delete_files(\%files)
#
# deletes the files from the list (of absolute files) and their directories
# if they are empty
#
sub delete_files {
my ($files) = @_;
my %dirs;
foreach my $f (sort keys %$files) {
my ($fn, $fp) = MPIWGStor::split_file_path($f);
# collect all unique directories
if ($fp && (! $dirs{$fp})) {
$dirs{$fp} = $fp;
}
# don't delete index files
next if ($MPIWGStor::index_files{$fn});
# no file no delete
next unless (-f $f);
# delete files
if (unlink $f) {
print "INFO: remove $f ($fn)\n";
} else {
print "ERROR: unable to delete $f!\n";
$errcnt++;
}
}
# try to delete all empty directories
my @dirkeys = sort keys %dirs;
# starting at the end to get to the subdirectories first
for (my $i = $#dirkeys; $i >= 0; $i--) {
my $d = $dirkeys[$i];
# dont't remove document dir (shouldn't be empty anyway)
next if ($d eq $docdir);
if (-d $d) {
print "INFO: remove dir $d\n";
rmdir $d;
}
}
}
#
# delete_all_files(\%files, $dir)
#
# deletes all files with names from the list %files
# in the directory $dir and its subdirectories
#
sub delete_all_files {
my ($files, $dir) = @_;
if (! opendir DIR, $dir) {
print "ERROR: unable to read directory $dir!\n";
$errcnt++;
return;
}
my @fl = readdir DIR;
closedir DIR;
foreach my $f (@fl) {
next if ($f =~ /^\.{1,2}$/);
if ($$files{$f}) {
# $f is in the file list
if (-f "$dir/$f") {
# $f is a file
if (unlink "$dir/$f") {
print "INFO: removed $f\n";
} else {
print "ERROR: unable to delete $f!\n";
$errcnt++;
}
} elsif (-d _) {
# $f is a directory (unlink won't work)
if ((system 'rm', '-r', "$dir/$f") == 0) {
print "INFO: removed directory $f\n";
} else {
print "ERROR: unable to delete directory $f!\n";
$errcnt++;
}
} else {
print "ERROR: funny object $dir/$f!\n";
$errcnt++;
}
} else {
# $f is not in the list
if (-d "$dir/$f") {
# recurse into directories
print "DEBUG: enter $dir/$f\n";
delete_all_files($files, "$dir/$f");
}
}
}
}
#######################################################
# main
#
print "START: archiver $version at $archdate\n";
# make shure the right user is running this program
my $user = getlogin;
#if (($user ne "archive")||($user ne "root")) {
# logger("ABORT", "you ($user) must be archive or root user to run this program!");
# exit 1;
#}
# use metacheck first
if (system("$checkprog $docdir >/dev/null") == 0) {
print "INFO: resource '$docdir' check OK\n";
} else {
print "ABORT: resource '$docdir' check failed!!\n";
exit 1;
}
# read index.meta file
my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
# check file and add archive date
my $files_to_archive = read_resource_meta($rootnode);
print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
# check for .archived file
if (-f "$docdir/.archived") {
if (unlink "$docdir/.archived") {
print "WARNING: existing .archived file has been removed! Resource already archived?\n";
$warncnt++;
} else {
print "ERROR: unable to remove existing .archived file!\n";
$errcnt++;
}
}
# remove junk files
if ($delete_junk_files) {
delete_all_files(\%MPIWGStor::junk_files, $docdir);
}
# write new index.meta
if ($errcnt > 0) {
print "ABORT: there were errors!\n";
exit 1;
} else {
if ($fix_xml) {
MPIWGStor::write_xml($document, $metafile);
}
}
# start archiving
my $archived_files = run_archive;
print "INFO: ", scalar keys %$archived_files, " files archived\n";
# check list of archived files
check_files($files_to_archive, $archived_files);
# delete files if all went OK
if ($errcnt == 0) {
system("touch", "$docdir/.archived");
# remove junk files (again)
if ($delete_junk_files) {
delete_all_files(\%MPIWGStor::junk_files, $docdir);
}
# remove archived files
if ($delete_data_files) {
delete_files($archived_files);
}
}
print "INFO: $warncnt warnings\n";
print "INFO: $errcnt errors\n";
my $num_archfiles = %$archived_files + 1;
if ($errcnt > 0) {
print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n";
exit 1;
} else {
print "DONE: $num_archfiles files archived at ", stime(time), "\n";
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>