view archiver.pl @ 18:fdf4ceb36db1

fixed problem with dir names in metacheck new version of metacheck defaults to not change index file new version of archiver uses new version of metacheck
author casties
date Tue, 20 Sep 2005 19:24:57 +0200
parents b19df18aa19a
children a3c35eae25dc
line wrap: on
line source

#!/usr/local/bin/perl -w

use strict;

use XML::LibXML;

# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# make output unbuffered
$|=1;

#######################################################
# internal parameters
#

# program version
my $version = "0.7 (ROC 20.9.2005)";

# short help
my $help = "MPIWG archiver $version
use:  archiver [options] docpath
options:
  -debug  show debugging info
  -premigrate  don't delete archived files
  -force  archive even if already archived
";

# read command line parameters
my $args = MPIWGStor::parseargs;
if (! scalar(%$args)) {
    print $help, "\n";
    exit 1;
}

# debug level
$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;

# force archiving
my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;

# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;

# XML namespace (not really implemented!)
my $namespace = "";

# archive name (archive-path element, usually == $docdir)
my $archname;

# archive storage date (now)
my $archdate = stime(time);

# delete "junk" files before archiving
my $delete_junk_files = 1;

# delete data files after archiving
my $delete_data_files = 1;

# don't delete archived files with "-premigrate"
if (exists $$args{'premigrate'}) {
    $delete_data_files = not $$args{'premigrate'};
}
if ($delete_data_files) {
    logger('INFO', "going to remove successfully archived files from disk");
}


#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
    logger('ABORT', "TSM client program '$archprog' missing!");
    exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/metacheck";
if (! -x $checkprog) {
    logger('ABORT', "meta data checking program '$checkprog' missing!");
    exit 1;
}
# log file for archiver
my $log_file = "/var/log/mpiwg-archiver.log";
if (! open LOG, ">>$log_file") {
    logger('ABORT', "unable to write log file '$log_file'!");
    exit 1;
}

#######################################################
# check parameters that were passed to the program
#

my $docdir = $$args{'path'};
# strip double slashes
$docdir =~ s/\/\//\//;
# strip trailing slashes
$docdir =~ s/\/+$//;
if (! -d $docdir) {
    logger('ABORT', "document directory \'$docdir\' doesn't exist!");
    exit 1;
}

my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
    logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
    exit 1;
}

#######################################################
# internal variables
#

# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;

#######################################################
# subroutines
#

#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
    my ($rootnode) = @_;
    my %files;
    #
    # archive path
    #
    # get archive-path
    $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
    if (! $archname) {
	logger('ABORT', "archive-name element missing!");
	exit 1;
    }

    #
    # files
    #
    my @filenodes = $rootnode->findnodes('child::file');
    foreach my $fn (@filenodes) {
	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
	logger('DEBUG', "FILE ($path)$name");
	my $f = ($path) ? "$path/$name" : "$name";
	$files{$f} = $name;
    }

    #
    # archive-storage-date
    #
    my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
    if ($stordatenode) {
	logger('WARNING', "archive storage date exists! Resource already archived?");
	$warncnt++;
	# delete old date
	$stordatenode->removeChildNodes;
    } else {
	# create new storage date node
	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
	# move after archive-path
	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
    }
    $stordatenode->appendTextNode($archdate);
    $xml_changed++;
    return \%files;
}


#
# $%files = run_archive
#
# runs the archiver program on $docdir and returns a list of archived files
#
sub run_archive {
    my %files;
    print LOG "START archiver $version $archdate\n";
    my $archcmd = $archprog;
    $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
    $archcmd .= " -description='$archname'";
    $archcmd .= " '$docdir/'";

    print LOG "CMD: $archcmd\n";
    if (open ARCH, "$archcmd 2>&1 |") {
	while (<ARCH>) {
	    chomp;
	    print LOG "ARCH: $_\n";
	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
		print "  ARCH: file '$1'\n";
		$files{$1} = "ok";
	    }
	    if (/^Archive processing of .* finished without failure./) {
		print "  ARCH: OK\n";
	    }
	}
    } else {
	logger('ABORT', "unable to start archive command '$archcmd'!!");
	exit 1;
    }

    return \%files;
}


#
# check_files(\%files_to_archive, \%archived_files)
#
# compares the list of archived and to be archived files
#
sub check_files {
    my ($to_archive, $archived) = @_;

    my $nt = scalar keys %$to_archive;
    my $na = scalar keys %$archived;

    foreach my $ft (sort keys %$to_archive) {
	my $fp = "$docdir/$ft";
	#print "  fp: $fp\n";
	if ($$archived{$fp}) {
	    logger('DEBUG', "$ft archived OK");
	    $$archived{$fp} = "OK";
	} else {
	    logger('ERROR', "file '$ft' missing from archive!");
	    $errcnt++;
	}
    }

    foreach my $fa (sort keys %$archived) {
	if ($$archived{$fa} ne "OK") {
	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
	    if ($MPIWGStor::index_files{$fn}) {
		logger('DEBUG', "$fa ignored");
		$na--;
	    } else {
		logger('WARNING', "$fa archived but not in list!");
		$warncnt++;
	    }
	}
    }

    if ($nt > $na) {
	logger('WARNING', "less files were archived ($na vs. $nt)!");
    } elsif ($na > $nt) {
	logger('WARNING', "more files were archived ($na vs. $nt)!");
    }

}


#
# delete_files(\%files)
#
# deletes the files from the list (of absolute files) and their directories
# if they are empty
#
sub delete_files {
    my ($files) = @_;
    my %dirs;

    foreach my $f (sort keys %$files) {
	my ($fn, $fp) = MPIWGStor::split_file_path($f);
	# collect all unique directories
        if ($fp && (! $dirs{$fp})) {
	    $dirs{$fp} = $fp;
	}
	# don't delete index files
	next if ($MPIWGStor::index_files{$fn});
	# no file no delete
	next unless (-f $f);
	# delete files
	if (unlink $f) {
	    logger('INFO', "remove $f ($fn)");
	} else {
	    logger('ERROR', "unable to delete $f!");
	    $errcnt++;
	}
    }
    # try to delete all empty directories
    my @dirkeys = sort keys %dirs;
    # starting at the end to get to the subdirectories first
    for (my $i = $#dirkeys; $i >= 0; $i--) {
	my $d = $dirkeys[$i];
	# dont't remove document dir (shouldn't be empty anyway)
	next if ($d eq $docdir);
	if (-d $d) {
	    logger('INFO', "remove dir $d");
	    rmdir $d;
	}
    }
}


#
# delete_all_files(\%files, $dir)
#
# deletes all files with names from the list %files
# in the directory $dir and its subdirectories 
#
sub delete_all_files {
    my ($files, $dir) = @_;

    if (! opendir DIR, $dir) {
	logger('ERROR', "unable to read directory $dir!");
	$errcnt++;
	return;
    }
    my @fl = readdir DIR;
    closedir DIR;

    foreach my $f (@fl) {
	next if ($f =~ /^\.{1,2}$/);
	if ($$files{$f}) {
	    # $f is in the file list
	    if (-f "$dir/$f") {
		# $f is a file
		if (unlink "$dir/$f") {
		    logger('INFO', "removed $f");
		} else {
		    logger('ERROR', "unable to delete $f!");
		    $errcnt++;
		}
	    } elsif (-d _) {
		# $f is a directory (unlink won't work)
		if ((system 'rm', '-r', "$dir/$f") == 0) {
		    logger('INFO', "removed directory $f");
		} else {
		    logger('ERROR', "unable to delete directory $f!");
		    $errcnt++;
		}
	    } else {
		logger('ERROR', "funny object $dir/$f!");
		$errcnt++;
	    }
	} else {
	    # $f is not in the list
	    if (-d "$dir/$f") {
		# recurse into directories
		logger('DEBUG', "enter $dir/$f");
		delete_all_files($files, "$dir/$f");
	    }
	}
    }
}


#######################################################
# main
#

logger('START', "archiver $version at $archdate");

# make shure the right user is running this program
my $user = getlogin;
if (($user ne "archive")&&($user ne "root")) {
    logger("ABORT", "you ($user) must be archive or root user to run this program!");
    exit 1;
}

# check for .archived file
if (-f "$docdir/.archived") {
    if (not $force_archive) {
	logger('ABORT', "already archived! (.archived file exists)");
	exit 1;
    } else {
	logger('WARNING', "resource already archived? (.archived file exists)");
	$warncnt++;
    }
}

# use metacheck first
if (open CHECK, "$checkprog -add-files $docdir |") {
    my @errors;
    my $msg;
    while (<CHECK>) {
	chomp;
	if (/^ERROR/) {
	    push @errors, $_;
	}
	$msg = $_;
    }
    if ($msg =~ /^DONE/) {
	logger('DEBUG', "checking index file: $msg");
	logger('INFO', "resource '$docdir' check OK");
    } else {
	logger('DEBUG', "errors checking index file:\n    " . join("\n    ", @errors) . "\n    $msg");
	logger('ABORT', "resource '$docdir' check failed!");
	exit 1;
    }
} else {
    logger('ABORT', "unable to run $checkprog");
    exit 1;
}
# if (system("$checkprog $docdir >/dev/null") == 0) {
#     logger('INFO', "resource '$docdir' check OK");
# } else {
#     logger('ABORT', "resource '$docdir' check failed!!");
#     exit 1;
# }

# read index.meta file
my ($document, $rootnode) = MPIWGStor::read_xml($metafile);

# check file and add archive date
my $files_to_archive = read_resource_meta($rootnode);

logger('INFO', (scalar keys %$files_to_archive) . " files to archive");

# remove .archived file
if (-f "$docdir/.archived") {
    if (unlink "$docdir/.archived") {
	logger('WARNING', "existing .archived file has been removed!");
	$warncnt++;
    } else {
	logger('ERROR', "unable to remove existing .archived file!");
	$errcnt++;
    }
}

# remove junk files
if ($delete_junk_files) {
    delete_all_files(\%MPIWGStor::junk_files, $docdir);
}

# write new index.meta
if ($errcnt > 0) {
    logger('ABORT', "there were errors!");
    exit 1;
} else {
    if ($fix_xml) {
	MPIWGStor::write_xml($document, $metafile);
    }
}

# start archiving
my $archived_files = run_archive();
my $num_archfiles = scalar keys %$archived_files;

logger('INFO', "$num_archfiles files archived");

# check list of archived files
check_files($files_to_archive, $archived_files);

# delete files if all went OK
if ($errcnt == 0) {
    system("touch", "$docdir/.archived");
    # remove junk files (again)
    if ($delete_junk_files) {
	delete_all_files(\%MPIWGStor::junk_files, $docdir);
    }
    # remove archived files
    if ($delete_data_files) {
	delete_files($archived_files);
    }
}

logger('INFO', "$warncnt warnings");
logger('INFO', "$errcnt errors");
if ($errcnt > 0) {
    logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
    exit 1;
} else {
    logger('DONE', "$num_archfiles files archived at " . stime(time));
}