File:  [Repository] / foxridge-archiver / archiver.pl
Revision 1.9: download - view: text, annotated - select for diffs - revision graph
Thu Mar 16 17:00:43 2017 UTC (7 years, 1 month ago) by casties
Branches: MAIN
CVS tags: HEAD
updated to Ubuntu Perl paths.

#!/usr/bin/perl -w

use strict;

use XML::LibXML;

# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# make output unbuffered
$|=1;

#######################################################
# internal parameters
#

# program version
my $version = "0.7.3 (ROC 10.9.2008)";

# short help
my $help = "MPIWG archiver $version
use:  archiver [options] docpath
options:
  -debug  show debugging info
  -premigrate  don't delete archived files
  -force  archive even if already archived
  -replace  rewrite index file
";

# read command line parameters
my $args = MPIWGStor::parseargs;
if (! scalar(%$args)) {
    print $help, "\n";
    exit 1;
}

# debug level
$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;

# force archiving
my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;

# rewrite index file
my $rewrite_index = (exists $$args{'replace'}) ? $$args{'replace'} : 0;

# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;

# XML namespace (not really implemented!)
my $namespace = "";

# archive name (archive-path element, usually == $docdir)
my $archname;

# archive storage date (now)
my $archdate = stime(time);

# delete "junk" files before archiving
my $delete_junk_files = 1;

# delete data files after archiving
my $delete_data_files = 1;

# don't delete archived files with "-premigrate"
if (exists $$args{'premigrate'}) {
    $delete_data_files = not $$args{'premigrate'};
}
if ($delete_data_files) {
    logger('INFO', "going to remove successfully archived files from disk");
}


#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
    logger('ABORT', "TSM client program '$archprog' missing!");
    exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/metacheck";
if (! -x $checkprog) {
    logger('ABORT', "meta data checking program '$checkprog' missing!");
    exit 1;
}
# log file for archiver
my $log_file = "/var/log/mpiwg-archiver.log";
if (! open LOG, ">>$log_file") {
    logger('ABORT', "unable to write log file '$log_file'!");
    exit 1;
}

#######################################################
# check parameters that were passed to the program
#

my $docdir = $$args{'path'};
# strip double slashes
$docdir =~ s/\/\//\//;
# strip trailing slashes
$docdir =~ s/\/+$//;
if (! -d $docdir) {
    logger('ABORT', "document directory \'$docdir\' doesn't exist!");
    exit 1;
}

my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
    logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
    exit 1;
}

#######################################################
# internal variables
#

# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;

#######################################################
# subroutines
#

#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
    my ($rootnode) = @_;
    my %files;
    #
    # archive path
    #
    # get archive-path
    $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
    if (! $archname) {
	logger('ABORT', "archive-name element missing!");
	exit 1;
    }

    #
    # files
    #
    my @filenodes = $rootnode->findnodes('child::file');
    foreach my $fn (@filenodes) {
	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
	logger('DEBUG', "FILE ($path)$name");
	my $f = ($path) ? "$path/$name" : "$name";
	$files{$f} = $name;
    }

    #
    # archive-storage-date
    #
    my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
    if ($stordatenode) {
	logger('WARNING', "archive storage date exists! Resource already archived?");
	$warncnt++;
	# delete old date
	$stordatenode->removeChildNodes;
    } else {
	# create new storage date node
	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
	# move after archive-path
	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
    }
    $stordatenode->appendTextNode($archdate);
    $xml_changed++;
    return \%files;
}


#
# $%files = run_archive
#
# runs the archiver program on $docdir and returns a list of archived files
#
sub run_archive {
    my %files;
    print LOG "START archiver $version $archdate\n";
    my $archcmd = $archprog;
    $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
    $archcmd .= " -description='$archname'";
    $archcmd .= " '$docdir/'";

    print LOG "CMD: $archcmd\n";
    if (open ARCH, "$archcmd 2>&1 |") {
	while (<ARCH>) {
	    chomp;
	    print LOG "ARCH: $_\n";
	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
		print "  ARCH: file '$1'\n";
		$files{$1} = "ok";
	    }
	    if (/^Archive processing of .* finished without failure./) {
		print "  ARCH: OK\n";
	    }
	}
    } else {
	logger('ABORT', "unable to start archive command '$archcmd'!!");
	exit 1;
    }

    return \%files;
}


#
# check_files(\%files_to_archive, \%archived_files)
#
# compares the list of archived and to be archived files
#
sub check_files {
    my ($to_archive, $archived) = @_;

    my $nt = scalar keys %$to_archive;
    my $na = scalar keys %$archived;

    foreach my $ft (sort keys %$to_archive) {
	my $fp = "$docdir/$ft";
	#print "  fp: $fp\n";
	if ($$archived{$fp}) {
	    logger('DEBUG', "$ft archived OK");
	    $$archived{$fp} = "OK";
	} else {
	    logger('ERROR', "file '$ft' missing from archive!");
	    $errcnt++;
	}
    }

    foreach my $fa (sort keys %$archived) {
	if ($$archived{$fa} ne "OK") {
	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
	    if ($MPIWGStor::index_files{$fn}) {
		logger('DEBUG', "$fa ignored");
		$na--;
	    } else {
		logger('WARNING', "$fa archived but not in list!");
		$warncnt++;
	    }
	}
    }

    if ($nt > $na) {
	logger('WARNING', "less files were archived ($na vs. $nt)!");
    } elsif ($na > $nt) {
	logger('WARNING', "more files were archived ($na vs. $nt)!");
    }

}


#
# delete_files(\%files)
#
# deletes the files from the list (of absolute files) and their directories
# if they are empty
#
sub delete_files {
    my ($files) = @_;
    my %dirs;

    foreach my $f (sort keys %$files) {
	my ($fn, $fp) = MPIWGStor::split_file_path($f);
	# collect all unique directories
        if ($fp && (! $dirs{$fp})) {
	    $dirs{$fp} = $fp;
	}
	# don't delete index files
	next if ($MPIWGStor::index_files{$fn});
	# no file no delete
	next unless (-f $f);
	# delete files
	if (unlink $f) {
	    logger('DEBUG', "remove $f ($fn)");
	} else {
	    logger('ERROR', "unable to delete $f!");
	    $errcnt++;
	}
    }
    # try to delete all empty directories
    my @dirkeys = sort keys %dirs;
    # starting at the end to get to the subdirectories first
    for (my $i = $#dirkeys; $i >= 0; $i--) {
	my $d = $dirkeys[$i];
	# dont't remove document dir (shouldn't be empty anyway)
	next if ($d eq $docdir);
	if (-d $d) {
	    logger('DEBUG', "remove dir $d");
	    rmdir $d;
	}
    }
}


#
# delete_all_files(\%files, $dir)
#
# deletes all files with names from the list %files
# in the directory $dir and its subdirectories 
#
sub delete_all_files {
    my ($files, $dir) = @_;

    if (! opendir DIR, $dir) {
	logger('ERROR', "unable to read directory $dir!");
	$errcnt++;
	return;
    }
    my @fl = readdir DIR;
    closedir DIR;

    foreach my $f (@fl) {
	next if ($f =~ /^\.{1,2}$/);
	if ($$files{$f}) {
	    # $f is in the file list
	    if (-f "$dir/$f") {
		# $f is a file
		if (unlink "$dir/$f") {
		    logger('DEBUG', "removed $f");
		} else {
		    logger('ERROR', "unable to delete $f!");
		    $errcnt++;
		}
	    } elsif (-d _) {
		# $f is a directory (unlink won't work)
		if ((system 'rm', '-r', "$dir/$f") == 0) {
		    logger('DEBUG', "removed directory $f");
		} else {
		    logger('ERROR', "unable to delete directory $f!");
		    $errcnt++;
		}
	    } else {
		logger('ERROR', "funny object $dir/$f!");
		$errcnt++;
	    }
	} else {
	    # $f is not in the list
	    if (-d "$dir/$f") {
		# recurse into directories
		logger('DEBUG', "enter $dir/$f");
		delete_all_files($files, "$dir/$f");
	    }
	}
    }
}


#######################################################
# main
#

logger('START', "archiver $version at $archdate");

# make shure the right user is running this program
my $user = getlogin || getpwuid($<);
if (($user ne "archive")&&($user ne "root")) {
    logger("WARNING", "you ($user) should be archive or root user to run this program!");
}

# check for .archived file
if (-f "$docdir/.archived") {
    if (not $force_archive) {
	logger('ABORT', "already archived! (.archived file exists)");
	exit 1;
    } else {
	logger('WARNING', "resource already archived? (.archived file exists)");
	$warncnt++;
    }
}

# use metacheck first
my $check_opts="-add-files";
if ($rewrite_index) {
    $check_opts = "-replace";
}
if (open CHECK, "$checkprog $check_opts $docdir |") {
    my @errors;
    my $msg;
    while (<CHECK>) {
	chomp;
	if (/^ERROR/) {
	    push @errors, $_;
	}
	$msg = $_;
    }
    if ($msg =~ /^DONE/) {
	logger('DEBUG', "checking index file: $msg");
	logger('INFO', "resource '$docdir' check OK");
    } else {
	logger('DEBUG', "errors checking index file:\n    " . join("\n    ", @errors) . "\n    $msg");
	logger('ABORT', "resource '$docdir' check failed!");
	exit 1;
    }
} else {
    logger('ABORT', "unable to run $checkprog");
    exit 1;
}
# if (system("$checkprog $docdir >/dev/null") == 0) {
#     logger('INFO', "resource '$docdir' check OK");
# } else {
#     logger('ABORT', "resource '$docdir' check failed!!");
#     exit 1;
# }

# read index.meta file
my ($document, $rootnode) = MPIWGStor::read_xml($metafile);

# check file and add archive date
my $files_to_archive = read_resource_meta($rootnode);

logger('INFO', (scalar keys %$files_to_archive) . " files to archive");

# remove .archived file
if (-f "$docdir/.archived") {
    if (unlink "$docdir/.archived") {
	logger('WARNING', "existing .archived file has been removed!");
	$warncnt++;
    } else {
	logger('ERROR', "unable to remove existing .archived file!");
	$errcnt++;
    }
}

# remove junk files
if ($delete_junk_files) {
    delete_all_files(\%MPIWGStor::junk_files, $docdir);
}

# write new index.meta
if ($errcnt > 0) {
    logger('ABORT', "there were errors!");
    exit 1;
} else {
    if ($fix_xml) {
	MPIWGStor::write_xml($document, $metafile);
    }
}

# start archiving
my $archived_files = run_archive();
my $num_archfiles = scalar keys %$archived_files;

logger('INFO', "$num_archfiles files archived");

# check list of archived files
check_files($files_to_archive, $archived_files);

# delete files if all went OK
if ($errcnt == 0) {
    system("touch", "$docdir/.archived");
    # remove junk files (again)
    if ($delete_junk_files) {
	delete_all_files(\%MPIWGStor::junk_files, $docdir);
    }
    # remove archived files
    if ($delete_data_files) {
	delete_files($archived_files);
    }
}

logger('INFO', "$warncnt warnings");
logger('INFO', "$errcnt errors");
if ($errcnt > 0) {
    logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
    exit 1;
} else {
    logger('DONE', "$num_archfiles files archived at " . stime(time));
}

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>