view archiver.pl @ 4:046d584ed7b3

forgot lib path...
author casties
date Thu, 08 Jul 2004 23:23:53 +0200
parents 9143a0c4c678
children 1a377102b1ce
line wrap: on
line source

#!/usr/local/bin/perl -w

use strict;

use XML::LibXML;

# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# make output unbuffered
$|=1;

#######################################################
# internal parameters
#

# program version
my $version = "0.5.1 (8.7.2004)";

# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";

# archive name (archive-path element, usually == $docdir)
my $archname;
# archive storage date (now)
my $archdate = stime(time);

# delete "junk" files before archiving
my $delete_junk_files = 1;

# delete data files after archiving
my $delete_data_files = 1;


#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
    print "ABORT: TSM client program '$archprog' missing!!\n";
    exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/metacheck";
if (! -x $checkprog) {
    print "ABORT: meta data checking program '$checkprog' missing!!\n";
    exit 1;
}
# log file for archiver
my $log_file = "/var/log/mpiwg-archiver.log";
if (! open LOG, ">>$log_file") {
    print "ABORT: unable to write log file '$log_file'!!\n";
    exit 1;
}

#######################################################
# check parameters that were passed to the program
#
if ($#ARGV < 0) {
    print "ABORT: no document directory given!\n";
    exit 1;
}
my $docdir = $ARGV[0];
# strip double slashes
$docdir =~ s/\/\//\//;
# strip trailing slashes
$docdir =~ s/\/+$//;
if (! -d $docdir) {
    print "ABORT: document directory \'$docdir\' doesn't exist!\n";
    exit 1;
}
if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) {
    $delete_data_files = 0;
}

my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
    print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
    exit 1;
}

#######################################################
# internal variables
#

# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;

#######################################################
# subroutines
#

#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
    my ($rootnode) = @_;
    my %files;
    #
    # archive path
    #
    # get archive-path
    $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
    if (! $archname) {
	print "ABORT: archive-name element missing!!\n";
	exit 1;
    }

    #
    # files
    #
    my @filenodes = $rootnode->findnodes('child::file');
    foreach my $fn (@filenodes) {
	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
	print "FILE: ($path)$name\n";
	my $f = ($path) ? "$path/$name" : "$name";
	$files{$f} = $name;
    }

    #
    # archive-storage-date
    #
    my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
    if ($stordatenode) {
	print "WARNING: archive storage date exists! Resource already archived?\n";
	$warncnt++;
	# delete old date
	$stordatenode->removeChildNodes;
    } else {
	# create new storage date node
	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
	# move after archive-path
	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
    }
    $stordatenode->appendTextNode($archdate);
    $xml_changed++;
    return \%files;
}


#
# $%files = run_archive
#
# runs the archiver program on $docdir and returns a list of archived files
#
sub run_archive {
    my %files;
    print LOG "START archiver $version $archdate\n";
    my $archcmd = $archprog;
    $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
    $archcmd .= " -description='$archname'";
    $archcmd .= " '$docdir/'";

    print LOG "CMD: $archcmd\n";
    if (open ARCH, "$archcmd 2>&1 |") {
	while (<ARCH>) {
	    chomp;
	    print LOG "ARCH: $_\n";
	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
		print "  ARCH: file '$1'\n";
		$files{$1} = "ok";
	    }
	    if (/^Archive processing of .* finished without failure./) {
		print "  ARCH: OK\n";
	    }
	}
    } else {
	print "ABORT: unable to start archive command '$archcmd'!!\n";
	exit 1;
    }

    return \%files;
}


#
# check_files(\%files_to_archive, \%archived_files)
#
# compares the list of archived and to be archived files
#
sub check_files {
    my ($to_archive, $archived) = @_;

    my $nt = scalar keys %$to_archive;
    my $na = scalar keys %$archived;

    foreach my $ft (sort keys %$to_archive) {
	my $fp = "$docdir/$ft";
	#print "  fp: $fp\n";
	if ($$archived{$fp}) {
	    print "DEBUG: $ft archived OK\n";
	    $$archived{$fp} = "OK";
	} else {
	    print "ERROR: file '$ft' missing from archive!\n";
	    $errcnt++;
	}
    }

    foreach my $fa (sort keys %$archived) {
	if ($$archived{$fa} ne "OK") {
	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
	    if ($MPIWGStor::index_files{$fn}) {
		print "DEBUG: $fa ignored\n";
		$na--;
	    } else {
		print "WARNING: $fa archived but not in list!\n";
		$warncnt++;
	    }
	}
    }

    if ($nt > $na) {
	print "WARNING: less files were archived ($na vs. $nt)!\n";
    } elsif ($na > $nt) {
	print "WARNING: more files were archived ($na vs. $nt)!\n";
    }

}


#
# delete_files(\%files)
#
# deletes the files from the list (of absolute files) and their directories
# if they are empty
#
sub delete_files {
    my ($files) = @_;
    my %dirs;

    foreach my $f (sort keys %$files) {
	my ($fn, $fp) = MPIWGStor::split_file_path($f);
	# collect all unique directories
        if ($fp && (! $dirs{$fp})) {
	    $dirs{$fp} = $fp;
	}
	# don't delete index files
	next if ($MPIWGStor::index_files{$fn});
	# no file no delete
	next unless (-f $f);
	# delete files
	if (unlink $f) {
	    print "INFO: remove $f ($fn)\n";
	} else {
	    print "ERROR: unable to delete $f!\n";
	    $errcnt++;
	}
    }
    # try to delete all empty directories
    my @dirkeys = sort keys %dirs;
    # starting at the end to get to the subdirectories first
    for (my $i = $#dirkeys; $i >= 0; $i--) {
	my $d = $dirkeys[$i];
	# dont't remove document dir (shouldn't be empty anyway)
	next if ($d eq $docdir);
	if (-d $d) {
	    print "INFO: remove dir $d\n";
	    rmdir $d;
	}
    }
}


#
# delete_all_files(\%files, $dir)
#
# deletes all files with names from the list %files
# in the directory $dir and its subdirectories 
#
sub delete_all_files {
    my ($files, $dir) = @_;

    if (! opendir DIR, $dir) {
	print "ERROR: unable to read directory $dir!\n";
	$errcnt++;
	return;
    }
    my @fl = readdir DIR;
    closedir DIR;

    foreach my $f (@fl) {
	next if ($f =~ /^\.{1,2}$/);
	if ($$files{$f}) {
	    # $f is in the file list
	    if (-f "$dir/$f") {
		# $f is a file
		if (unlink "$dir/$f") {
		    print "INFO: removed $f\n";
		} else {
		    print "ERROR: unable to delete $f!\n";
		    $errcnt++;
		}
	    } elsif (-d _) {
		# $f is a directory (unlink won't work)
		if ((system 'rm', '-r', "$dir/$f") == 0) {
		    print "INFO: removed directory $f\n";
		} else {
		    print "ERROR: unable to delete directory $f!\n";
		    $errcnt++;
		}
	    } else {
		print "ERROR: funny object $dir/$f!\n";
		$errcnt++;
	    }
	} else {
	    # $f is not in the list
	    if (-d "$dir/$f") {
		# recurse into directories
		print "DEBUG: enter $dir/$f\n";
		delete_all_files($files, "$dir/$f");
	    }
	}
    }
}


#######################################################
# main
#

print "START: archiver $version at $archdate\n";

# make shure the right user is running this program
my $user = getlogin;
#if (($user ne "archive")||($user ne "root")) {
#    logger("ABORT", "you ($user) must be archive or root user to run this program!");
#    exit 1;
#}

# use metacheck first
if (system("$checkprog $docdir >/dev/null") == 0) {
    print "INFO: resource '$docdir' check OK\n";
} else {
    print "ABORT: resource '$docdir' check failed!!\n";
    exit 1;
}

# read index.meta file
my ($document, $rootnode) = MPIWGStor::read_xml($metafile);

# check file and add archive date
my $files_to_archive = read_resource_meta($rootnode);

print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";

# check for .archived file
if (-f "$docdir/.archived") {
    if (unlink "$docdir/.archived") {
	print "WARNING: existing .archived file has been removed! Resource already archived?\n";
	$warncnt++;
    } else {
	print "ERROR: unable to remove existing .archived file!\n";
	$errcnt++;
    }
}

# remove junk files
if ($delete_junk_files) {
    delete_all_files(\%MPIWGStor::junk_files, $docdir);
}

# write new index.meta
if ($errcnt > 0) {
    print "ABORT: there were errors!\n";
    exit 1;
} else {
    if ($fix_xml) {
	MPIWGStor::write_xml($document, $metafile);
    }
}

# start archiving
my $archived_files = run_archive;

print "INFO: ", scalar keys %$archived_files, " files archived\n";

# check list of archived files
check_files($files_to_archive, $archived_files);

# delete files if all went OK
if ($errcnt == 0) {
    system("touch", "$docdir/.archived");
    # remove junk files (again)
    if ($delete_junk_files) {
	delete_all_files(\%MPIWGStor::junk_files, $docdir);
    }
    # remove archived files
    if ($delete_data_files) {
	delete_files($archived_files);
    }
}

print "INFO: $warncnt warnings\n";
print "INFO: $errcnt errors\n";
my $num_archfiles = %$archived_files + 1;
if ($errcnt > 0) {
    print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n";
    exit 1;
} else {
    print "DONE: $num_archfiles files archived at ", stime(time), "\n";
}