view unarchiver.pl @ 7:c4e6fc065b6d

fixed problem with dsmc because of new filespace. uses excplicit destination directory now.
author casties
date Thu, 23 Sep 2004 21:55:44 +0200
parents 30497c6a3eca
children 4417be0e2f07
line wrap: on
line source

#!/usr/local/bin/perl -w

use strict;

use XML::LibXML;
use FileHandle;

# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# make output unbuffered
$|=1;


#######################################################
# internal parameters
#

# program version
my $version = "0.2 (23.9.2004)";

# read command line parameters
my $args = parseargs;

# debug level
$debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;

# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";

# archive name (archive-path element, usually == $docdir)
my $archname;
# archive storage date
my $archdate;

#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
    logger("ABORT", "TSM client program '$archprog' missing!!");
    exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
if (! -x $checkprog) {
    logger("ABORT", "archive checking program '$checkprog' missing!!");
    exit 1;
}
# log file for archiver
my $log_file = "/var/tmp/unarchiver.log";
if (! open LOG, ">>$log_file") {
    logger("ABORT", "unable to write log file '$log_file'!!");
    exit 1;
}
LOG->autoflush(1);

#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
if (! $docdir) {
    print "ABORT: no document directory given!\n";
    exit 1;
}
# strip trailing slashes
$docdir =~ s/\/$//;
if (! -d $docdir) {
    print "ABORT: document directory \'$docdir\' doesn't exist!\n";
    exit 1;
}

my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
    print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
    exit 1;
}

# construct document's parent dir
my $docparent = $docdir;
$docparent =~ s!/[^/]+$!!;

#######################################################
# internal variables
#

# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;

#######################################################
# subroutines
#


#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
    my ($rootnode) = @_;
    my %files;
    #
    # archive path
    #
    # get archive-path
    $archname = sstrip($rootnode->findvalue('child::archive-path'));
    if (! $archname) {
	logger("ABORT", "archive-name element missing!!");
	exit 1;
    }

    #
    # files
    #
    my @filenodes = $rootnode->findnodes('child::file');
    foreach my $fn (@filenodes) {
	my $name = sstrip($fn->findvalue('child::name'));
	my $path = sstrip($fn->findvalue('child::path'));
	logger("DEBUG", "FILE: ($path)$name");
	my $f = ($path) ? "$path/$name" : "$name";
	$files{$f} = [$name];
    }

    #
    # dirs
    #
    my @dirnodes = $rootnode->findnodes('child::dir');
    foreach my $fn (@dirnodes) {
	my $name = sstrip($fn->findvalue('child::name'));
	my $path = sstrip($fn->findvalue('child::path'));
	logger("DEBUG", "DIR: ($path)$name");
	my $f = ($path) ? "$path/$name" : "$name";
	$files{$f} = [$name];
    }

    #
    # archive-storage-date
    #
    my $archdate = $rootnode->find('child::archive-storage-date');
    if ($archdate) {
	logger("INFO", "archive storage date: $archdate");
    } else {
	logger("ERROR", "archive storage date missing!");
	$errcnt++;
    }

    #
    # archive-recall-date
    #
    my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
    if ($recalldatenode) {
	print "INFO: archive recall date exists!\n";
	# delete old date
	$recalldatenode->removeChildNodes;
    } else {
	# create new storage date node
	$recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
	# move after archive-path
	$rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
    }
    $recalldatenode->appendTextNode(scalar localtime);
    $xml_changed++;

    return \%files;
}


#
# $%files = run_retrieve
#
# runs the retriever program on $docdir and returns a list of archived files
#
# Sample output:
# (old!) Retrieving          17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
# Retrieving      42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
sub run_retrieve {
    my %files;
    print LOG "START unarchive $version on ", scalar localtime, "\n";
    my $archcmd = $archprog;
    $archcmd .= " retrieve -subdir=yes -replace=all";
    $archcmd .= " -description='$archname'";
    $archcmd .= " '$docdir/'"; # archive name
    $archcmd .= " '$docparent/'"; # destination dir name

    my $archcnt = 0;
    print LOG "CMD: $archcmd\n";
    if (open ARCH, "$archcmd 2>&1 |") {
	while (<ARCH>) {
	    chomp;
	    print LOG "ARCH: $_\n";
	    if (/
		Retrieving
		\s+([\d,]+)    # size
		\s+(\S+)       # file name
		\s+-->
		\s+(\S+)       # destination file name
		\s+\[Done\]
		/x) {
		my $size = $1;
		my $file = $2;
		$size =~ s/,//g;
		logger("DEBUG", "  RETRIEVE: file '$file'");
		$archcnt++;
		if ($files{$file}) {
		    logger("WARNING", "file $file seems to be archived multiple times.");
		    $warncnt++;
		} 
		$files{$file} = [$size];
	    }
	}
	logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
    } else {
	logger("ABORT", "unable to start archive command '$archcmd'!!");
	exit 1;
    }
    return \%files;
}


#
# check_files(\%files_to_retrieve, \%retrieved_files)
#
# compares the list of archived and retrieved files
#
sub check_files {
    my ($to_retrieve, $retrieved) = @_;

    my $nt = scalar keys %$to_retrieve;
    my $na = scalar keys %$retrieved;

    foreach my $ft (sort keys %$to_retrieve) {
	my $fp = "$docdir/$ft";
	#logger("DEBUG", "  fp: $fp");
	if ($$retrieved{$fp}) {
	    logger("DEBUG", "$ft retrieved OK");
	    $$retrieved{$fp}->[1] = "OK";
	} else {
	    logger("ERROR", "file entry '$ft' missing from archive!");
	    $errcnt++;
	}
    }

    foreach my $fa (sort keys %$retrieved) {
	if (! $$retrieved{$fa}->[1]) {
	    my ($fn, $fp) = split_file_path($fa);
	    if ($index_files{$fn}) {
		logger("DEBUG", "$fa ignored");
		$na--;
	    } else {
		logger("WARNING", "$fa retrieved but not in list!");
		$warncnt++;
	    }
	}
    }

    if ($nt > $na) {
	logger("WARNING", "less files were retrieved ($na vs. $nt)!");
	$warncnt++;
    } elsif ($na > $nt) {
	logger("WARNING", "more files were retrieved ($na vs. $nt)!");
	$warncnt++;
    }

}



#######################################################
# main
#

logger("INFO", "unarchiver $version");

# make shure the right user is running this program
my $user = getlogin;
if (($user)&&($user ne "archive")&&($user ne "root")) {
    logger("ABORT", "you must be archive or root user to run this program!");
    exit 1;
}

# use checkarchive first
if (system("$checkprog $docdir >/dev/null") == 0) {
    logger("INFO", "archive '$docdir' check OK");
} else {
    logger("ABORT", "archive '$docdir' check failed!!");
    exit 1;
}

# read index.meta file
my ($document, $rootnode) = read_xml($metafile);

# check index file
my $archived_files = read_resource_meta($rootnode);
my $num_archived_files = scalar keys %$archived_files;

# check for .archived file
if (-f "$docdir/.archived") {
    logger("INFO", ".archived file exists.");
} else {
    logger("WARNING", "no .archived file!");
    $warncnt++;
}

logger("INFO", "$num_archived_files files to retrieve.");

# retrieve
my $retrieved_files = run_retrieve;

my $num_arch_files = (scalar keys %$retrieved_files);
if ($num_arch_files == 0) {
    logger("ABORT", "no files retrieved!!");
    exit 1;
}
logger("INFO", "$num_arch_files files retrieved");

# check list of archived files
check_files($archived_files, $retrieved_files);

# rewrite index.meta file
write_xml($document, $metafile);

logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
if ($errcnt == 0) {
    logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved");
} else {
    logger("ABORT", "there were $errcnt errors!!");
    exit 1;
}