#!/usr/local/bin/perl -w
use strict;
use XML::LibXML;
use FileHandle;
# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;
# make output unbuffered
$|=1;
#######################################################
# internal parameters
#
# program version
my $version = "0.3.1 (21.6.2005)";
# read command line parameters
my $args = parseargs;
# debug level
$debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";
# archive name (archive-path element, usually == $docdir)
my $archname;
# archive storage date
my $archdate;
#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
logger("ABORT", "TSM client program '$archprog' missing!!");
exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
if (! -x $checkprog) {
logger("ABORT", "archive checking program '$checkprog' missing!!");
exit 1;
}
# log file for archiver
my $log_file = "/var/tmp/unarchiver.log";
if (! open LOG, ">>$log_file") {
logger("ABORT", "unable to write log file '$log_file'!!");
exit 1;
}
LOG->autoflush(1);
#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
if (! $docdir) {
print "ABORT: no document directory given!\n";
exit 1;
}
# strip trailing slashes
$docdir =~ s/\/$//;
if (! -d $docdir) {
print "ABORT: document directory \'$docdir\' doesn't exist!\n";
exit 1;
}
my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
exit 1;
}
#######################################################
# internal variables
#
# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;
#######################################################
# subroutines
#
# construct document's parent dir
sub get_parent {
my ($dirname) = @_;
my $dirparent = $dirname;
$dirparent =~ s!/[^/]+$!!;
return $dirparent;
}
#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
my ($rootnode) = @_;
my %files;
#
# archive path
#
# get archive-path
$archname = sstrip($rootnode->findvalue('child::archive-path'));
if (! $archname) {
logger("ABORT", "archive-name element missing!!");
exit 1;
}
#
# files
#
my @filenodes = $rootnode->findnodes('child::file');
foreach my $fn (@filenodes) {
my $name = sstrip($fn->findvalue('child::name'));
my $path = sstrip($fn->findvalue('child::path'));
logger("DEBUG", "FILE: ($path)$name");
my $f = ($path) ? "$path/$name" : "$name";
$files{$f} = [$name];
}
#
# dirs
#
my @dirnodes = $rootnode->findnodes('child::dir');
foreach my $fn (@dirnodes) {
my $name = sstrip($fn->findvalue('child::name'));
my $path = sstrip($fn->findvalue('child::path'));
logger("DEBUG", "DIR: ($path)$name");
my $f = ($path) ? "$path/$name" : "$name";
$files{$f} = [$name];
}
#
# archive-storage-date
#
my $archdate = $rootnode->find('child::archive-storage-date');
if ($archdate) {
logger("INFO", "archive storage date: $archdate");
} else {
logger("ERROR", "archive storage date missing!");
$errcnt++;
}
#
# archive-recall-date
#
my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
if ($recalldatenode) {
print "INFO: archive recall date exists!\n";
# delete old date
$recalldatenode->removeChildNodes;
} else {
# create new storage date node
$recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
# move after archive-path
$rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
}
$recalldatenode->appendTextNode(scalar localtime);
$xml_changed++;
return \%files;
}
#
# $num_files = run_retrieve($docdir, $docmount, \%files)
#
# Runs the retriever program on $docdir and returns the number of unarchived files.
# All filenames are put in %files.
# $docmount is the mount point of the doc partition in cases when the new mount point
# is different.
#
# Sample output:
# (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
# Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
sub run_retrieve {
my ($archdir, $archmount, $files) = @_;
my $archparent;
if ($archmount eq $archdir) {
# no explicit mount point
$archparent = get_parent($archdir);
} else {
# destination dir is mount point
$archparent = $archmount;
}
logger("INFO", "looking for archives in $archmount...");
print LOG "START unarchive $version on ", scalar localtime, "\n";
my $archcmd = $archprog;
$archcmd .= " retrieve -subdir=yes -replace=all";
$archcmd .= " -description='$archname'"; # archive name
$archcmd .= " '$archmount/'"; # archive mount point
$archcmd .= " '$archparent/'"; # destination dir name
my $archcnt = 0;
my $numfiles = 0;
print LOG "CMD: $archcmd\n";
if (open ARCH, "$archcmd 2>&1 |") {
while (<ARCH>) {
chomp;
print LOG "ARCH: $_\n";
if (/
Retrieving
\s+([\d,]+) # size
\s+(\S+) # file name
\s+-->
\s+(\S+) # destination file name
\s+\[Done\]
/x) {
my $size = $1;
my $file = $2;
$size =~ s/,//g;
logger("DEBUG", " RETRIEVE: file '$file'");
$archcnt++;
if ($$files{$file}) {
logger("WARNING", "file $file seems to be archived multiple times.");
$warncnt++;
}
$$files{$file} = [$size];
}
}
$numfiles = (scalar keys %$files);
logger("INFO", "$archcnt archives of $numfiles files (in $archmount).");
} else {
logger("ABORT", "unable to start archive command '$archcmd'!!");
exit 1;
}
return $numfiles;
}
#
# check_files(\%files_to_retrieve, \%retrieved_files)
#
# compares the list of archived and retrieved files
#
sub check_files {
my ($to_retrieve, $retrieved) = @_;
my $nt = scalar keys %$to_retrieve;
my $na = scalar keys %$retrieved;
foreach my $ft (sort keys %$to_retrieve) {
my $fp = "$docdir/$ft";
#logger("DEBUG", " fp: $fp");
if ($$retrieved{$fp}) {
logger("DEBUG", "$ft retrieved OK");
$$retrieved{$fp}->[1] = "OK";
} else {
logger("ERROR", "file entry '$ft' missing from archive!");
$errcnt++;
}
}
foreach my $fa (sort keys %$retrieved) {
if (! $$retrieved{$fa}->[1]) {
my ($fn, $fp) = split_file_path($fa);
if ($index_files{$fn}) {
logger("DEBUG", "$fa ignored");
$na--;
} else {
logger("WARNING", "$fa retrieved but not in list!");
$warncnt++;
}
}
}
if ($nt > $na) {
logger("WARNING", "less files were retrieved ($na vs. $nt)!");
$warncnt++;
} elsif ($na > $nt) {
logger("WARNING", "more files were retrieved ($na vs. $nt)!");
$warncnt++;
}
}
#######################################################
# main
#
logger("INFO", "unarchiver $version");
# make shure the right user is running this program
my $user = getlogin;
if (($user)&&($user ne "archive")&&($user ne "root")) {
logger("ABORT", "you must be archive or root user to run this program!");
exit 1;
}
# use checkarchive first
if (system("$checkprog $docdir >/dev/null") == 0) {
logger("INFO", "archive \"$docdir\" check OK");
} else {
logger("ABORT", "archive \"$docdir\" check failed!!");
exit 1;
}
# read index.meta file
my ($document, $rootnode) = read_xml($metafile);
# check index file
my $archived_files = read_resource_meta($rootnode);
my $num_archived_files = scalar keys %$archived_files;
# check for .archived file
if (-f "$docdir/.archived") {
logger("INFO", ".archived file exists.");
} else {
logger("WARNING", "no .archived file!");
$warncnt++;
}
logger("INFO", "$num_archived_files files to retrieve.");
# save current index.meta
park_file($metafile);
# retrieve
my %retrieved_files = ();
my $archcnt = 0;
if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
# TSM needs two different paths because of historical mount points :-(
# try the new one first
$archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files);
if ($archcnt == 0) {
# and then the old one
$archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files);
}
} else {
# otherwise we assume that it works
$archcnt += run_retrieve($docdir, $docdir, \%retrieved_files);
}
# restore current index.meta
unpark_file($metafile);
if ($archcnt == 0) {
logger("ABORT", "no files retrieved!!");
exit 1;
}
logger("INFO", "$archcnt files retrieved");
# check list of archived files
check_files($archived_files, \%retrieved_files);
# rewrite index.meta file
write_xml($document, $metafile);
logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
if ($errcnt == 0) {
logger("DONE", "$archcnt archived files retrieved");
} else {
logger("ABORT", "there were $errcnt errors!!");
exit 1;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>