#!/usr/local/bin/perl -w
use strict;
use XML::LibXML;
# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;
# make output unbuffered
$|=1;
#######################################################
# internal parameters
#
# program version
my $version = "0.4.3 (21.3.2007 ROC)";
# read command line parameters
my $args = parseargs;
# debug level
$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
# XML namespace (not really implemented!)
my $namespace = "";
# archive name (archive-path element, usually == $docdir)
my $archname;
#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
logger("ABORT", "TSM client program '$archprog' missing!!");
exit 1;
}
# my $checkprog = "/usr/local/mpiwg/archive/metacheck";
# if (! -x $checkprog) {
# logge("ABORT", "meta data checking program '$checkprog' missing!!");
# exit 1;
# }
# log file for archiver
my $log_file = "/var/tmp/archivecheck.log";
if (! open LOG, ">>$log_file") {
logger("ABORT", "unable to write log file '$log_file'!!");
exit 1;
}
#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
if (! $docdir) {
print "ABORT: no document directory given!\n";
exit 1;
}
# strip trailing slashes
$docdir =~ s/\/$//;
if (! -d $docdir) {
print "ABORT: document directory \'$docdir\' doesn't exist!\n";
exit 1;
}
my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
exit 1;
}
#######################################################
# internal variables
#
# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;
#######################################################
# subroutines
#
#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
my ($rootnode) = @_;
my %files;
#
# archive path
#
# get archive-path
$archname = sstrip($rootnode->findvalue('child::archive-path'));
if (! $archname) {
logger("ABORT", "archive-name element missing!!");
exit 1;
}
#
# files
#
my @filenodes = $rootnode->findnodes('child::file');
foreach my $fn (@filenodes) {
my $name = sstrip($fn->findvalue('child::name'));
my $path = sstrip($fn->findvalue('child::path'));
logger("DEBUG", "FILE: ($path)$name");
my $f = ($path) ? "$path/$name" : "$name";
$files{$f} = [$name];
}
#
# dirs
#
my @dirnodes = $rootnode->findnodes('child::dir');
foreach my $fn (@dirnodes) {
my $name = sstrip($fn->findvalue('child::name'));
my $path = sstrip($fn->findvalue('child::path'));
logger("DEBUG", "DIR: ($path)$name");
my $f = "$name";
if (($path)&&($path ne '.')) {
$f = "$path/$name";
}
$files{$f} = [$name];
}
#
# archive-storage-date
#
my $archdate = $rootnode->find('child::archive-storage-date');
if ($archdate) {
logger("INFO", "archive storage date: $archdate");
} else {
logger("ERROR", "archive storage date missing!");
$errcnt++;
}
return \%files;
}
#
# fs_read_files($realdir, $docdir, \%files, \%dirs)
#
# reads all files and directories below $realdir and puts the
# files in %files and directories in %dirs
# $docdir is only for recursion, it should be empty when called
# from outside
#
sub fs_read_files {
my ($directory, $docdir, $files, $dirs) = @_;
my $cnt = 0;
if (! opendir DIR, $directory) {
return 0;
}
my @dirfiles = readdir DIR;
foreach my $fn (@dirfiles) {
# ignore names starting with a dot
next if ($fn =~ /^\./);
# ignore other silly files
next if ($junk_files{$fn});
$cnt++;
my $f = "$directory/$fn";
my $docf = ($docdir) ? "$docdir/$fn" : $fn;
#print "fs_file: \"$f\"\n";
if (-f $f) {
#print " is file\n";
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks)
= stat(_);
$$files{$docf} = [$fn, $size, stime($mtime)];
#logger("TEST", "fn $fn, size $size, mtime $mtime");
} elsif (-d _) {
#print " is dir\n";
$$dirs{$docf} = $fn;
# recurse into directory
$cnt += fs_read_files($f, $docf, $files, $dirs);
}
}
return $cnt;
}
#
# $archcnt = run_query($dirquery, \%files)
#
# runs the archiver program on $dirquery and adds to the hash of archived files
#
# Sample output:
# 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
#
sub run_query {
my ($dirquery, $files) = @_;
print LOG "START checkarchive $version ", scalar localtime, "\n";
my $archcmd = $archprog;
$archcmd .= " query archive -subdir=yes";
$archcmd .= " -description='$archname'";
$archcmd .= " '$dirquery'";
logger('INFO', "querying TSM server for $dirquery, please wait...");
my $archcnt = 0;
print LOG "CMD: $archcmd\n";
if (open ARCH, "$archcmd 2>&1 |") {
while (<ARCH>) {
chomp;
print LOG "ARCH: $_\n";
if (/
\s*([\d,]+) # size
\s+(\w+) # unit of size
\s+([\d\/]+) # date mm\/dd\/yy
\s+([\d:]+) # time
\s+(\S+) # file name
\s+(\w+) # expiry
\s+(\S+) # archive label
/x) {
my $size = $1;
my $sunit = $2;
my $date = $3;
my $time = $4;
my $file = $5;
my $exp = $6;
my $label = $7;
$size =~ s/,//g;
$date = ymd_date($date);
logger("DEBUG", " QUERY: file '$file'");
$archcnt++;
if ($$files{$file}) {
logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
#$warncnt++;
}
if (length $file <= length $docdir) {
logger("DEBUG", "not below document dir: $file");
next;
}
$$files{$file} = [$size, "$date $time"];
}
}
} else {
logger("ABORT", "unable to start archive command '$archcmd'!!");
exit 1;
}
return $archcnt;
}
#
# check_files(\%files_to_archive, \%archived_files)
#
# compares the list of archived and to be archived files
#
sub check_files {
my ($to_archive, $archived) = @_;
my $nt = scalar keys %$to_archive;
my $na = scalar keys %$archived;
foreach my $ft (sort keys %$to_archive) {
my $fp = "$docdir/$ft";
#logger("DEBUG", " fp: $fp");
if ($$archived{$fp}) {
logger("DEBUG", "$ft archived OK");
$$archived{$fp}->[2] = "OK";
} else {
logger("ERROR", "file entry '$ft' missing from archive!");
$errcnt++;
}
}
foreach my $fa (sort keys %$archived) {
if (! $$archived{$fa}->[2]) {
my ($fn, $fp) = split_file_path($fa);
if ($index_files{$fn}) {
logger("DEBUG", "$fa ignored");
$na--;
} else {
logger("WARNING", "$fa archived but not in list!");
$warncnt++;
}
}
}
if ($nt > $na) {
logger("WARNING", "less files were archived ($na vs. $nt)!");
$warncnt++;
} elsif ($na > $nt) {
logger("WARNING", "more files were archived ($na vs. $nt)!");
$warncnt++;
}
}
#
# compare_files(\%files_on_disk, \%archived_files)
#
# compares the list of archived files and files on disk
#
sub compare_files {
my ($fs_files, $archived) = @_;
foreach my $ft (sort keys %$fs_files) {
next if ($index_files{$ft});
my $fp = "$docdir/$ft";
#logger("DEBUG", " fp: $fp");
if ($$archived{$fp}) {
next if ($index_files{$ft});
my $asize = $$archived{$fp}[0];
my $atime = $$archived{$fp}[1];
my $fsize = $$fs_files{$ft}[1];
my $ftime = $$fs_files{$ft}[2];
if ($asize != $fsize) {
logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
$errcnt++;
} elsif ($atime lt $ftime) {
logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
$errcnt++;
} else {
logger("WARNING", "archived file $ft still on disk");
$warncnt++;
}
} else {
logger("ERROR", "file on disk '$ft' is not in archive!");
$errcnt++;
}
}
}
#######################################################
# main
#
logger("INFO", "archivecheck $version");
# make shure the right user is running this program
my $user = getlogin;
if (not (($user eq "archive")||($user eq "root"))) {
logger("ABORT", "you must be archive or root user to run this program!");
exit 1;
}
# read index.meta file
my ($document, $rootnode) = read_xml($metafile);
# check file and add archive date
my $files_to_archive = read_resource_meta($rootnode);
# check for .archived file
if (-f "$docdir/.archived") {
logger("INFO", ".archived file exists.");
} else {
logger("WARNING", "no .archived file!");
$warncnt++;
}
# check archive
my %archived_files = ();
my $archcnt = 0;
if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
# TSM needs two different paths because of historical mount points :-(
my $docdir1 = "/mpiwg/archive/data/";
$archcnt += run_query($docdir1, \%archived_files);
my $docdir2 = "/mpiwg/archive/";
$archcnt += run_query($docdir2, \%archived_files);
} else {
$archcnt += run_query("$docdir/", \%archived_files);
}
logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
my $num_arch_files = (scalar keys %archived_files);
if ($num_arch_files == 0) {
logger("ABORT", "no archive of this directory!!");
exit 1;
}
logger("INFO", "$num_arch_files files archived");
# check list of archived files
check_files($files_to_archive, \%archived_files);
# read files from filesystem
my %fsfiles;
my %fsdirs;
my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
logger("INFO", "$num_fs_files files still on disk!");
if ($num_fs_files > 0) {
compare_files(\%fsfiles, \%archived_files);
}
logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
if ($errcnt == 0) {
logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
exit 0;
} else {
logger("ABORT", "there were $errcnt errors!!");
exit 1;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>