view archivecheck.pl @ 18:fdf4ceb36db1

fixed problem with dir names in metacheck new version of metacheck defaults to not change index file new version of archiver uses new version of metacheck
author casties
date Tue, 20 Sep 2005 19:24:57 +0200
parents b19df18aa19a
children 320c4b93bf39
line wrap: on
line source

#!/usr/local/bin/perl -w

use strict;

use XML::LibXML;

# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# make output unbuffered
$|=1;

#######################################################
# internal parameters
#

# program version
my $version = "0.4.1 (15.2.2005)";

# read command line parameters
my $args = parseargs;

# debug level
$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;

# XML namespace (not really implemented!)
my $namespace = "";

# archive name (archive-path element, usually == $docdir)
my $archname;
# archive storage date
my $archdate;


#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
    logger("ABORT", "TSM client program '$archprog' missing!!");
    exit 1;
}
# my $checkprog = "/usr/local/mpiwg/archive/metacheck";
# if (! -x $checkprog) {
#     logge("ABORT", "meta data checking program '$checkprog' missing!!");
#     exit 1;
# }
# log file for archiver
my $log_file = "/var/tmp/archivecheck.log";
if (! open LOG, ">>$log_file") {
    logger("ABORT", "unable to write log file '$log_file'!!");
    exit 1;
}

#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
if (! $docdir) {
    print "ABORT: no document directory given!\n";
    exit 1;
}
# strip trailing slashes
$docdir =~ s/\/$//;
if (! -d $docdir) {
    print "ABORT: document directory \'$docdir\' doesn't exist!\n";
    exit 1;
}

my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
    print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
    exit 1;
}

#######################################################
# internal variables
#

# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;

#######################################################
# subroutines
#


#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files
#
sub read_resource_meta {
    my ($rootnode) = @_;
    my %files;
    #
    # archive path
    #
    # get archive-path
    $archname = sstrip($rootnode->findvalue('child::archive-path'));
    if (! $archname) {
	logger("ABORT", "archive-name element missing!!");
	exit 1;
    }

    #
    # files
    #
    my @filenodes = $rootnode->findnodes('child::file');
    foreach my $fn (@filenodes) {
	my $name = sstrip($fn->findvalue('child::name'));
	my $path = sstrip($fn->findvalue('child::path'));
	logger("DEBUG", "FILE: ($path)$name");
	my $f = ($path) ? "$path/$name" : "$name";
	$files{$f} = [$name];
    }

    #
    # dirs
    #
    my @dirnodes = $rootnode->findnodes('child::dir');
    foreach my $fn (@dirnodes) {
	my $name = sstrip($fn->findvalue('child::name'));
	my $path = sstrip($fn->findvalue('child::path'));
	logger("DEBUG", "DIR: ($path)$name");
	my $f = ($path) ? "$path/$name" : "$name";
	$files{$f} = [$name];
    }

    #
    # archive-storage-date
    #
    my $archdate = $rootnode->find('child::archive-storage-date');
    if ($archdate) {
	logger("INFO", "archive storage date: $archdate");
    } else {
	logger("ERROR", "archive storage date missing!");
	$errcnt++;
    }
    return \%files;
}


#
# fs_read_files($realdir, $docdir, \%files, \%dirs)
#
# reads all files and directories below $realdir and puts the
# files in %files and directories in %dirs
# $docdir is only for recursion, it should be empty when called 
# from outside
#
sub fs_read_files {
    my ($directory, $docdir, $files, $dirs) = @_;    
    my $cnt = 0;

    if (! opendir DIR, $directory) {
	return 0;
    }
    my @dirfiles = readdir DIR;
    foreach my $fn (@dirfiles) {
	# ignore names starting with a dot
	next if ($fn =~ /^\./);
	# ignore other silly files
	next if ($junk_files{$fn});

	$cnt++;
	my $f = "$directory/$fn";
	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
	#print "fs_file: \"$f\"\n";
	if (-f $f) {
	    #print "  is file\n";
	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                      $atime,$mtime,$ctime,$blksize,$blocks)
                          = stat(_); 
	    $$files{$docf} = [$fn, $size, stime($mtime)];
	    #logger("TEST", "fn $fn, size $size, mtime $mtime");
	} elsif (-d _) {
	    #print "  is dir\n";
	    $$dirs{$docf} = $fn;
	    # recurse into directory
	    $cnt += fs_read_files($f, $docf, $files, $dirs);
	}
    }
    return $cnt;
}


#
# $archcnt = run_query($dirquery, \%files)
#
# runs the archiver program on $dirquery and adds to the hash of archived files
#
# Sample output:
#         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
#
sub run_query {
    my ($dirquery, $files) = @_;
    print LOG "START checkarchive $version ", scalar localtime, "\n";
    my $archcmd = $archprog;
    $archcmd .= " query archive -subdir=yes";
    $archcmd .= " -description='$archname'";
    $archcmd .= " '$dirquery'";

    my $archcnt = 0;
    print LOG "CMD: $archcmd\n";
    if (open ARCH, "$archcmd 2>&1 |") {
	while (<ARCH>) {
	    chomp;
	    print LOG "ARCH: $_\n";
	    if (/
		\s*([\d,]+)    # size
		\s+(\w+)       # unit of size
		\s+([\d\/]+)   # date mm\/dd\/yy
		\s+([\d:]+)    # time
		\s+(\S+)       # file name
		\s+(\w+)       # expiry
		\s+(\S+)       # archive label
		/x) {
		my $size = $1;
		my $sunit = $2;
		my $date = $3;
		my $time = $4;
		my $file = $5;
		my $exp = $6;
		my $label = $7;
		$size =~ s/,//g;
		$date = ymd_date($date);
		logger("DEBUG", "  QUERY: file '$file'");
		$archcnt++;
		if ($$files{$file}) {
		    logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
		    #$warncnt++;
		}
		if (length $file <= length $docdir) {
		    logger("DEBUG", "not below document dir: $file");
		    next;
		}
		$$files{$file} = [$size, "$date $time"];
	    }
	}
    } else {
	logger("ABORT", "unable to start archive command '$archcmd'!!");
	exit 1;
    }

    return $archcnt;
}


#
# check_files(\%files_to_archive, \%archived_files)
#
# compares the list of archived and to be archived files
#
sub check_files {
    my ($to_archive, $archived) = @_;

    my $nt = scalar keys %$to_archive;
    my $na = scalar keys %$archived;

    foreach my $ft (sort keys %$to_archive) {
	my $fp = "$docdir/$ft";
	#logger("DEBUG", "  fp: $fp");
	if ($$archived{$fp}) {
	    logger("DEBUG", "$ft archived OK");
	    $$archived{$fp}->[2] = "OK";
	} else {
	    logger("ERROR", "file entry '$ft' missing from archive!");
	    $errcnt++;
	}
    }

    foreach my $fa (sort keys %$archived) {
	if (! $$archived{$fa}->[2]) {
	    my ($fn, $fp) = split_file_path($fa);
	    if ($index_files{$fn}) {
		logger("DEBUG", "$fa ignored");
		$na--;
	    } else {
		logger("WARNING", "$fa archived but not in list!");
		$warncnt++;
	    }
	}
    }

    if ($nt > $na) {
	logger("WARNING", "less files were archived ($na vs. $nt)!");
	$warncnt++;
    } elsif ($na > $nt) {
	logger("WARNING", "more files were archived ($na vs. $nt)!");
	$warncnt++;
    }

}

#
# compare_files(\%files_on_disk, \%archived_files)
#
# compares the list of archived files and files on disk
#
sub compare_files {
    my ($fs_files, $archived) = @_;

    foreach my $ft (sort keys %$fs_files) {
	next if ($index_files{$ft});
	my $fp = "$docdir/$ft";
	#logger("DEBUG", "  fp: $fp");
	if ($$archived{$fp}) {
	    next if ($index_files{$ft});
	    
	    my $asize = $$archived{$fp}[0];
	    my $atime = $$archived{$fp}[1];
	    my $fsize = $$fs_files{$ft}[1];
	    my $ftime = $$fs_files{$ft}[2];
	    if ($asize != $fsize) {
		logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
		$errcnt++;
	    } elsif ($atime lt $ftime) {
		logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
		$errcnt++;
	    } else {
		logger("WARNING", "archived file $ft still on disk");
		$warncnt++;
	    }
	} else {
	    logger("ERROR", "file on disk '$ft' is not in archive!");
	    $errcnt++;
	}
    }
}



#######################################################
# main
#

logger("INFO", "archivecheck $version");

# make shure the right user is running this program
my $user = getlogin;
if (($user ne "archive")&&($user ne "root")) {
    logger("ABORT", "you must be archive or root user to run this program!");
    exit 1;
}

# read index.meta file
my ($document, $rootnode) = read_xml($metafile);

# check file and add archive date
my $files_to_archive = read_resource_meta($rootnode);

# check for .archived file
if (-f "$docdir/.archived") {
    logger("INFO", ".archived file exists.");
} else {
    logger("WARNING", "no .archived file!");
    $warncnt++;
}

# check archive
my %archived_files = ();
my $archcnt = 0;
if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
    # TSM needs two different paths because of historical mount points :-(
    my $docdir1 = "/mpiwg/archive/data/";
    $archcnt += run_query($docdir1, \%archived_files);
    my $docdir2 = "/mpiwg/archive/";
    $archcnt += run_query($docdir2, \%archived_files);
} else {
    $archcnt += run_query("$docdir/", \%archived_files);
}
logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");

my $num_arch_files = (scalar keys %archived_files);
if ($num_arch_files == 0) {
    logger("ABORT", "no archive of this directory!!");
    exit 1;
}
logger("INFO", "$num_arch_files files archived");

# check list of archived files
check_files($files_to_archive, \%archived_files);

# read files from filesystem
my %fsfiles;
my %fsdirs;
my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);

logger("INFO", "$num_fs_files files still on disk!");
if ($num_fs_files > 0) {
    compare_files(\%fsfiles, \%archived_files);
}

logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
if ($errcnt == 0) {
    logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
    exit 0;
} else {
    logger("ABORT", "there were $errcnt errors!!");
    exit 1;
}