#!/usr/local/bin/perl -w use strict; use XML::LibXML; # MPIWG libraries use lib '/usr/local/mpiwg/archive'; use MPIWGStor; # make output unbuffered $|=1; ####################################################### # internal parameters # # program version my $version = "0.4.4 (10.9.2008 ROC)"; # read command line parameters my $args = parseargs; # debug level $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; # XML namespace (not really implemented!) my $namespace = ""; # archive name (archive-path element, usually == $docdir) my $archname; ####################################################### # external programs # my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; if (! -x $archprog) { logger("ABORT", "TSM client program '$archprog' missing!!"); exit 1; } # my $checkprog = "/usr/local/mpiwg/archive/metacheck"; # if (! -x $checkprog) { # logge("ABORT", "meta data checking program '$checkprog' missing!!"); # exit 1; # } # log file for archiver my $log_file = "/var/tmp/archivecheck.log"; if (! open LOG, ">>$log_file") { logger("ABORT", "unable to write log file '$log_file'!!"); exit 1; } ####################################################### # check parameters that were passed to the program # my $docdir = $$args{'path'}; if (! $docdir) { print "ABORT: no document directory given!\n"; exit 1; } # strip trailing slashes $docdir =~ s/\/$//; if (! -d $docdir) { print "ABORT: document directory \'$docdir\' doesn't exist!\n"; exit 1; } my $metafile = "$docdir/index.meta"; if (! -f $metafile) { print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; exit 1; } ####################################################### # internal variables # # number of errors my $errcnt = 0; # number of warnings my $warncnt = 0; ####################################################### # subroutines # # # $files = read_resource_meta($rootnode) # # checks general resource meta information and reads the list of files # sub read_resource_meta { my ($rootnode) = @_; my %files; # # archive path # # get archive-path $archname = sstrip($rootnode->findvalue('child::archive-path')); if (! $archname) { logger("ABORT", "archive-name element missing!!"); exit 1; } # # files # my @filenodes = $rootnode->findnodes('child::file'); foreach my $fn (@filenodes) { my $name = sstrip($fn->findvalue('child::name')); my $path = sstrip($fn->findvalue('child::path')); logger("DEBUG", "FILE: ($path)$name"); my $f = ($path) ? "$path/$name" : "$name"; $files{$f} = [$name]; } # # dirs # my @dirnodes = $rootnode->findnodes('child::dir'); foreach my $fn (@dirnodes) { my $name = sstrip($fn->findvalue('child::name')); my $path = sstrip($fn->findvalue('child::path')); logger("DEBUG", "DIR: ($path)$name"); my $f = "$name"; if (($path)&&($path ne '.')) { $f = "$path/$name"; } $files{$f} = [$name]; } # # archive-storage-date # my $archdate = $rootnode->find('child::archive-storage-date'); if ($archdate) { logger("INFO", "archive storage date: $archdate"); } else { logger("ERROR", "archive storage date missing!"); $errcnt++; } return \%files; } # # fs_read_files($realdir, $docdir, \%files, \%dirs) # # reads all files and directories below $realdir and puts the # files in %files and directories in %dirs # $docdir is only for recursion, it should be empty when called # from outside # sub fs_read_files { my ($directory, $docdir, $files, $dirs) = @_; my $cnt = 0; if (! opendir DIR, $directory) { return 0; } my @dirfiles = readdir DIR; foreach my $fn (@dirfiles) { # ignore names starting with a dot next if ($fn =~ /^\./); # ignore other silly files next if ($junk_files{$fn}); $cnt++; my $f = "$directory/$fn"; my $docf = ($docdir) ? "$docdir/$fn" : $fn; #print "fs_file: \"$f\"\n"; if (-f $f) { #print " is file\n"; my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, $atime,$mtime,$ctime,$blksize,$blocks) = stat(_); $$files{$docf} = [$fn, $size, stime($mtime)]; #logger("TEST", "fn $fn, size $size, mtime $mtime"); } elsif (-d _) { #print " is dir\n"; $$dirs{$docf} = $fn; # recurse into directory $cnt += fs_read_files($f, $docf, $files, $dirs); } } return $cnt; } # # $archcnt = run_query($dirquery, \%files) # # runs the archiver program on $dirquery and adds to the hash of archived files # # Sample output: # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839 # sub run_query { my ($dirquery, $files) = @_; print LOG "START checkarchive $version ", scalar localtime, "\n"; my $archcmd = $archprog; $archcmd .= " query archive -subdir=yes"; $archcmd .= " -description='$archname'"; $archcmd .= " '$dirquery'"; logger('INFO', "querying TSM server for $dirquery, please wait..."); my $archcnt = 0; print LOG "CMD: $archcmd\n"; if (open ARCH, "$archcmd 2>&1 |") { while () { chomp; print LOG "ARCH: $_\n"; if (/ \s*([\d,]+) # size \s+(\w+) # unit of size \s+([\d\/]+) # date mm\/dd\/yy \s+([\d:]+) # time \s+(\S+) # file name \s+(\w+) # expiry \s+(\S+) # archive label /x) { my $size = $1; my $sunit = $2; my $date = $3; my $time = $4; my $file = $5; my $exp = $6; my $label = $7; $size =~ s/,//g; $date = ymd_date($date); logger("DEBUG", " QUERY: file '$file'"); $archcnt++; if ($$files{$file}) { logger("DEBUG", "file $file seems to be archived multiple times: $time $date"); #$warncnt++; } if (length $file <= length $docdir) { logger("DEBUG", "not below document dir: $file"); next; } $$files{$file} = [$size, "$date $time"]; } } } else { logger("ABORT", "unable to start archive command '$archcmd'!!"); exit 1; } return $archcnt; } # # check_files(\%files_to_archive, \%archived_files) # # compares the list of archived and to be archived files # sub check_files { my ($to_archive, $archived) = @_; my $nt = scalar keys %$to_archive; my $na = scalar keys %$archived; foreach my $ft (sort keys %$to_archive) { my $fp = "$docdir/$ft"; #logger("DEBUG", " fp: $fp"); if ($$archived{$fp}) { logger("DEBUG", "$ft archived OK"); $$archived{$fp}->[2] = "OK"; } else { logger("ERROR", "file entry '$ft' missing from archive!"); $errcnt++; } } foreach my $fa (sort keys %$archived) { if (! $$archived{$fa}->[2]) { my ($fn, $fp) = split_file_path($fa); if ($index_files{$fn}) { logger("DEBUG", "$fa ignored"); $na--; } else { logger("WARNING", "$fa archived but not in list!"); $warncnt++; } } } if ($nt > $na) { logger("WARNING", "less files were archived ($na vs. $nt)!"); $warncnt++; } elsif ($na > $nt) { logger("WARNING", "more files were archived ($na vs. $nt)!"); $warncnt++; } } # # compare_files(\%files_on_disk, \%archived_files) # # compares the list of archived files and files on disk # sub compare_files { my ($fs_files, $archived) = @_; foreach my $ft (sort keys %$fs_files) { next if ($index_files{$ft}); my $fp = "$docdir/$ft"; #logger("DEBUG", " fp: $fp"); if ($$archived{$fp}) { next if ($index_files{$ft}); my $asize = $$archived{$fp}[0]; my $atime = $$archived{$fp}[1]; my $fsize = $$fs_files{$ft}[1]; my $ftime = $$fs_files{$ft}[2]; if ($asize != $fsize) { logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!"); $errcnt++; } elsif ($atime lt $ftime) { logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!"); $errcnt++; } else { logger("WARNING", "archived file $ft still on disk"); $warncnt++; } } else { logger("ERROR", "file on disk '$ft' is not in archive!"); $errcnt++; } } } ####################################################### # main # logger("INFO", "archivecheck $version"); # make shure the right user is running this program my $user = getlogin || getpwuid($<) ; if (not (($user eq "archive")||($user eq "root"))) { logger("ABORT", "you must be archive or root user to run this program!"); exit 1; } # read index.meta file my ($document, $rootnode) = read_xml($metafile); # check file and add archive date my $files_to_archive = read_resource_meta($rootnode); # check for .archived file if (-f "$docdir/.archived") { logger("INFO", ".archived file exists."); } else { logger("WARNING", "no .archived file!"); $warncnt++; } # check archive my %archived_files = (); my $archcnt = 0; if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { # TSM needs two different paths because of historical mount points :-( my $docdir1 = "/mpiwg/archive/data/"; $archcnt += run_query($docdir1, \%archived_files); my $docdir2 = "/mpiwg/archive/"; $archcnt += run_query($docdir2, \%archived_files); } else { $archcnt += run_query("$docdir/", \%archived_files); } logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files."); my $num_arch_files = (scalar keys %archived_files); if ($num_arch_files == 0) { logger("ABORT", "no archive of this directory!!"); exit 1; } logger("INFO", "$num_arch_files files archived"); # check list of archived files check_files($files_to_archive, \%archived_files); # read files from filesystem my %fsfiles; my %fsdirs; my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs); logger("INFO", "$num_fs_files files still on disk!"); if ($num_fs_files > 0) { compare_files(\%fsfiles, \%archived_files); } logger("INFO", "$warncnt warnings"); logger("INFO", "$errcnt errors"); if ($errcnt == 0) { logger("DONE", "" . (scalar keys %archived_files) . " archived files OK"); exit 0; } else { logger("ABORT", "there were $errcnt errors!!"); exit 1; }