File:  [Repository] / foxridge-archiver / archivecheck.pl
Revision 1.3: download - view: text, annotated - select for diffs - revision graph
Wed Mar 23 12:12:08 2005 UTC (19 years, 3 months ago) by casties
Branches: MAIN
CVS tags: HEAD

updated command line option parsing and logging

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.4.1 (15.2.2005)";
   20: 
   21: # read command line parameters
   22: my $args = parseargs;
   23: 
   24: # debug level
   25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   26: 
   27: # XML namespace (not really implemented!)
   28: my $namespace = "";
   29: 
   30: # archive name (archive-path element, usually == $docdir)
   31: my $archname;
   32: # archive storage date
   33: my $archdate;
   34: 
   35: 
   36: #######################################################
   37: # external programs
   38: #
   39: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   40: if (! -x $archprog) {
   41:     logger("ABORT", "TSM client program '$archprog' missing!!");
   42:     exit 1;
   43: }
   44: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   45: # if (! -x $checkprog) {
   46: #     logge("ABORT", "meta data checking program '$checkprog' missing!!");
   47: #     exit 1;
   48: # }
   49: # log file for archiver
   50: my $log_file = "/var/tmp/archivecheck.log";
   51: if (! open LOG, ">>$log_file") {
   52:     logger("ABORT", "unable to write log file '$log_file'!!");
   53:     exit 1;
   54: }
   55: 
   56: #######################################################
   57: # check parameters that were passed to the program
   58: #
   59: my $docdir = $$args{'path'};
   60: if (! $docdir) {
   61:     print "ABORT: no document directory given!\n";
   62:     exit 1;
   63: }
   64: # strip trailing slashes
   65: $docdir =~ s/\/$//;
   66: if (! -d $docdir) {
   67:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   68:     exit 1;
   69: }
   70: 
   71: my $metafile = "$docdir/index.meta";
   72: if (! -f $metafile) {
   73:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   74:     exit 1;
   75: }
   76: 
   77: #######################################################
   78: # internal variables
   79: #
   80: 
   81: # number of errors
   82: my $errcnt = 0;
   83: # number of warnings
   84: my $warncnt = 0;
   85: 
   86: #######################################################
   87: # subroutines
   88: #
   89: 
   90: 
   91: #
   92: # $files = read_resource_meta($rootnode)
   93: #
   94: # checks general resource meta information and reads the list of files
   95: #
   96: sub read_resource_meta {
   97:     my ($rootnode) = @_;
   98:     my %files;
   99:     #
  100:     # archive path
  101:     #
  102:     # get archive-path
  103:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
  104:     if (! $archname) {
  105: 	logger("ABORT", "archive-name element missing!!");
  106: 	exit 1;
  107:     }
  108: 
  109:     #
  110:     # files
  111:     #
  112:     my @filenodes = $rootnode->findnodes('child::file');
  113:     foreach my $fn (@filenodes) {
  114: 	my $name = sstrip($fn->findvalue('child::name'));
  115: 	my $path = sstrip($fn->findvalue('child::path'));
  116: 	logger("DEBUG", "FILE: ($path)$name");
  117: 	my $f = ($path) ? "$path/$name" : "$name";
  118: 	$files{$f} = [$name];
  119:     }
  120: 
  121:     #
  122:     # dirs
  123:     #
  124:     my @dirnodes = $rootnode->findnodes('child::dir');
  125:     foreach my $fn (@dirnodes) {
  126: 	my $name = sstrip($fn->findvalue('child::name'));
  127: 	my $path = sstrip($fn->findvalue('child::path'));
  128: 	logger("DEBUG", "DIR: ($path)$name");
  129: 	my $f = ($path) ? "$path/$name" : "$name";
  130: 	$files{$f} = [$name];
  131:     }
  132: 
  133:     #
  134:     # archive-storage-date
  135:     #
  136:     my $archdate = $rootnode->find('child::archive-storage-date');
  137:     if ($archdate) {
  138: 	logger("INFO", "archive storage date: $archdate");
  139:     } else {
  140: 	logger("ERROR", "archive storage date missing!");
  141: 	$errcnt++;
  142:     }
  143:     return \%files;
  144: }
  145: 
  146: 
  147: #
  148: # fs_read_files($realdir, $docdir, \%files, \%dirs)
  149: #
  150: # reads all files and directories below $realdir and puts the
  151: # files in %files and directories in %dirs
  152: # $docdir is only for recursion, it should be empty when called 
  153: # from outside
  154: #
  155: sub fs_read_files {
  156:     my ($directory, $docdir, $files, $dirs) = @_;    
  157:     my $cnt = 0;
  158: 
  159:     if (! opendir DIR, $directory) {
  160: 	return 0;
  161:     }
  162:     my @dirfiles = readdir DIR;
  163:     foreach my $fn (@dirfiles) {
  164: 	# ignore names starting with a dot
  165: 	next if ($fn =~ /^\./);
  166: 	# ignore other silly files
  167: 	next if ($junk_files{$fn});
  168: 
  169: 	$cnt++;
  170: 	my $f = "$directory/$fn";
  171: 	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
  172: 	#print "fs_file: \"$f\"\n";
  173: 	if (-f $f) {
  174: 	    #print "  is file\n";
  175: 	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
  176:                       $atime,$mtime,$ctime,$blksize,$blocks)
  177:                           = stat(_); 
  178: 	    $$files{$docf} = [$fn, $size, stime($mtime)];
  179: 	    #logger("TEST", "fn $fn, size $size, mtime $mtime");
  180: 	} elsif (-d _) {
  181: 	    #print "  is dir\n";
  182: 	    $$dirs{$docf} = $fn;
  183: 	    # recurse into directory
  184: 	    $cnt += fs_read_files($f, $docf, $files, $dirs);
  185: 	}
  186:     }
  187:     return $cnt;
  188: }
  189: 
  190: 
  191: #
  192: # $archcnt = run_query($dirquery, \%files)
  193: #
  194: # runs the archiver program on $dirquery and adds to the hash of archived files
  195: #
  196: # Sample output:
  197: #         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
  198: #
  199: sub run_query {
  200:     my ($dirquery, $files) = @_;
  201:     print LOG "START checkarchive $version ", scalar localtime, "\n";
  202:     my $archcmd = $archprog;
  203:     $archcmd .= " query archive -subdir=yes";
  204:     $archcmd .= " -description='$archname'";
  205:     $archcmd .= " '$dirquery'";
  206: 
  207:     my $archcnt = 0;
  208:     print LOG "CMD: $archcmd\n";
  209:     if (open ARCH, "$archcmd 2>&1 |") {
  210: 	while (<ARCH>) {
  211: 	    chomp;
  212: 	    print LOG "ARCH: $_\n";
  213: 	    if (/
  214: 		\s*([\d,]+)    # size
  215: 		\s+(\w+)       # unit of size
  216: 		\s+([\d\/]+)   # date mm\/dd\/yy
  217: 		\s+([\d:]+)    # time
  218: 		\s+(\S+)       # file name
  219: 		\s+(\w+)       # expiry
  220: 		\s+(\S+)       # archive label
  221: 		/x) {
  222: 		my $size = $1;
  223: 		my $sunit = $2;
  224: 		my $date = $3;
  225: 		my $time = $4;
  226: 		my $file = $5;
  227: 		my $exp = $6;
  228: 		my $label = $7;
  229: 		$size =~ s/,//g;
  230: 		$date = ymd_date($date);
  231: 		logger("DEBUG", "  QUERY: file '$file'");
  232: 		$archcnt++;
  233: 		if ($$files{$file}) {
  234: 		    logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
  235: 		    #$warncnt++;
  236: 		}
  237: 		if (length $file <= length $docdir) {
  238: 		    logger("DEBUG", "not below document dir: $file");
  239: 		    next;
  240: 		}
  241: 		$$files{$file} = [$size, "$date $time"];
  242: 	    }
  243: 	}
  244:     } else {
  245: 	logger("ABORT", "unable to start archive command '$archcmd'!!");
  246: 	exit 1;
  247:     }
  248: 
  249:     return $archcnt;
  250: }
  251: 
  252: 
  253: #
  254: # check_files(\%files_to_archive, \%archived_files)
  255: #
  256: # compares the list of archived and to be archived files
  257: #
  258: sub check_files {
  259:     my ($to_archive, $archived) = @_;
  260: 
  261:     my $nt = scalar keys %$to_archive;
  262:     my $na = scalar keys %$archived;
  263: 
  264:     foreach my $ft (sort keys %$to_archive) {
  265: 	my $fp = "$docdir/$ft";
  266: 	#logger("DEBUG", "  fp: $fp");
  267: 	if ($$archived{$fp}) {
  268: 	    logger("DEBUG", "$ft archived OK");
  269: 	    $$archived{$fp}->[2] = "OK";
  270: 	} else {
  271: 	    logger("ERROR", "file entry '$ft' missing from archive!");
  272: 	    $errcnt++;
  273: 	}
  274:     }
  275: 
  276:     foreach my $fa (sort keys %$archived) {
  277: 	if (! $$archived{$fa}->[2]) {
  278: 	    my ($fn, $fp) = split_file_path($fa);
  279: 	    if ($index_files{$fn}) {
  280: 		logger("DEBUG", "$fa ignored");
  281: 		$na--;
  282: 	    } else {
  283: 		logger("WARNING", "$fa archived but not in list!");
  284: 		$warncnt++;
  285: 	    }
  286: 	}
  287:     }
  288: 
  289:     if ($nt > $na) {
  290: 	logger("WARNING", "less files were archived ($na vs. $nt)!");
  291: 	$warncnt++;
  292:     } elsif ($na > $nt) {
  293: 	logger("WARNING", "more files were archived ($na vs. $nt)!");
  294: 	$warncnt++;
  295:     }
  296: 
  297: }
  298: 
  299: #
  300: # compare_files(\%files_on_disk, \%archived_files)
  301: #
  302: # compares the list of archived files and files on disk
  303: #
  304: sub compare_files {
  305:     my ($fs_files, $archived) = @_;
  306: 
  307:     foreach my $ft (sort keys %$fs_files) {
  308: 	next if ($index_files{$ft});
  309: 	my $fp = "$docdir/$ft";
  310: 	#logger("DEBUG", "  fp: $fp");
  311: 	if ($$archived{$fp}) {
  312: 	    next if ($index_files{$ft});
  313: 	    
  314: 	    my $asize = $$archived{$fp}[0];
  315: 	    my $atime = $$archived{$fp}[1];
  316: 	    my $fsize = $$fs_files{$ft}[1];
  317: 	    my $ftime = $$fs_files{$ft}[2];
  318: 	    if ($asize != $fsize) {
  319: 		logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
  320: 		$errcnt++;
  321: 	    } elsif ($atime lt $ftime) {
  322: 		logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
  323: 		$errcnt++;
  324: 	    } else {
  325: 		logger("WARNING", "archived file $ft still on disk");
  326: 		$warncnt++;
  327: 	    }
  328: 	} else {
  329: 	    logger("ERROR", "file on disk '$ft' is not in archive!");
  330: 	    $errcnt++;
  331: 	}
  332:     }
  333: }
  334: 
  335: 
  336: 
  337: #######################################################
  338: # main
  339: #
  340: 
  341: logger("INFO", "archivecheck $version");
  342: 
  343: # make shure the right user is running this program
  344: my $user = getlogin;
  345: if (($user ne "archive")&&($user ne "root")) {
  346:     logger("ABORT", "you must be archive or root user to run this program!");
  347:     exit 1;
  348: }
  349: 
  350: # read index.meta file
  351: my ($document, $rootnode) = read_xml($metafile);
  352: 
  353: # check file and add archive date
  354: my $files_to_archive = read_resource_meta($rootnode);
  355: 
  356: # check for .archived file
  357: if (-f "$docdir/.archived") {
  358:     logger("INFO", ".archived file exists.");
  359: } else {
  360:     logger("WARNING", "no .archived file!");
  361:     $warncnt++;
  362: }
  363: 
  364: # check archive
  365: my %archived_files = ();
  366: my $archcnt = 0;
  367: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
  368:     # TSM needs two different paths because of historical mount points :-(
  369:     my $docdir1 = "/mpiwg/archive/data/";
  370:     $archcnt += run_query($docdir1, \%archived_files);
  371:     my $docdir2 = "/mpiwg/archive/";
  372:     $archcnt += run_query($docdir2, \%archived_files);
  373: } else {
  374:     $archcnt += run_query("$docdir/", \%archived_files);
  375: }
  376: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
  377: 
  378: my $num_arch_files = (scalar keys %archived_files);
  379: if ($num_arch_files == 0) {
  380:     logger("ABORT", "no archive of this directory!!");
  381:     exit 1;
  382: }
  383: logger("INFO", "$num_arch_files files archived");
  384: 
  385: # check list of archived files
  386: check_files($files_to_archive, \%archived_files);
  387: 
  388: # read files from filesystem
  389: my %fsfiles;
  390: my %fsdirs;
  391: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
  392: 
  393: logger("INFO", "$num_fs_files files still on disk!");
  394: if ($num_fs_files > 0) {
  395:     compare_files(\%fsfiles, \%archived_files);
  396: }
  397: 
  398: logger("INFO", "$warncnt warnings");
  399: logger("INFO", "$errcnt errors");
  400: if ($errcnt == 0) {
  401:     logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
  402:     exit 0;
  403: } else {
  404:     logger("ABORT", "there were $errcnt errors!!");
  405:     exit 1;
  406: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>