File:  [Repository] / foxridge-archiver / archivecheck.pl
Revision 1.5: download - view: text, annotated - select for diffs - revision graph
Wed Dec 7 17:31:04 2005 UTC (18 years, 6 months ago) by casties
Branches: MAIN
CVS tags: HEAD
fixed handling of "." in directories

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.4.2 (7.12.2005 ROC)";
   20: 
   21: # read command line parameters
   22: my $args = parseargs;
   23: 
   24: # debug level
   25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   26: 
   27: # XML namespace (not really implemented!)
   28: my $namespace = "";
   29: 
   30: # archive name (archive-path element, usually == $docdir)
   31: my $archname;
   32: 
   33: 
   34: #######################################################
   35: # external programs
   36: #
   37: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   38: if (! -x $archprog) {
   39:     logger("ABORT", "TSM client program '$archprog' missing!!");
   40:     exit 1;
   41: }
   42: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   43: # if (! -x $checkprog) {
   44: #     logge("ABORT", "meta data checking program '$checkprog' missing!!");
   45: #     exit 1;
   46: # }
   47: # log file for archiver
   48: my $log_file = "/var/tmp/archivecheck.log";
   49: if (! open LOG, ">>$log_file") {
   50:     logger("ABORT", "unable to write log file '$log_file'!!");
   51:     exit 1;
   52: }
   53: 
   54: #######################################################
   55: # check parameters that were passed to the program
   56: #
   57: my $docdir = $$args{'path'};
   58: if (! $docdir) {
   59:     print "ABORT: no document directory given!\n";
   60:     exit 1;
   61: }
   62: # strip trailing slashes
   63: $docdir =~ s/\/$//;
   64: if (! -d $docdir) {
   65:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   66:     exit 1;
   67: }
   68: 
   69: my $metafile = "$docdir/index.meta";
   70: if (! -f $metafile) {
   71:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   72:     exit 1;
   73: }
   74: 
   75: #######################################################
   76: # internal variables
   77: #
   78: 
   79: # number of errors
   80: my $errcnt = 0;
   81: # number of warnings
   82: my $warncnt = 0;
   83: 
   84: #######################################################
   85: # subroutines
   86: #
   87: 
   88: 
   89: #
   90: # $files = read_resource_meta($rootnode)
   91: #
   92: # checks general resource meta information and reads the list of files
   93: #
   94: sub read_resource_meta {
   95:     my ($rootnode) = @_;
   96:     my %files;
   97:     #
   98:     # archive path
   99:     #
  100:     # get archive-path
  101:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
  102:     if (! $archname) {
  103: 	logger("ABORT", "archive-name element missing!!");
  104: 	exit 1;
  105:     }
  106: 
  107:     #
  108:     # files
  109:     #
  110:     my @filenodes = $rootnode->findnodes('child::file');
  111:     foreach my $fn (@filenodes) {
  112: 	my $name = sstrip($fn->findvalue('child::name'));
  113: 	my $path = sstrip($fn->findvalue('child::path'));
  114: 	logger("DEBUG", "FILE: ($path)$name");
  115: 	my $f = ($path) ? "$path/$name" : "$name";
  116: 	$files{$f} = [$name];
  117:     }
  118: 
  119:     #
  120:     # dirs
  121:     #
  122:     my @dirnodes = $rootnode->findnodes('child::dir');
  123:     foreach my $fn (@dirnodes) {
  124: 	my $name = sstrip($fn->findvalue('child::name'));
  125: 	my $path = sstrip($fn->findvalue('child::path'));
  126: 	logger("DEBUG", "DIR: ($path)$name");
  127: 	my $f = "$name";
  128: 	if (($path)&&($path ne '.')) {
  129: 	    $f = "$path/$name";
  130: 	}
  131: 	$files{$f} = [$name];
  132:     }
  133: 
  134:     #
  135:     # archive-storage-date
  136:     #
  137:     my $archdate = $rootnode->find('child::archive-storage-date');
  138:     if ($archdate) {
  139: 	logger("INFO", "archive storage date: $archdate");
  140:     } else {
  141: 	logger("ERROR", "archive storage date missing!");
  142: 	$errcnt++;
  143:     }
  144:     return \%files;
  145: }
  146: 
  147: 
  148: #
  149: # fs_read_files($realdir, $docdir, \%files, \%dirs)
  150: #
  151: # reads all files and directories below $realdir and puts the
  152: # files in %files and directories in %dirs
  153: # $docdir is only for recursion, it should be empty when called 
  154: # from outside
  155: #
  156: sub fs_read_files {
  157:     my ($directory, $docdir, $files, $dirs) = @_;    
  158:     my $cnt = 0;
  159: 
  160:     if (! opendir DIR, $directory) {
  161: 	return 0;
  162:     }
  163:     my @dirfiles = readdir DIR;
  164:     foreach my $fn (@dirfiles) {
  165: 	# ignore names starting with a dot
  166: 	next if ($fn =~ /^\./);
  167: 	# ignore other silly files
  168: 	next if ($junk_files{$fn});
  169: 
  170: 	$cnt++;
  171: 	my $f = "$directory/$fn";
  172: 	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
  173: 	#print "fs_file: \"$f\"\n";
  174: 	if (-f $f) {
  175: 	    #print "  is file\n";
  176: 	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
  177:                       $atime,$mtime,$ctime,$blksize,$blocks)
  178:                           = stat(_); 
  179: 	    $$files{$docf} = [$fn, $size, stime($mtime)];
  180: 	    #logger("TEST", "fn $fn, size $size, mtime $mtime");
  181: 	} elsif (-d _) {
  182: 	    #print "  is dir\n";
  183: 	    $$dirs{$docf} = $fn;
  184: 	    # recurse into directory
  185: 	    $cnt += fs_read_files($f, $docf, $files, $dirs);
  186: 	}
  187:     }
  188:     return $cnt;
  189: }
  190: 
  191: 
  192: #
  193: # $archcnt = run_query($dirquery, \%files)
  194: #
  195: # runs the archiver program on $dirquery and adds to the hash of archived files
  196: #
  197: # Sample output:
  198: #         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
  199: #
  200: sub run_query {
  201:     my ($dirquery, $files) = @_;
  202:     print LOG "START checkarchive $version ", scalar localtime, "\n";
  203:     my $archcmd = $archprog;
  204:     $archcmd .= " query archive -subdir=yes";
  205:     $archcmd .= " -description='$archname'";
  206:     $archcmd .= " '$dirquery'";
  207: 
  208:     my $archcnt = 0;
  209:     print LOG "CMD: $archcmd\n";
  210:     if (open ARCH, "$archcmd 2>&1 |") {
  211: 	while (<ARCH>) {
  212: 	    chomp;
  213: 	    print LOG "ARCH: $_\n";
  214: 	    if (/
  215: 		\s*([\d,]+)    # size
  216: 		\s+(\w+)       # unit of size
  217: 		\s+([\d\/]+)   # date mm\/dd\/yy
  218: 		\s+([\d:]+)    # time
  219: 		\s+(\S+)       # file name
  220: 		\s+(\w+)       # expiry
  221: 		\s+(\S+)       # archive label
  222: 		/x) {
  223: 		my $size = $1;
  224: 		my $sunit = $2;
  225: 		my $date = $3;
  226: 		my $time = $4;
  227: 		my $file = $5;
  228: 		my $exp = $6;
  229: 		my $label = $7;
  230: 		$size =~ s/,//g;
  231: 		$date = ymd_date($date);
  232: 		logger("DEBUG", "  QUERY: file '$file'");
  233: 		$archcnt++;
  234: 		if ($$files{$file}) {
  235: 		    logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
  236: 		    #$warncnt++;
  237: 		}
  238: 		if (length $file <= length $docdir) {
  239: 		    logger("DEBUG", "not below document dir: $file");
  240: 		    next;
  241: 		}
  242: 		$$files{$file} = [$size, "$date $time"];
  243: 	    }
  244: 	}
  245:     } else {
  246: 	logger("ABORT", "unable to start archive command '$archcmd'!!");
  247: 	exit 1;
  248:     }
  249: 
  250:     return $archcnt;
  251: }
  252: 
  253: 
  254: #
  255: # check_files(\%files_to_archive, \%archived_files)
  256: #
  257: # compares the list of archived and to be archived files
  258: #
  259: sub check_files {
  260:     my ($to_archive, $archived) = @_;
  261: 
  262:     my $nt = scalar keys %$to_archive;
  263:     my $na = scalar keys %$archived;
  264: 
  265:     foreach my $ft (sort keys %$to_archive) {
  266: 	my $fp = "$docdir/$ft";
  267: 	#logger("DEBUG", "  fp: $fp");
  268: 	if ($$archived{$fp}) {
  269: 	    logger("DEBUG", "$ft archived OK");
  270: 	    $$archived{$fp}->[2] = "OK";
  271: 	} else {
  272: 	    logger("ERROR", "file entry '$ft' missing from archive!");
  273: 	    $errcnt++;
  274: 	}
  275:     }
  276: 
  277:     foreach my $fa (sort keys %$archived) {
  278: 	if (! $$archived{$fa}->[2]) {
  279: 	    my ($fn, $fp) = split_file_path($fa);
  280: 	    if ($index_files{$fn}) {
  281: 		logger("DEBUG", "$fa ignored");
  282: 		$na--;
  283: 	    } else {
  284: 		logger("WARNING", "$fa archived but not in list!");
  285: 		$warncnt++;
  286: 	    }
  287: 	}
  288:     }
  289: 
  290:     if ($nt > $na) {
  291: 	logger("WARNING", "less files were archived ($na vs. $nt)!");
  292: 	$warncnt++;
  293:     } elsif ($na > $nt) {
  294: 	logger("WARNING", "more files were archived ($na vs. $nt)!");
  295: 	$warncnt++;
  296:     }
  297: 
  298: }
  299: 
  300: #
  301: # compare_files(\%files_on_disk, \%archived_files)
  302: #
  303: # compares the list of archived files and files on disk
  304: #
  305: sub compare_files {
  306:     my ($fs_files, $archived) = @_;
  307: 
  308:     foreach my $ft (sort keys %$fs_files) {
  309: 	next if ($index_files{$ft});
  310: 	my $fp = "$docdir/$ft";
  311: 	#logger("DEBUG", "  fp: $fp");
  312: 	if ($$archived{$fp}) {
  313: 	    next if ($index_files{$ft});
  314: 	    
  315: 	    my $asize = $$archived{$fp}[0];
  316: 	    my $atime = $$archived{$fp}[1];
  317: 	    my $fsize = $$fs_files{$ft}[1];
  318: 	    my $ftime = $$fs_files{$ft}[2];
  319: 	    if ($asize != $fsize) {
  320: 		logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
  321: 		$errcnt++;
  322: 	    } elsif ($atime lt $ftime) {
  323: 		logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
  324: 		$errcnt++;
  325: 	    } else {
  326: 		logger("WARNING", "archived file $ft still on disk");
  327: 		$warncnt++;
  328: 	    }
  329: 	} else {
  330: 	    logger("ERROR", "file on disk '$ft' is not in archive!");
  331: 	    $errcnt++;
  332: 	}
  333:     }
  334: }
  335: 
  336: 
  337: 
  338: #######################################################
  339: # main
  340: #
  341: 
  342: logger("INFO", "archivecheck $version");
  343: 
  344: # make shure the right user is running this program
  345: my $user = getlogin;
  346: if (not (($user eq "archive")||($user eq "root"))) {
  347:     logger("ABORT", "you must be archive or root user to run this program!");
  348:     exit 1;
  349: }
  350: 
  351: # read index.meta file
  352: my ($document, $rootnode) = read_xml($metafile);
  353: 
  354: # check file and add archive date
  355: my $files_to_archive = read_resource_meta($rootnode);
  356: 
  357: # check for .archived file
  358: if (-f "$docdir/.archived") {
  359:     logger("INFO", ".archived file exists.");
  360: } else {
  361:     logger("WARNING", "no .archived file!");
  362:     $warncnt++;
  363: }
  364: 
  365: # check archive
  366: my %archived_files = ();
  367: my $archcnt = 0;
  368: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
  369:     # TSM needs two different paths because of historical mount points :-(
  370:     my $docdir1 = "/mpiwg/archive/data/";
  371:     $archcnt += run_query($docdir1, \%archived_files);
  372:     my $docdir2 = "/mpiwg/archive/";
  373:     $archcnt += run_query($docdir2, \%archived_files);
  374: } else {
  375:     $archcnt += run_query("$docdir/", \%archived_files);
  376: }
  377: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
  378: 
  379: my $num_arch_files = (scalar keys %archived_files);
  380: if ($num_arch_files == 0) {
  381:     logger("ABORT", "no archive of this directory!!");
  382:     exit 1;
  383: }
  384: logger("INFO", "$num_arch_files files archived");
  385: 
  386: # check list of archived files
  387: check_files($files_to_archive, \%archived_files);
  388: 
  389: # read files from filesystem
  390: my %fsfiles;
  391: my %fsdirs;
  392: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
  393: 
  394: logger("INFO", "$num_fs_files files still on disk!");
  395: if ($num_fs_files > 0) {
  396:     compare_files(\%fsfiles, \%archived_files);
  397: }
  398: 
  399: logger("INFO", "$warncnt warnings");
  400: logger("INFO", "$errcnt errors");
  401: if ($errcnt == 0) {
  402:     logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
  403:     exit 0;
  404: } else {
  405:     logger("ABORT", "there were $errcnt errors!!");
  406:     exit 1;
  407: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>