File:  [Repository] / foxridge-archiver / archivecheck.pl
Revision 1.8: download - view: text, annotated - select for diffs - revision graph
Thu Mar 16 17:00:43 2017 UTC (7 years, 3 months ago) by casties
Branches: MAIN
CVS tags: HEAD
updated to Ubuntu Perl paths.

    1: #!/usr/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.4.5 (13.7.2009 ROC)";
   20: 
   21: # read command line parameters
   22: my $args = parseargs;
   23: 
   24: # debug level
   25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   26: 
   27: # XML namespace (not really implemented!)
   28: my $namespace = "";
   29: 
   30: # archive name (archive-path element, usually == $docdir)
   31: my $archname;
   32: 
   33: 
   34: #######################################################
   35: # external programs
   36: #
   37: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   38: if (! -x $archprog) {
   39:     logger("ABORT", "TSM client program '$archprog' missing!!");
   40:     exit 1;
   41: }
   42: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   43: # if (! -x $checkprog) {
   44: #     logge("ABORT", "meta data checking program '$checkprog' missing!!");
   45: #     exit 1;
   46: # }
   47: # log file for archiver
   48: my $log_file = "/var/tmp/archivecheck.log";
   49: if (! open LOG, ">>$log_file") {
   50:     logger("ABORT", "unable to write log file '$log_file'!!");
   51:     exit 1;
   52: }
   53: 
   54: #######################################################
   55: # check parameters that were passed to the program
   56: #
   57: my $docdir = $$args{'path'};
   58: if (! $docdir) {
   59:     print "ABORT: no document directory given!\n";
   60:     exit 1;
   61: }
   62: # strip trailing slashes
   63: $docdir =~ s/\/$//;
   64: if (! -d $docdir) {
   65:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   66:     exit 1;
   67: }
   68: 
   69: my $metafile = "$docdir/index.meta";
   70: if (! -f $metafile) {
   71:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   72:     exit 1;
   73: }
   74: 
   75: #######################################################
   76: # internal variables
   77: #
   78: 
   79: # number of errors
   80: my $errcnt = 0;
   81: # number of warnings
   82: my $warncnt = 0;
   83: 
   84: #######################################################
   85: # subroutines
   86: #
   87: 
   88: 
   89: #
   90: # $files = read_resource_meta($rootnode)
   91: #
   92: # checks general resource meta information and reads the list of files
   93: #
   94: sub read_resource_meta {
   95:     my ($rootnode) = @_;
   96:     my %files;
   97:     #
   98:     # archive path
   99:     #
  100:     # get archive-path
  101:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
  102:     if (! $archname) {
  103: 	logger("ABORT", "archive-name element missing!!");
  104: 	exit 1;
  105:     }
  106: 
  107:     #
  108:     # files
  109:     #
  110:     my @filenodes = $rootnode->findnodes('child::file');
  111:     foreach my $fn (@filenodes) {
  112: 	my $name = sstrip($fn->findvalue('child::name'));
  113: 	my $path = sstrip($fn->findvalue('child::path'));
  114: 	logger("DEBUG", "FILE: ($path)$name");
  115: 	my $f = ($path) ? "$path/$name" : "$name";
  116: 	$files{$f} = [$name];
  117:     }
  118: 
  119:     #
  120:     # dirs
  121:     #
  122:     my @dirnodes = $rootnode->findnodes('child::dir');
  123:     foreach my $fn (@dirnodes) {
  124: 	my $name = sstrip($fn->findvalue('child::name'));
  125: 	my $path = sstrip($fn->findvalue('child::path'));
  126: 	logger("DEBUG", "DIR: ($path)$name");
  127: 	my $f = "$name";
  128: 	if (($path)&&($path ne '.')) {
  129: 	    $f = "$path/$name";
  130: 	}
  131: 	$files{$f} = [$name];
  132:     }
  133: 
  134:     #
  135:     # archive-storage-date
  136:     #
  137:     my $archdate = $rootnode->find('child::archive-storage-date');
  138:     if ($archdate) {
  139: 	logger("INFO", "archive storage date: $archdate");
  140:     } else {
  141: 	logger("ERROR", "archive storage date missing!");
  142: 	$errcnt++;
  143:     }
  144:     return \%files;
  145: }
  146: 
  147: 
  148: #
  149: # fs_read_files($realdir, $docdir, \%files, \%dirs)
  150: #
  151: # reads all files and directories below $realdir and puts the
  152: # files in %files and directories in %dirs
  153: # $docdir is only for recursion, it should be empty when called 
  154: # from outside
  155: #
  156: sub fs_read_files {
  157:     my ($directory, $docdir, $files, $dirs) = @_;    
  158:     my $cnt = 0;
  159: 
  160:     if (! opendir DIR, $directory) {
  161: 	return 0;
  162:     }
  163:     my @dirfiles = readdir DIR;
  164:     foreach my $fn (@dirfiles) {
  165: 	# ignore names starting with a dot
  166: 	next if ($fn =~ /^\./);
  167: 	# ignore other silly files
  168: 	next if ($junk_files{$fn});
  169: 
  170: 	$cnt++;
  171: 	my $f = "$directory/$fn";
  172: 	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
  173: 	#print "fs_file: \"$f\"\n";
  174: 	if (-f $f) {
  175: 	    #print "  is file\n";
  176: 	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
  177:                       $atime,$mtime,$ctime,$blksize,$blocks)
  178:                           = stat(_); 
  179: 	    $$files{$docf} = [$fn, $size, stime($mtime)];
  180: 	    #logger("TEST", "fn $fn, size $size, mtime $mtime");
  181: 	} elsif (-d _) {
  182: 	    #print "  is dir\n";
  183: 	    $$dirs{$docf} = $fn;
  184: 	    # recurse into directory
  185: 	    $cnt += fs_read_files($f, $docf, $files, $dirs);
  186: 	}
  187:     }
  188:     return $cnt;
  189: }
  190: 
  191: 
  192: #
  193: # $archcnt = run_query($dirquery, \%files)
  194: #
  195: # runs the archiver program on $dirquery and adds to the hash of archived files
  196: #
  197: # Sample output:
  198: #         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
  199: #
  200: sub run_query {
  201:     my ($dirquery, $files) = @_;
  202:     print LOG "START checkarchive $version ", scalar localtime, "\n";
  203:     my $archcmd = $archprog;
  204:     $archcmd .= " query archive -subdir=yes";
  205:     $archcmd .= " -description='$archname'";
  206:     $archcmd .= " '$dirquery'";
  207: 
  208:     logger('INFO', "querying TSM server for $dirquery, please wait...");
  209:     my $archcnt = 0;
  210:     print LOG "CMD: $archcmd\n";
  211:     if (open ARCH, "$archcmd 2>&1 |") {
  212: 	while (<ARCH>) {
  213: 	    chomp;
  214: 	    print LOG "ARCH: $_\n";
  215: 	    if (/
  216: 		\s*([\d,]+)    # size
  217: 		\s+(\w+)       # unit of size
  218: 		\s+([\d\/]+)   # date mm\/dd\/yy
  219: 		\s+([\d:]+)    # time
  220: 		\s+(\S+)       # file name
  221: 		\s+(\w+)       # expiry
  222: 		\s+(\S+)       # archive label
  223: 		/x) {
  224: 		my $size = $1;
  225: 		my $sunit = $2;
  226: 		my $date = $3;
  227: 		my $time = $4;
  228: 		my $file = $5;
  229: 		my $exp = $6;
  230: 		my $label = $7;
  231: 		$size =~ s/,//g;
  232: 		$date = ymd_date($date);
  233: 		logger("DEBUG", "  QUERY: file '$file'");
  234: 		$archcnt++;
  235: 		if ($$files{$file}) {
  236: 		    logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
  237: 		    #$warncnt++;
  238: 		}
  239: 		if (length $file <= length $docdir) {
  240: 		    logger("DEBUG", "not below document dir: $file");
  241: 		    next;
  242: 		}
  243: 		$$files{$file} = [$size, "$date $time"];
  244: 	    }
  245: 	}
  246:     } else {
  247: 	logger("ABORT", "unable to start archive command '$archcmd'!!");
  248: 	exit 1;
  249:     }
  250: 
  251:     return $archcnt;
  252: }
  253: 
  254: 
  255: #
  256: # check_files(\%files_to_archive, \%archived_files)
  257: #
  258: # compares the list of archived and to be archived files
  259: #
  260: sub check_files {
  261:     my ($to_archive, $archived) = @_;
  262: 
  263:     my $nt = scalar keys %$to_archive;
  264:     my $na = scalar keys %$archived;
  265: 
  266:     foreach my $ft (sort keys %$to_archive) {
  267: 	my $fp = "$docdir/$ft";
  268: 	#logger("DEBUG", "  fp: $fp");
  269: 	if ($$archived{$fp}) {
  270: 	    logger("DEBUG", "$ft archived OK");
  271: 	    $$archived{$fp}->[2] = "OK";
  272: 	} else {
  273: 	    logger("ERROR", "file entry '$ft' missing from archive!");
  274: 	    $errcnt++;
  275: 	}
  276:     }
  277: 
  278:     foreach my $fa (sort keys %$archived) {
  279: 	if (! $$archived{$fa}->[2]) {
  280: 	    my ($fn, $fp) = split_file_path($fa);
  281: 	    if ($index_files{$fn}) {
  282: 		logger("DEBUG", "$fa ignored");
  283: 		$na--;
  284: 	    } else {
  285: 		logger("WARNING", "$fa archived but not in list!");
  286: 		$warncnt++;
  287: 	    }
  288: 	}
  289:     }
  290: 
  291:     if ($nt > $na) {
  292: 	logger("WARNING", "less files were archived ($na vs. $nt)!");
  293: 	$warncnt++;
  294:     } elsif ($na > $nt) {
  295: 	logger("WARNING", "more files were archived ($na vs. $nt)!");
  296: 	$warncnt++;
  297:     }
  298: 
  299: }
  300: 
  301: #
  302: # compare_files(\%files_on_disk, \%archived_files)
  303: #
  304: # compares the list of archived files and files on disk
  305: #
  306: sub compare_files {
  307:     my ($fs_files, $archived) = @_;
  308: 
  309:     foreach my $ft (sort keys %$fs_files) {
  310: 	next if ($index_files{$ft});
  311: 	my $fp = "$docdir/$ft";
  312: 	#logger("DEBUG", "  fp: $fp");
  313: 	if ($$archived{$fp}) {
  314: 	    next if ($index_files{$ft});
  315: 	    
  316: 	    my $asize = $$archived{$fp}[0];
  317: 	    my $atime = $$archived{$fp}[1];
  318: 	    my $fsize = $$fs_files{$ft}[1];
  319: 	    my $ftime = $$fs_files{$ft}[2];
  320: 	    if ($asize != $fsize) {
  321: 		logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
  322: 		$errcnt++;
  323: 	    } elsif ($atime lt $ftime) {
  324: 		logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
  325: 		$errcnt++;
  326: 	    } else {
  327: 		logger("WARNING", "archived file $ft still on disk");
  328: 		$warncnt++;
  329: 	    }
  330: 	} else {
  331: 	    logger("ERROR", "file on disk '$ft' is not in archive!");
  332: 	    $errcnt++;
  333: 	}
  334:     }
  335: }
  336: 
  337: 
  338: 
  339: #######################################################
  340: # main
  341: #
  342: 
  343: logger("INFO", "archivecheck $version");
  344: 
  345: # make shure the right user is running this program
  346: my $user = getlogin || getpwuid($<) ;
  347: if (not (($user eq "archive")||($user eq "root"))) {
  348:     logger("ABORT", "you must be archive or root user to run this program!");
  349:     exit 1;
  350: }
  351: 
  352: # read index.meta file
  353: my ($document, $rootnode) = read_xml($metafile);
  354: 
  355: # check file and add archive date
  356: my $files_to_archive = read_resource_meta($rootnode);
  357: 
  358: # check for .archived file
  359: if (-f "$docdir/.archived") {
  360:     logger("INFO", ".archived file exists.");
  361: } else {
  362:     logger("WARNING", "no .archived file!");
  363:     $warncnt++;
  364: }
  365: 
  366: # check archive
  367: my %archived_files = ();
  368: my $archcnt = 0;
  369: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
  370:     # TSM needs two different paths because of historical mount points :-(
  371:     # ...doesn't work anymore since old filespace was renamed (13.7.2009)
  372: #    my $docdir1 = "/mpiwg/archivedataold/";
  373: #    $archcnt += run_query($docdir1, \%archived_files);
  374:     my $docdir2 = "/mpiwg/archive/";
  375:     $archcnt += run_query($docdir2, \%archived_files);
  376: } else {
  377:     $archcnt += run_query("$docdir/", \%archived_files);
  378: }
  379: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
  380: 
  381: my $num_arch_files = (scalar keys %archived_files);
  382: if ($num_arch_files == 0) {
  383:     logger("ABORT", "no archive of this directory!!");
  384:     exit 1;
  385: }
  386: logger("INFO", "$num_arch_files files archived");
  387: 
  388: # check list of archived files
  389: check_files($files_to_archive, \%archived_files);
  390: 
  391: # read files from filesystem
  392: my %fsfiles;
  393: my %fsdirs;
  394: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
  395: 
  396: logger("INFO", "$num_fs_files files still on disk!");
  397: if ($num_fs_files > 0) {
  398:     compare_files(\%fsfiles, \%archived_files);
  399: }
  400: 
  401: logger("INFO", "$warncnt warnings");
  402: logger("INFO", "$errcnt errors");
  403: if ($errcnt == 0) {
  404:     logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
  405:     exit 0;
  406: } else {
  407:     logger("ABORT", "there were $errcnt errors!!");
  408:     exit 1;
  409: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>