File:  [Repository] / foxridge-archiver / archiver.pl
Revision 1.5: download - view: text, annotated - select for diffs - revision graph
Tue Sep 20 17:24:57 2005 UTC (18 years, 9 months ago) by casties
Branches: MAIN
CVS tags: HEAD
fixed problem with dir names in metacheck
new version of metacheck defaults to not change index file
new version of archiver uses new version of metacheck

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.7 (ROC 20.9.2005)";
   20: 
   21: # short help
   22: my $help = "MPIWG archiver $version
   23: use:  archiver [options] docpath
   24: options:
   25:   -debug  show debugging info
   26:   -premigrate  don't delete archived files
   27:   -force  archive even if already archived
   28: ";
   29: 
   30: # read command line parameters
   31: my $args = MPIWGStor::parseargs;
   32: if (! scalar(%$args)) {
   33:     print $help, "\n";
   34:     exit 1;
   35: }
   36: 
   37: # debug level
   38: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   39: 
   40: # force archiving
   41: my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
   42: 
   43: # rewrite XML file (necessary for archive date!)
   44: my $fix_xml = 1;
   45: my $xml_changed = 0;
   46: 
   47: # XML namespace (not really implemented!)
   48: my $namespace = "";
   49: 
   50: # archive name (archive-path element, usually == $docdir)
   51: my $archname;
   52: 
   53: # archive storage date (now)
   54: my $archdate = stime(time);
   55: 
   56: # delete "junk" files before archiving
   57: my $delete_junk_files = 1;
   58: 
   59: # delete data files after archiving
   60: my $delete_data_files = 1;
   61: 
   62: # don't delete archived files with "-premigrate"
   63: if (exists $$args{'premigrate'}) {
   64:     $delete_data_files = not $$args{'premigrate'};
   65: }
   66: if ($delete_data_files) {
   67:     logger('INFO', "going to remove successfully archived files from disk");
   68: }
   69: 
   70: 
   71: #######################################################
   72: # external programs
   73: #
   74: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   75: if (! -x $archprog) {
   76:     logger('ABORT', "TSM client program '$archprog' missing!");
   77:     exit 1;
   78: }
   79: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   80: if (! -x $checkprog) {
   81:     logger('ABORT', "meta data checking program '$checkprog' missing!");
   82:     exit 1;
   83: }
   84: # log file for archiver
   85: my $log_file = "/var/log/mpiwg-archiver.log";
   86: if (! open LOG, ">>$log_file") {
   87:     logger('ABORT', "unable to write log file '$log_file'!");
   88:     exit 1;
   89: }
   90: 
   91: #######################################################
   92: # check parameters that were passed to the program
   93: #
   94: 
   95: my $docdir = $$args{'path'};
   96: # strip double slashes
   97: $docdir =~ s/\/\//\//;
   98: # strip trailing slashes
   99: $docdir =~ s/\/+$//;
  100: if (! -d $docdir) {
  101:     logger('ABORT', "document directory \'$docdir\' doesn't exist!");
  102:     exit 1;
  103: }
  104: 
  105: my $metafile = "$docdir/index.meta";
  106: if (! -f $metafile) {
  107:     logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
  108:     exit 1;
  109: }
  110: 
  111: #######################################################
  112: # internal variables
  113: #
  114: 
  115: # number of errors
  116: my $errcnt = 0;
  117: # number of warnings
  118: my $warncnt = 0;
  119: 
  120: #######################################################
  121: # subroutines
  122: #
  123: 
  124: #
  125: # $files = read_resource_meta($rootnode)
  126: #
  127: # checks general resource meta information and reads the list of files
  128: #
  129: sub read_resource_meta {
  130:     my ($rootnode) = @_;
  131:     my %files;
  132:     #
  133:     # archive path
  134:     #
  135:     # get archive-path
  136:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
  137:     if (! $archname) {
  138: 	logger('ABORT', "archive-name element missing!");
  139: 	exit 1;
  140:     }
  141: 
  142:     #
  143:     # files
  144:     #
  145:     my @filenodes = $rootnode->findnodes('child::file');
  146:     foreach my $fn (@filenodes) {
  147: 	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
  148: 	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
  149: 	logger('DEBUG', "FILE ($path)$name");
  150: 	my $f = ($path) ? "$path/$name" : "$name";
  151: 	$files{$f} = $name;
  152:     }
  153: 
  154:     #
  155:     # archive-storage-date
  156:     #
  157:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
  158:     if ($stordatenode) {
  159: 	logger('WARNING', "archive storage date exists! Resource already archived?");
  160: 	$warncnt++;
  161: 	# delete old date
  162: 	$stordatenode->removeChildNodes;
  163:     } else {
  164: 	# create new storage date node
  165: 	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
  166: 	# move after archive-path
  167: 	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
  168:     }
  169:     $stordatenode->appendTextNode($archdate);
  170:     $xml_changed++;
  171:     return \%files;
  172: }
  173: 
  174: 
  175: #
  176: # $%files = run_archive
  177: #
  178: # runs the archiver program on $docdir and returns a list of archived files
  179: #
  180: sub run_archive {
  181:     my %files;
  182:     print LOG "START archiver $version $archdate\n";
  183:     my $archcmd = $archprog;
  184:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
  185:     $archcmd .= " -description='$archname'";
  186:     $archcmd .= " '$docdir/'";
  187: 
  188:     print LOG "CMD: $archcmd\n";
  189:     if (open ARCH, "$archcmd 2>&1 |") {
  190: 	while (<ARCH>) {
  191: 	    chomp;
  192: 	    print LOG "ARCH: $_\n";
  193: 	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
  194: 		print "  ARCH: file '$1'\n";
  195: 		$files{$1} = "ok";
  196: 	    }
  197: 	    if (/^Archive processing of .* finished without failure./) {
  198: 		print "  ARCH: OK\n";
  199: 	    }
  200: 	}
  201:     } else {
  202: 	logger('ABORT', "unable to start archive command '$archcmd'!!");
  203: 	exit 1;
  204:     }
  205: 
  206:     return \%files;
  207: }
  208: 
  209: 
  210: #
  211: # check_files(\%files_to_archive, \%archived_files)
  212: #
  213: # compares the list of archived and to be archived files
  214: #
  215: sub check_files {
  216:     my ($to_archive, $archived) = @_;
  217: 
  218:     my $nt = scalar keys %$to_archive;
  219:     my $na = scalar keys %$archived;
  220: 
  221:     foreach my $ft (sort keys %$to_archive) {
  222: 	my $fp = "$docdir/$ft";
  223: 	#print "  fp: $fp\n";
  224: 	if ($$archived{$fp}) {
  225: 	    logger('DEBUG', "$ft archived OK");
  226: 	    $$archived{$fp} = "OK";
  227: 	} else {
  228: 	    logger('ERROR', "file '$ft' missing from archive!");
  229: 	    $errcnt++;
  230: 	}
  231:     }
  232: 
  233:     foreach my $fa (sort keys %$archived) {
  234: 	if ($$archived{$fa} ne "OK") {
  235: 	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
  236: 	    if ($MPIWGStor::index_files{$fn}) {
  237: 		logger('DEBUG', "$fa ignored");
  238: 		$na--;
  239: 	    } else {
  240: 		logger('WARNING', "$fa archived but not in list!");
  241: 		$warncnt++;
  242: 	    }
  243: 	}
  244:     }
  245: 
  246:     if ($nt > $na) {
  247: 	logger('WARNING', "less files were archived ($na vs. $nt)!");
  248:     } elsif ($na > $nt) {
  249: 	logger('WARNING', "more files were archived ($na vs. $nt)!");
  250:     }
  251: 
  252: }
  253: 
  254: 
  255: #
  256: # delete_files(\%files)
  257: #
  258: # deletes the files from the list (of absolute files) and their directories
  259: # if they are empty
  260: #
  261: sub delete_files {
  262:     my ($files) = @_;
  263:     my %dirs;
  264: 
  265:     foreach my $f (sort keys %$files) {
  266: 	my ($fn, $fp) = MPIWGStor::split_file_path($f);
  267: 	# collect all unique directories
  268:         if ($fp && (! $dirs{$fp})) {
  269: 	    $dirs{$fp} = $fp;
  270: 	}
  271: 	# don't delete index files
  272: 	next if ($MPIWGStor::index_files{$fn});
  273: 	# no file no delete
  274: 	next unless (-f $f);
  275: 	# delete files
  276: 	if (unlink $f) {
  277: 	    logger('INFO', "remove $f ($fn)");
  278: 	} else {
  279: 	    logger('ERROR', "unable to delete $f!");
  280: 	    $errcnt++;
  281: 	}
  282:     }
  283:     # try to delete all empty directories
  284:     my @dirkeys = sort keys %dirs;
  285:     # starting at the end to get to the subdirectories first
  286:     for (my $i = $#dirkeys; $i >= 0; $i--) {
  287: 	my $d = $dirkeys[$i];
  288: 	# dont't remove document dir (shouldn't be empty anyway)
  289: 	next if ($d eq $docdir);
  290: 	if (-d $d) {
  291: 	    logger('INFO', "remove dir $d");
  292: 	    rmdir $d;
  293: 	}
  294:     }
  295: }
  296: 
  297: 
  298: #
  299: # delete_all_files(\%files, $dir)
  300: #
  301: # deletes all files with names from the list %files
  302: # in the directory $dir and its subdirectories 
  303: #
  304: sub delete_all_files {
  305:     my ($files, $dir) = @_;
  306: 
  307:     if (! opendir DIR, $dir) {
  308: 	logger('ERROR', "unable to read directory $dir!");
  309: 	$errcnt++;
  310: 	return;
  311:     }
  312:     my @fl = readdir DIR;
  313:     closedir DIR;
  314: 
  315:     foreach my $f (@fl) {
  316: 	next if ($f =~ /^\.{1,2}$/);
  317: 	if ($$files{$f}) {
  318: 	    # $f is in the file list
  319: 	    if (-f "$dir/$f") {
  320: 		# $f is a file
  321: 		if (unlink "$dir/$f") {
  322: 		    logger('INFO', "removed $f");
  323: 		} else {
  324: 		    logger('ERROR', "unable to delete $f!");
  325: 		    $errcnt++;
  326: 		}
  327: 	    } elsif (-d _) {
  328: 		# $f is a directory (unlink won't work)
  329: 		if ((system 'rm', '-r', "$dir/$f") == 0) {
  330: 		    logger('INFO', "removed directory $f");
  331: 		} else {
  332: 		    logger('ERROR', "unable to delete directory $f!");
  333: 		    $errcnt++;
  334: 		}
  335: 	    } else {
  336: 		logger('ERROR', "funny object $dir/$f!");
  337: 		$errcnt++;
  338: 	    }
  339: 	} else {
  340: 	    # $f is not in the list
  341: 	    if (-d "$dir/$f") {
  342: 		# recurse into directories
  343: 		logger('DEBUG', "enter $dir/$f");
  344: 		delete_all_files($files, "$dir/$f");
  345: 	    }
  346: 	}
  347:     }
  348: }
  349: 
  350: 
  351: #######################################################
  352: # main
  353: #
  354: 
  355: logger('START', "archiver $version at $archdate");
  356: 
  357: # make shure the right user is running this program
  358: my $user = getlogin;
  359: if (($user ne "archive")&&($user ne "root")) {
  360:     logger("ABORT", "you ($user) must be archive or root user to run this program!");
  361:     exit 1;
  362: }
  363: 
  364: # check for .archived file
  365: if (-f "$docdir/.archived") {
  366:     if (not $force_archive) {
  367: 	logger('ABORT', "already archived! (.archived file exists)");
  368: 	exit 1;
  369:     } else {
  370: 	logger('WARNING', "resource already archived? (.archived file exists)");
  371: 	$warncnt++;
  372:     }
  373: }
  374: 
  375: # use metacheck first
  376: if (open CHECK, "$checkprog -add-files $docdir |") {
  377:     my @errors;
  378:     my $msg;
  379:     while (<CHECK>) {
  380: 	chomp;
  381: 	if (/^ERROR/) {
  382: 	    push @errors, $_;
  383: 	}
  384: 	$msg = $_;
  385:     }
  386:     if ($msg =~ /^DONE/) {
  387: 	logger('DEBUG', "checking index file: $msg");
  388: 	logger('INFO', "resource '$docdir' check OK");
  389:     } else {
  390: 	logger('DEBUG', "errors checking index file:\n    " . join("\n    ", @errors) . "\n    $msg");
  391: 	logger('ABORT', "resource '$docdir' check failed!");
  392: 	exit 1;
  393:     }
  394: } else {
  395:     logger('ABORT', "unable to run $checkprog");
  396:     exit 1;
  397: }
  398: # if (system("$checkprog $docdir >/dev/null") == 0) {
  399: #     logger('INFO', "resource '$docdir' check OK");
  400: # } else {
  401: #     logger('ABORT', "resource '$docdir' check failed!!");
  402: #     exit 1;
  403: # }
  404: 
  405: # read index.meta file
  406: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
  407: 
  408: # check file and add archive date
  409: my $files_to_archive = read_resource_meta($rootnode);
  410: 
  411: logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
  412: 
  413: # remove .archived file
  414: if (-f "$docdir/.archived") {
  415:     if (unlink "$docdir/.archived") {
  416: 	logger('WARNING', "existing .archived file has been removed!");
  417: 	$warncnt++;
  418:     } else {
  419: 	logger('ERROR', "unable to remove existing .archived file!");
  420: 	$errcnt++;
  421:     }
  422: }
  423: 
  424: # remove junk files
  425: if ($delete_junk_files) {
  426:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
  427: }
  428: 
  429: # write new index.meta
  430: if ($errcnt > 0) {
  431:     logger('ABORT', "there were errors!");
  432:     exit 1;
  433: } else {
  434:     if ($fix_xml) {
  435: 	MPIWGStor::write_xml($document, $metafile);
  436:     }
  437: }
  438: 
  439: # start archiving
  440: my $archived_files = run_archive();
  441: my $num_archfiles = scalar keys %$archived_files;
  442: 
  443: logger('INFO', "$num_archfiles files archived");
  444: 
  445: # check list of archived files
  446: check_files($files_to_archive, $archived_files);
  447: 
  448: # delete files if all went OK
  449: if ($errcnt == 0) {
  450:     system("touch", "$docdir/.archived");
  451:     # remove junk files (again)
  452:     if ($delete_junk_files) {
  453: 	delete_all_files(\%MPIWGStor::junk_files, $docdir);
  454:     }
  455:     # remove archived files
  456:     if ($delete_data_files) {
  457: 	delete_files($archived_files);
  458:     }
  459: }
  460: 
  461: logger('INFO', "$warncnt warnings");
  462: logger('INFO', "$errcnt errors");
  463: if ($errcnt > 0) {
  464:     logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
  465:     exit 1;
  466: } else {
  467:     logger('DONE', "$num_archfiles files archived at " . stime(time));
  468: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>