File:  [Repository] / foxridge-archiver / archiver.pl
Revision 1.7: download - view: text, annotated - select for diffs - revision graph
Wed Oct 5 13:35:12 2005 UTC (18 years, 8 months ago) by casties
Branches: MAIN
CVS tags: HEAD
more option passing to metacheck from archivemany via archiver

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.7.2 (ROC 5.10.2005)";
   20: 
   21: # short help
   22: my $help = "MPIWG archiver $version
   23: use:  archiver [options] docpath
   24: options:
   25:   -debug  show debugging info
   26:   -premigrate  don't delete archived files
   27:   -force  archive even if already archived
   28:   -replace  rewrite index file
   29: ";
   30: 
   31: # read command line parameters
   32: my $args = MPIWGStor::parseargs;
   33: if (! scalar(%$args)) {
   34:     print $help, "\n";
   35:     exit 1;
   36: }
   37: 
   38: # debug level
   39: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   40: 
   41: # force archiving
   42: my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
   43: 
   44: # rewrite index file
   45: my $rewrite_index = (exists $$args{'replace'}) ? $$args{'replace'} : 0;
   46: 
   47: # rewrite XML file (necessary for archive date!)
   48: my $fix_xml = 1;
   49: my $xml_changed = 0;
   50: 
   51: # XML namespace (not really implemented!)
   52: my $namespace = "";
   53: 
   54: # archive name (archive-path element, usually == $docdir)
   55: my $archname;
   56: 
   57: # archive storage date (now)
   58: my $archdate = stime(time);
   59: 
   60: # delete "junk" files before archiving
   61: my $delete_junk_files = 1;
   62: 
   63: # delete data files after archiving
   64: my $delete_data_files = 1;
   65: 
   66: # don't delete archived files with "-premigrate"
   67: if (exists $$args{'premigrate'}) {
   68:     $delete_data_files = not $$args{'premigrate'};
   69: }
   70: if ($delete_data_files) {
   71:     logger('INFO', "going to remove successfully archived files from disk");
   72: }
   73: 
   74: 
   75: #######################################################
   76: # external programs
   77: #
   78: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   79: if (! -x $archprog) {
   80:     logger('ABORT', "TSM client program '$archprog' missing!");
   81:     exit 1;
   82: }
   83: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   84: if (! -x $checkprog) {
   85:     logger('ABORT', "meta data checking program '$checkprog' missing!");
   86:     exit 1;
   87: }
   88: # log file for archiver
   89: my $log_file = "/var/log/mpiwg-archiver.log";
   90: if (! open LOG, ">>$log_file") {
   91:     logger('ABORT', "unable to write log file '$log_file'!");
   92:     exit 1;
   93: }
   94: 
   95: #######################################################
   96: # check parameters that were passed to the program
   97: #
   98: 
   99: my $docdir = $$args{'path'};
  100: # strip double slashes
  101: $docdir =~ s/\/\//\//;
  102: # strip trailing slashes
  103: $docdir =~ s/\/+$//;
  104: if (! -d $docdir) {
  105:     logger('ABORT', "document directory \'$docdir\' doesn't exist!");
  106:     exit 1;
  107: }
  108: 
  109: my $metafile = "$docdir/index.meta";
  110: if (! -f $metafile) {
  111:     logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
  112:     exit 1;
  113: }
  114: 
  115: #######################################################
  116: # internal variables
  117: #
  118: 
  119: # number of errors
  120: my $errcnt = 0;
  121: # number of warnings
  122: my $warncnt = 0;
  123: 
  124: #######################################################
  125: # subroutines
  126: #
  127: 
  128: #
  129: # $files = read_resource_meta($rootnode)
  130: #
  131: # checks general resource meta information and reads the list of files
  132: #
  133: sub read_resource_meta {
  134:     my ($rootnode) = @_;
  135:     my %files;
  136:     #
  137:     # archive path
  138:     #
  139:     # get archive-path
  140:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
  141:     if (! $archname) {
  142: 	logger('ABORT', "archive-name element missing!");
  143: 	exit 1;
  144:     }
  145: 
  146:     #
  147:     # files
  148:     #
  149:     my @filenodes = $rootnode->findnodes('child::file');
  150:     foreach my $fn (@filenodes) {
  151: 	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
  152: 	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
  153: 	logger('DEBUG', "FILE ($path)$name");
  154: 	my $f = ($path) ? "$path/$name" : "$name";
  155: 	$files{$f} = $name;
  156:     }
  157: 
  158:     #
  159:     # archive-storage-date
  160:     #
  161:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
  162:     if ($stordatenode) {
  163: 	logger('WARNING', "archive storage date exists! Resource already archived?");
  164: 	$warncnt++;
  165: 	# delete old date
  166: 	$stordatenode->removeChildNodes;
  167:     } else {
  168: 	# create new storage date node
  169: 	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
  170: 	# move after archive-path
  171: 	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
  172:     }
  173:     $stordatenode->appendTextNode($archdate);
  174:     $xml_changed++;
  175:     return \%files;
  176: }
  177: 
  178: 
  179: #
  180: # $%files = run_archive
  181: #
  182: # runs the archiver program on $docdir and returns a list of archived files
  183: #
  184: sub run_archive {
  185:     my %files;
  186:     print LOG "START archiver $version $archdate\n";
  187:     my $archcmd = $archprog;
  188:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
  189:     $archcmd .= " -description='$archname'";
  190:     $archcmd .= " '$docdir/'";
  191: 
  192:     print LOG "CMD: $archcmd\n";
  193:     if (open ARCH, "$archcmd 2>&1 |") {
  194: 	while (<ARCH>) {
  195: 	    chomp;
  196: 	    print LOG "ARCH: $_\n";
  197: 	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
  198: 		print "  ARCH: file '$1'\n";
  199: 		$files{$1} = "ok";
  200: 	    }
  201: 	    if (/^Archive processing of .* finished without failure./) {
  202: 		print "  ARCH: OK\n";
  203: 	    }
  204: 	}
  205:     } else {
  206: 	logger('ABORT', "unable to start archive command '$archcmd'!!");
  207: 	exit 1;
  208:     }
  209: 
  210:     return \%files;
  211: }
  212: 
  213: 
  214: #
  215: # check_files(\%files_to_archive, \%archived_files)
  216: #
  217: # compares the list of archived and to be archived files
  218: #
  219: sub check_files {
  220:     my ($to_archive, $archived) = @_;
  221: 
  222:     my $nt = scalar keys %$to_archive;
  223:     my $na = scalar keys %$archived;
  224: 
  225:     foreach my $ft (sort keys %$to_archive) {
  226: 	my $fp = "$docdir/$ft";
  227: 	#print "  fp: $fp\n";
  228: 	if ($$archived{$fp}) {
  229: 	    logger('DEBUG', "$ft archived OK");
  230: 	    $$archived{$fp} = "OK";
  231: 	} else {
  232: 	    logger('ERROR', "file '$ft' missing from archive!");
  233: 	    $errcnt++;
  234: 	}
  235:     }
  236: 
  237:     foreach my $fa (sort keys %$archived) {
  238: 	if ($$archived{$fa} ne "OK") {
  239: 	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
  240: 	    if ($MPIWGStor::index_files{$fn}) {
  241: 		logger('DEBUG', "$fa ignored");
  242: 		$na--;
  243: 	    } else {
  244: 		logger('WARNING', "$fa archived but not in list!");
  245: 		$warncnt++;
  246: 	    }
  247: 	}
  248:     }
  249: 
  250:     if ($nt > $na) {
  251: 	logger('WARNING', "less files were archived ($na vs. $nt)!");
  252:     } elsif ($na > $nt) {
  253: 	logger('WARNING', "more files were archived ($na vs. $nt)!");
  254:     }
  255: 
  256: }
  257: 
  258: 
  259: #
  260: # delete_files(\%files)
  261: #
  262: # deletes the files from the list (of absolute files) and their directories
  263: # if they are empty
  264: #
  265: sub delete_files {
  266:     my ($files) = @_;
  267:     my %dirs;
  268: 
  269:     foreach my $f (sort keys %$files) {
  270: 	my ($fn, $fp) = MPIWGStor::split_file_path($f);
  271: 	# collect all unique directories
  272:         if ($fp && (! $dirs{$fp})) {
  273: 	    $dirs{$fp} = $fp;
  274: 	}
  275: 	# don't delete index files
  276: 	next if ($MPIWGStor::index_files{$fn});
  277: 	# no file no delete
  278: 	next unless (-f $f);
  279: 	# delete files
  280: 	if (unlink $f) {
  281: 	    logger('DEBUG', "remove $f ($fn)");
  282: 	} else {
  283: 	    logger('ERROR', "unable to delete $f!");
  284: 	    $errcnt++;
  285: 	}
  286:     }
  287:     # try to delete all empty directories
  288:     my @dirkeys = sort keys %dirs;
  289:     # starting at the end to get to the subdirectories first
  290:     for (my $i = $#dirkeys; $i >= 0; $i--) {
  291: 	my $d = $dirkeys[$i];
  292: 	# dont't remove document dir (shouldn't be empty anyway)
  293: 	next if ($d eq $docdir);
  294: 	if (-d $d) {
  295: 	    logger('DEBUG', "remove dir $d");
  296: 	    rmdir $d;
  297: 	}
  298:     }
  299: }
  300: 
  301: 
  302: #
  303: # delete_all_files(\%files, $dir)
  304: #
  305: # deletes all files with names from the list %files
  306: # in the directory $dir and its subdirectories 
  307: #
  308: sub delete_all_files {
  309:     my ($files, $dir) = @_;
  310: 
  311:     if (! opendir DIR, $dir) {
  312: 	logger('ERROR', "unable to read directory $dir!");
  313: 	$errcnt++;
  314: 	return;
  315:     }
  316:     my @fl = readdir DIR;
  317:     closedir DIR;
  318: 
  319:     foreach my $f (@fl) {
  320: 	next if ($f =~ /^\.{1,2}$/);
  321: 	if ($$files{$f}) {
  322: 	    # $f is in the file list
  323: 	    if (-f "$dir/$f") {
  324: 		# $f is a file
  325: 		if (unlink "$dir/$f") {
  326: 		    logger('DEBUG', "removed $f");
  327: 		} else {
  328: 		    logger('ERROR', "unable to delete $f!");
  329: 		    $errcnt++;
  330: 		}
  331: 	    } elsif (-d _) {
  332: 		# $f is a directory (unlink won't work)
  333: 		if ((system 'rm', '-r', "$dir/$f") == 0) {
  334: 		    logger('DEBUG', "removed directory $f");
  335: 		} else {
  336: 		    logger('ERROR', "unable to delete directory $f!");
  337: 		    $errcnt++;
  338: 		}
  339: 	    } else {
  340: 		logger('ERROR', "funny object $dir/$f!");
  341: 		$errcnt++;
  342: 	    }
  343: 	} else {
  344: 	    # $f is not in the list
  345: 	    if (-d "$dir/$f") {
  346: 		# recurse into directories
  347: 		logger('DEBUG', "enter $dir/$f");
  348: 		delete_all_files($files, "$dir/$f");
  349: 	    }
  350: 	}
  351:     }
  352: }
  353: 
  354: 
  355: #######################################################
  356: # main
  357: #
  358: 
  359: logger('START', "archiver $version at $archdate");
  360: 
  361: # make shure the right user is running this program
  362: my $user = getlogin;
  363: if (($user ne "archive")&&($user ne "root")) {
  364:     logger("WARNING", "you ($user) should be archive or root user to run this program!");
  365: }
  366: 
  367: # check for .archived file
  368: if (-f "$docdir/.archived") {
  369:     if (not $force_archive) {
  370: 	logger('ABORT', "already archived! (.archived file exists)");
  371: 	exit 1;
  372:     } else {
  373: 	logger('WARNING', "resource already archived? (.archived file exists)");
  374: 	$warncnt++;
  375:     }
  376: }
  377: 
  378: # use metacheck first
  379: my $check_opts="-add-files";
  380: if ($rewrite_index) {
  381:     $check_opts = "-replace";
  382: }
  383: if (open CHECK, "$checkprog $check_opts $docdir |") {
  384:     my @errors;
  385:     my $msg;
  386:     while (<CHECK>) {
  387: 	chomp;
  388: 	if (/^ERROR/) {
  389: 	    push @errors, $_;
  390: 	}
  391: 	$msg = $_;
  392:     }
  393:     if ($msg =~ /^DONE/) {
  394: 	logger('DEBUG', "checking index file: $msg");
  395: 	logger('INFO', "resource '$docdir' check OK");
  396:     } else {
  397: 	logger('DEBUG', "errors checking index file:\n    " . join("\n    ", @errors) . "\n    $msg");
  398: 	logger('ABORT', "resource '$docdir' check failed!");
  399: 	exit 1;
  400:     }
  401: } else {
  402:     logger('ABORT', "unable to run $checkprog");
  403:     exit 1;
  404: }
  405: # if (system("$checkprog $docdir >/dev/null") == 0) {
  406: #     logger('INFO', "resource '$docdir' check OK");
  407: # } else {
  408: #     logger('ABORT', "resource '$docdir' check failed!!");
  409: #     exit 1;
  410: # }
  411: 
  412: # read index.meta file
  413: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
  414: 
  415: # check file and add archive date
  416: my $files_to_archive = read_resource_meta($rootnode);
  417: 
  418: logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
  419: 
  420: # remove .archived file
  421: if (-f "$docdir/.archived") {
  422:     if (unlink "$docdir/.archived") {
  423: 	logger('WARNING', "existing .archived file has been removed!");
  424: 	$warncnt++;
  425:     } else {
  426: 	logger('ERROR', "unable to remove existing .archived file!");
  427: 	$errcnt++;
  428:     }
  429: }
  430: 
  431: # remove junk files
  432: if ($delete_junk_files) {
  433:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
  434: }
  435: 
  436: # write new index.meta
  437: if ($errcnt > 0) {
  438:     logger('ABORT', "there were errors!");
  439:     exit 1;
  440: } else {
  441:     if ($fix_xml) {
  442: 	MPIWGStor::write_xml($document, $metafile);
  443:     }
  444: }
  445: 
  446: # start archiving
  447: my $archived_files = run_archive();
  448: my $num_archfiles = scalar keys %$archived_files;
  449: 
  450: logger('INFO', "$num_archfiles files archived");
  451: 
  452: # check list of archived files
  453: check_files($files_to_archive, $archived_files);
  454: 
  455: # delete files if all went OK
  456: if ($errcnt == 0) {
  457:     system("touch", "$docdir/.archived");
  458:     # remove junk files (again)
  459:     if ($delete_junk_files) {
  460: 	delete_all_files(\%MPIWGStor::junk_files, $docdir);
  461:     }
  462:     # remove archived files
  463:     if ($delete_data_files) {
  464: 	delete_files($archived_files);
  465:     }
  466: }
  467: 
  468: logger('INFO', "$warncnt warnings");
  469: logger('INFO', "$errcnt errors");
  470: if ($errcnt > 0) {
  471:     logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
  472:     exit 1;
  473: } else {
  474:     logger('DONE', "$num_archfiles files archived at " . stime(time));
  475: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>