File:  [Repository] / foxridge-archiver / archiver.pl
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Wed Mar 23 12:12:08 2005 UTC (19 years, 2 months ago) by casties
Branches: MAIN
CVS tags: HEAD

updated command line option parsing and logging

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.6 (ROC 23.3.2005)";
   20: 
   21: # short help
   22: my $shorthelp = "MPIWG archiver $version\nuse:\n  archiver [options] docpath\noptions:\n  -premigrate  don't delete archived files\n";
   23: 
   24: # read command line parameters
   25: my $args = MPIWGStor::parseargs;
   26: 
   27: # debug level
   28: my $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   29: 
   30: # rewrite XML file (necessary for archive date!)
   31: my $fix_xml = 1;
   32: my $xml_changed = 0;
   33: # XML namespace (not really implemented!)
   34: my $namespace = "";
   35: 
   36: # archive name (archive-path element, usually == $docdir)
   37: my $archname;
   38: # archive storage date (now)
   39: my $archdate = stime(time);
   40: 
   41: # delete "junk" files before archiving
   42: my $delete_junk_files = 1;
   43: 
   44: # delete data files after archiving
   45: my $delete_data_files = 1;
   46: 
   47: 
   48: #######################################################
   49: # external programs
   50: #
   51: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   52: if (! -x $archprog) {
   53:     logger('ABORT', "TSM client program '$archprog' missing!!\n");
   54:     exit 1;
   55: }
   56: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   57: if (! -x $checkprog) {
   58:     logger('ABORT', "meta data checking program '$checkprog' missing!!\n");
   59:     exit 1;
   60: }
   61: # log file for archiver
   62: my $log_file = "/var/log/mpiwg-archiver.log";
   63: if (! open LOG, ">>$log_file") {
   64:     logger('ABORT', "unable to write log file '$log_file'!!\n");
   65:     exit 1;
   66: }
   67: 
   68: #######################################################
   69: # check parameters that were passed to the program
   70: #
   71: if ($#ARGV < 0) {
   72:     print $shorthelp;
   73:     exit 1;
   74: }
   75: my $docdir = $$args{'path'};
   76: # strip double slashes
   77: $docdir =~ s/\/\//\//;
   78: # strip trailing slashes
   79: $docdir =~ s/\/+$//;
   80: if (! -d $docdir) {
   81:     logger('ABORT', "document directory \'$docdir\' doesn't exist!\n");
   82:     exit 1;
   83: }
   84: # don't delete archived files with "-premigrate"
   85: if (exists $$args{'premigrate'}) {
   86:     $delete_data_files = not $$args{'premigrate'};
   87: }
   88: if ($delete_data_files) {
   89:     logger('INFO', "going to remove successfully archived files from disk");
   90: }
   91: 
   92: my $metafile = "$docdir/index.meta";
   93: if (! -f $metafile) {
   94:     logger('ABORT', "metadata index file \'$metafile\' doesn't exist!\n");
   95:     exit 1;
   96: }
   97: 
   98: #######################################################
   99: # internal variables
  100: #
  101: 
  102: # number of errors
  103: my $errcnt = 0;
  104: # number of warnings
  105: my $warncnt = 0;
  106: 
  107: #######################################################
  108: # subroutines
  109: #
  110: 
  111: #
  112: # $files = read_resource_meta($rootnode)
  113: #
  114: # checks general resource meta information and reads the list of files
  115: #
  116: sub read_resource_meta {
  117:     my ($rootnode) = @_;
  118:     my %files;
  119:     #
  120:     # archive path
  121:     #
  122:     # get archive-path
  123:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
  124:     if (! $archname) {
  125: 	logger('ABORT', "archive-name element missing!!\n");
  126: 	exit 1;
  127:     }
  128: 
  129:     #
  130:     # files
  131:     #
  132:     my @filenodes = $rootnode->findnodes('child::file');
  133:     foreach my $fn (@filenodes) {
  134: 	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
  135: 	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
  136: 	logger('DEBUG', "FILE ($path)$name\n");
  137: 	my $f = ($path) ? "$path/$name" : "$name";
  138: 	$files{$f} = $name;
  139:     }
  140: 
  141:     #
  142:     # archive-storage-date
  143:     #
  144:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
  145:     if ($stordatenode) {
  146: 	logger('WARNING', "archive storage date exists! Resource already archived?\n");
  147: 	$warncnt++;
  148: 	# delete old date
  149: 	$stordatenode->removeChildNodes;
  150:     } else {
  151: 	# create new storage date node
  152: 	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
  153: 	# move after archive-path
  154: 	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
  155:     }
  156:     $stordatenode->appendTextNode($archdate);
  157:     $xml_changed++;
  158:     return \%files;
  159: }
  160: 
  161: 
  162: #
  163: # $%files = run_archive
  164: #
  165: # runs the archiver program on $docdir and returns a list of archived files
  166: #
  167: sub run_archive {
  168:     my %files;
  169:     print LOG "START archiver $version $archdate\n";
  170:     my $archcmd = $archprog;
  171:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
  172:     $archcmd .= " -description='$archname'";
  173:     $archcmd .= " '$docdir/'";
  174: 
  175:     print LOG "CMD: $archcmd\n";
  176:     if (open ARCH, "$archcmd 2>&1 |") {
  177: 	while (<ARCH>) {
  178: 	    chomp;
  179: 	    print LOG "ARCH: $_\n";
  180: 	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
  181: 		print "  ARCH: file '$1'\n";
  182: 		$files{$1} = "ok";
  183: 	    }
  184: 	    if (/^Archive processing of .* finished without failure./) {
  185: 		print "  ARCH: OK\n";
  186: 	    }
  187: 	}
  188:     } else {
  189: 	logger('ABORT', "unable to start archive command '$archcmd'!!\n");
  190: 	exit 1;
  191:     }
  192: 
  193:     return \%files;
  194: }
  195: 
  196: 
  197: #
  198: # check_files(\%files_to_archive, \%archived_files)
  199: #
  200: # compares the list of archived and to be archived files
  201: #
  202: sub check_files {
  203:     my ($to_archive, $archived) = @_;
  204: 
  205:     my $nt = scalar keys %$to_archive;
  206:     my $na = scalar keys %$archived;
  207: 
  208:     foreach my $ft (sort keys %$to_archive) {
  209: 	my $fp = "$docdir/$ft";
  210: 	#print "  fp: $fp\n";
  211: 	if ($$archived{$fp}) {
  212: 	    logger('DEBUG', "$ft archived OK\n");
  213: 	    $$archived{$fp} = "OK";
  214: 	} else {
  215: 	    logger('ERROR', "file '$ft' missing from archive!\n");
  216: 	    $errcnt++;
  217: 	}
  218:     }
  219: 
  220:     foreach my $fa (sort keys %$archived) {
  221: 	if ($$archived{$fa} ne "OK") {
  222: 	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
  223: 	    if ($MPIWGStor::index_files{$fn}) {
  224: 		logger('DEBUG', "$fa ignored\n");
  225: 		$na--;
  226: 	    } else {
  227: 		logger('WARNING', "$fa archived but not in list!\n");
  228: 		$warncnt++;
  229: 	    }
  230: 	}
  231:     }
  232: 
  233:     if ($nt > $na) {
  234: 	logger('WARNING', "less files were archived ($na vs. $nt)!\n");
  235:     } elsif ($na > $nt) {
  236: 	logger('WARNING', "more files were archived ($na vs. $nt)!\n");
  237:     }
  238: 
  239: }
  240: 
  241: 
  242: #
  243: # delete_files(\%files)
  244: #
  245: # deletes the files from the list (of absolute files) and their directories
  246: # if they are empty
  247: #
  248: sub delete_files {
  249:     my ($files) = @_;
  250:     my %dirs;
  251: 
  252:     foreach my $f (sort keys %$files) {
  253: 	my ($fn, $fp) = MPIWGStor::split_file_path($f);
  254: 	# collect all unique directories
  255:         if ($fp && (! $dirs{$fp})) {
  256: 	    $dirs{$fp} = $fp;
  257: 	}
  258: 	# don't delete index files
  259: 	next if ($MPIWGStor::index_files{$fn});
  260: 	# no file no delete
  261: 	next unless (-f $f);
  262: 	# delete files
  263: 	if (unlink $f) {
  264: 	    logger('INFO', "remove $f ($fn)\n");
  265: 	} else {
  266: 	    logger('ERROR', "unable to delete $f!\n");
  267: 	    $errcnt++;
  268: 	}
  269:     }
  270:     # try to delete all empty directories
  271:     my @dirkeys = sort keys %dirs;
  272:     # starting at the end to get to the subdirectories first
  273:     for (my $i = $#dirkeys; $i >= 0; $i--) {
  274: 	my $d = $dirkeys[$i];
  275: 	# dont't remove document dir (shouldn't be empty anyway)
  276: 	next if ($d eq $docdir);
  277: 	if (-d $d) {
  278: 	    logger('INFO', "remove dir $d\n");
  279: 	    rmdir $d;
  280: 	}
  281:     }
  282: }
  283: 
  284: 
  285: #
  286: # delete_all_files(\%files, $dir)
  287: #
  288: # deletes all files with names from the list %files
  289: # in the directory $dir and its subdirectories 
  290: #
  291: sub delete_all_files {
  292:     my ($files, $dir) = @_;
  293: 
  294:     if (! opendir DIR, $dir) {
  295: 	logger('ERROR', "unable to read directory $dir!\n");
  296: 	$errcnt++;
  297: 	return;
  298:     }
  299:     my @fl = readdir DIR;
  300:     closedir DIR;
  301: 
  302:     foreach my $f (@fl) {
  303: 	next if ($f =~ /^\.{1,2}$/);
  304: 	if ($$files{$f}) {
  305: 	    # $f is in the file list
  306: 	    if (-f "$dir/$f") {
  307: 		# $f is a file
  308: 		if (unlink "$dir/$f") {
  309: 		    logger('INFO', "removed $f\n");
  310: 		} else {
  311: 		    logger('ERROR', "unable to delete $f!\n");
  312: 		    $errcnt++;
  313: 		}
  314: 	    } elsif (-d _) {
  315: 		# $f is a directory (unlink won't work)
  316: 		if ((system 'rm', '-r', "$dir/$f") == 0) {
  317: 		    logger('INFO', "removed directory $f\n");
  318: 		} else {
  319: 		    logger('ERROR', "unable to delete directory $f!\n");
  320: 		    $errcnt++;
  321: 		}
  322: 	    } else {
  323: 		logger('ERROR', "funny object $dir/$f!\n");
  324: 		$errcnt++;
  325: 	    }
  326: 	} else {
  327: 	    # $f is not in the list
  328: 	    if (-d "$dir/$f") {
  329: 		# recurse into directories
  330: 		logger('DEBUG', "enter $dir/$f\n");
  331: 		delete_all_files($files, "$dir/$f");
  332: 	    }
  333: 	}
  334:     }
  335: }
  336: 
  337: 
  338: #######################################################
  339: # main
  340: #
  341: 
  342: logger('START', "archiver $version at $archdate\n");
  343: 
  344: # make shure the right user is running this program
  345: my $user = getlogin;
  346: #if (($user ne "archive")||($user ne "root")) {
  347: #    logger("ABORT", "you ($user) must be archive or root user to run this program!");
  348: #    exit 1;
  349: #}
  350: 
  351: # use metacheck first
  352: if (system("$checkprog $docdir >/dev/null") == 0) {
  353:     logger('INFO', "resource '$docdir' check OK\n");
  354: } else {
  355:     logger('ABORT', "resource '$docdir' check failed!!\n");
  356:     exit 1;
  357: }
  358: 
  359: # read index.meta file
  360: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
  361: 
  362: # check file and add archive date
  363: my $files_to_archive = read_resource_meta($rootnode);
  364: 
  365: print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
  366: 
  367: # check for .archived file
  368: if (-f "$docdir/.archived") {
  369:     if (unlink "$docdir/.archived") {
  370: 	logger('WARNING', "existing .archived file has been removed! Resource already archived?\n");
  371: 	$warncnt++;
  372:     } else {
  373: 	logger('ERROR', "unable to remove existing .archived file!\n");
  374: 	$errcnt++;
  375:     }
  376: }
  377: 
  378: # remove junk files
  379: if ($delete_junk_files) {
  380:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
  381: }
  382: 
  383: # write new index.meta
  384: if ($errcnt > 0) {
  385:     logger('ABORT', "there were errors!\n");
  386:     exit 1;
  387: } else {
  388:     if ($fix_xml) {
  389: 	MPIWGStor::write_xml($document, $metafile);
  390:     }
  391: }
  392: 
  393: # start archiving
  394: my $archived_files = run_archive();
  395: my $num_archfiles = scalar keys %$archived_files;
  396: 
  397: logger('INFO', "$num_archfiles files archived\n");
  398: 
  399: # check list of archived files
  400: check_files($files_to_archive, $archived_files);
  401: 
  402: # delete files if all went OK
  403: if ($errcnt == 0) {
  404:     system("touch", "$docdir/.archived");
  405:     # remove junk files (again)
  406:     if ($delete_junk_files) {
  407: 	delete_all_files(\%MPIWGStor::junk_files, $docdir);
  408:     }
  409:     # remove archived files
  410:     if ($delete_data_files) {
  411: 	delete_files($archived_files);
  412:     }
  413: }
  414: 
  415: logger('INFO', "$warncnt warnings\n");
  416: logger('INFO', "$errcnt errors\n");
  417: if ($errcnt > 0) {
  418:     logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
  419:     exit 1;
  420: } else {
  421:     logger('DONE', "$num_archfiles files archived at " . stime(time));
  422: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>