File:  [Repository] / foxridge-archiver / archiver.pl
Revision 1.3: download - view: text, annotated - select for diffs - revision graph
Mon Jul 12 15:49:17 2004 UTC (19 years, 10 months ago) by casties
Branches: MAIN
CVS tags: HEAD
small fix for number of hash keys

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.5.2 (12.7.2004)";
   20: 
   21: # rewrite XML file (necessary for archive date!)
   22: my $fix_xml = 1;
   23: my $xml_changed = 0;
   24: # XML namespace (not really implemented!)
   25: my $namespace = "";
   26: 
   27: # archive name (archive-path element, usually == $docdir)
   28: my $archname;
   29: # archive storage date (now)
   30: my $archdate = stime(time);
   31: 
   32: # delete "junk" files before archiving
   33: my $delete_junk_files = 1;
   34: 
   35: # delete data files after archiving
   36: my $delete_data_files = 1;
   37: 
   38: 
   39: #######################################################
   40: # external programs
   41: #
   42: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   43: if (! -x $archprog) {
   44:     print "ABORT: TSM client program '$archprog' missing!!\n";
   45:     exit 1;
   46: }
   47: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   48: if (! -x $checkprog) {
   49:     print "ABORT: meta data checking program '$checkprog' missing!!\n";
   50:     exit 1;
   51: }
   52: # log file for archiver
   53: my $log_file = "/var/log/mpiwg-archiver.log";
   54: if (! open LOG, ">>$log_file") {
   55:     print "ABORT: unable to write log file '$log_file'!!\n";
   56:     exit 1;
   57: }
   58: 
   59: #######################################################
   60: # check parameters that were passed to the program
   61: #
   62: if ($#ARGV < 0) {
   63:     print "ABORT: no document directory given!\n";
   64:     exit 1;
   65: }
   66: my $docdir = $ARGV[0];
   67: # strip double slashes
   68: $docdir =~ s/\/\//\//;
   69: # strip trailing slashes
   70: $docdir =~ s/\/+$//;
   71: if (! -d $docdir) {
   72:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   73:     exit 1;
   74: }
   75: if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) {
   76:     $delete_data_files = 0;
   77: }
   78: 
   79: my $metafile = "$docdir/index.meta";
   80: if (! -f $metafile) {
   81:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   82:     exit 1;
   83: }
   84: 
   85: #######################################################
   86: # internal variables
   87: #
   88: 
   89: # number of errors
   90: my $errcnt = 0;
   91: # number of warnings
   92: my $warncnt = 0;
   93: 
   94: #######################################################
   95: # subroutines
   96: #
   97: 
   98: #
   99: # $files = read_resource_meta($rootnode)
  100: #
  101: # checks general resource meta information and reads the list of files
  102: #
  103: sub read_resource_meta {
  104:     my ($rootnode) = @_;
  105:     my %files;
  106:     #
  107:     # archive path
  108:     #
  109:     # get archive-path
  110:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
  111:     if (! $archname) {
  112: 	print "ABORT: archive-name element missing!!\n";
  113: 	exit 1;
  114:     }
  115: 
  116:     #
  117:     # files
  118:     #
  119:     my @filenodes = $rootnode->findnodes('child::file');
  120:     foreach my $fn (@filenodes) {
  121: 	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
  122: 	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
  123: 	print "FILE: ($path)$name\n";
  124: 	my $f = ($path) ? "$path/$name" : "$name";
  125: 	$files{$f} = $name;
  126:     }
  127: 
  128:     #
  129:     # archive-storage-date
  130:     #
  131:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
  132:     if ($stordatenode) {
  133: 	print "WARNING: archive storage date exists! Resource already archived?\n";
  134: 	$warncnt++;
  135: 	# delete old date
  136: 	$stordatenode->removeChildNodes;
  137:     } else {
  138: 	# create new storage date node
  139: 	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
  140: 	# move after archive-path
  141: 	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
  142:     }
  143:     $stordatenode->appendTextNode($archdate);
  144:     $xml_changed++;
  145:     return \%files;
  146: }
  147: 
  148: 
  149: #
  150: # $%files = run_archive
  151: #
  152: # runs the archiver program on $docdir and returns a list of archived files
  153: #
  154: sub run_archive {
  155:     my %files;
  156:     print LOG "START archiver $version $archdate\n";
  157:     my $archcmd = $archprog;
  158:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
  159:     $archcmd .= " -description='$archname'";
  160:     $archcmd .= " '$docdir/'";
  161: 
  162:     print LOG "CMD: $archcmd\n";
  163:     if (open ARCH, "$archcmd 2>&1 |") {
  164: 	while (<ARCH>) {
  165: 	    chomp;
  166: 	    print LOG "ARCH: $_\n";
  167: 	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
  168: 		print "  ARCH: file '$1'\n";
  169: 		$files{$1} = "ok";
  170: 	    }
  171: 	    if (/^Archive processing of .* finished without failure./) {
  172: 		print "  ARCH: OK\n";
  173: 	    }
  174: 	}
  175:     } else {
  176: 	print "ABORT: unable to start archive command '$archcmd'!!\n";
  177: 	exit 1;
  178:     }
  179: 
  180:     return \%files;
  181: }
  182: 
  183: 
  184: #
  185: # check_files(\%files_to_archive, \%archived_files)
  186: #
  187: # compares the list of archived and to be archived files
  188: #
  189: sub check_files {
  190:     my ($to_archive, $archived) = @_;
  191: 
  192:     my $nt = scalar keys %$to_archive;
  193:     my $na = scalar keys %$archived;
  194: 
  195:     foreach my $ft (sort keys %$to_archive) {
  196: 	my $fp = "$docdir/$ft";
  197: 	#print "  fp: $fp\n";
  198: 	if ($$archived{$fp}) {
  199: 	    print "DEBUG: $ft archived OK\n";
  200: 	    $$archived{$fp} = "OK";
  201: 	} else {
  202: 	    print "ERROR: file '$ft' missing from archive!\n";
  203: 	    $errcnt++;
  204: 	}
  205:     }
  206: 
  207:     foreach my $fa (sort keys %$archived) {
  208: 	if ($$archived{$fa} ne "OK") {
  209: 	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
  210: 	    if ($MPIWGStor::index_files{$fn}) {
  211: 		print "DEBUG: $fa ignored\n";
  212: 		$na--;
  213: 	    } else {
  214: 		print "WARNING: $fa archived but not in list!\n";
  215: 		$warncnt++;
  216: 	    }
  217: 	}
  218:     }
  219: 
  220:     if ($nt > $na) {
  221: 	print "WARNING: less files were archived ($na vs. $nt)!\n";
  222:     } elsif ($na > $nt) {
  223: 	print "WARNING: more files were archived ($na vs. $nt)!\n";
  224:     }
  225: 
  226: }
  227: 
  228: 
  229: #
  230: # delete_files(\%files)
  231: #
  232: # deletes the files from the list (of absolute files) and their directories
  233: # if they are empty
  234: #
  235: sub delete_files {
  236:     my ($files) = @_;
  237:     my %dirs;
  238: 
  239:     foreach my $f (sort keys %$files) {
  240: 	my ($fn, $fp) = MPIWGStor::split_file_path($f);
  241: 	# collect all unique directories
  242:         if ($fp && (! $dirs{$fp})) {
  243: 	    $dirs{$fp} = $fp;
  244: 	}
  245: 	# don't delete index files
  246: 	next if ($MPIWGStor::index_files{$fn});
  247: 	# no file no delete
  248: 	next unless (-f $f);
  249: 	# delete files
  250: 	if (unlink $f) {
  251: 	    print "INFO: remove $f ($fn)\n";
  252: 	} else {
  253: 	    print "ERROR: unable to delete $f!\n";
  254: 	    $errcnt++;
  255: 	}
  256:     }
  257:     # try to delete all empty directories
  258:     my @dirkeys = sort keys %dirs;
  259:     # starting at the end to get to the subdirectories first
  260:     for (my $i = $#dirkeys; $i >= 0; $i--) {
  261: 	my $d = $dirkeys[$i];
  262: 	# dont't remove document dir (shouldn't be empty anyway)
  263: 	next if ($d eq $docdir);
  264: 	if (-d $d) {
  265: 	    print "INFO: remove dir $d\n";
  266: 	    rmdir $d;
  267: 	}
  268:     }
  269: }
  270: 
  271: 
  272: #
  273: # delete_all_files(\%files, $dir)
  274: #
  275: # deletes all files with names from the list %files
  276: # in the directory $dir and its subdirectories 
  277: #
  278: sub delete_all_files {
  279:     my ($files, $dir) = @_;
  280: 
  281:     if (! opendir DIR, $dir) {
  282: 	print "ERROR: unable to read directory $dir!\n";
  283: 	$errcnt++;
  284: 	return;
  285:     }
  286:     my @fl = readdir DIR;
  287:     closedir DIR;
  288: 
  289:     foreach my $f (@fl) {
  290: 	next if ($f =~ /^\.{1,2}$/);
  291: 	if ($$files{$f}) {
  292: 	    # $f is in the file list
  293: 	    if (-f "$dir/$f") {
  294: 		# $f is a file
  295: 		if (unlink "$dir/$f") {
  296: 		    print "INFO: removed $f\n";
  297: 		} else {
  298: 		    print "ERROR: unable to delete $f!\n";
  299: 		    $errcnt++;
  300: 		}
  301: 	    } elsif (-d _) {
  302: 		# $f is a directory (unlink won't work)
  303: 		if ((system 'rm', '-r', "$dir/$f") == 0) {
  304: 		    print "INFO: removed directory $f\n";
  305: 		} else {
  306: 		    print "ERROR: unable to delete directory $f!\n";
  307: 		    $errcnt++;
  308: 		}
  309: 	    } else {
  310: 		print "ERROR: funny object $dir/$f!\n";
  311: 		$errcnt++;
  312: 	    }
  313: 	} else {
  314: 	    # $f is not in the list
  315: 	    if (-d "$dir/$f") {
  316: 		# recurse into directories
  317: 		print "DEBUG: enter $dir/$f\n";
  318: 		delete_all_files($files, "$dir/$f");
  319: 	    }
  320: 	}
  321:     }
  322: }
  323: 
  324: 
  325: #######################################################
  326: # main
  327: #
  328: 
  329: print "START: archiver $version at $archdate\n";
  330: 
  331: # make shure the right user is running this program
  332: my $user = getlogin;
  333: #if (($user ne "archive")||($user ne "root")) {
  334: #    logger("ABORT", "you ($user) must be archive or root user to run this program!");
  335: #    exit 1;
  336: #}
  337: 
  338: # use metacheck first
  339: if (system("$checkprog $docdir >/dev/null") == 0) {
  340:     print "INFO: resource '$docdir' check OK\n";
  341: } else {
  342:     print "ABORT: resource '$docdir' check failed!!\n";
  343:     exit 1;
  344: }
  345: 
  346: # read index.meta file
  347: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
  348: 
  349: # check file and add archive date
  350: my $files_to_archive = read_resource_meta($rootnode);
  351: 
  352: print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
  353: 
  354: # check for .archived file
  355: if (-f "$docdir/.archived") {
  356:     if (unlink "$docdir/.archived") {
  357: 	print "WARNING: existing .archived file has been removed! Resource already archived?\n";
  358: 	$warncnt++;
  359:     } else {
  360: 	print "ERROR: unable to remove existing .archived file!\n";
  361: 	$errcnt++;
  362:     }
  363: }
  364: 
  365: # remove junk files
  366: if ($delete_junk_files) {
  367:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
  368: }
  369: 
  370: # write new index.meta
  371: if ($errcnt > 0) {
  372:     print "ABORT: there were errors!\n";
  373:     exit 1;
  374: } else {
  375:     if ($fix_xml) {
  376: 	MPIWGStor::write_xml($document, $metafile);
  377:     }
  378: }
  379: 
  380: # start archiving
  381: my $archived_files = run_archive();
  382: my $num_archfiles = scalar keys %$archived_files;
  383: 
  384: print "INFO: $num_archfiles files archived\n";
  385: 
  386: # check list of archived files
  387: check_files($files_to_archive, $archived_files);
  388: 
  389: # delete files if all went OK
  390: if ($errcnt == 0) {
  391:     system("touch", "$docdir/.archived");
  392:     # remove junk files (again)
  393:     if ($delete_junk_files) {
  394: 	delete_all_files(\%MPIWGStor::junk_files, $docdir);
  395:     }
  396:     # remove archived files
  397:     if ($delete_data_files) {
  398: 	delete_files($archived_files);
  399:     }
  400: }
  401: 
  402: print "INFO: $warncnt warnings\n";
  403: print "INFO: $errcnt errors\n";
  404: if ($errcnt > 0) {
  405:     print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n";
  406:     exit 1;
  407: } else {
  408:     print "DONE: $num_archfiles files archived at ", stime(time), "\n";
  409: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>