File:  [Repository] / foxridge-archiver / archiver.pl
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Thu Jun 17 15:58:42 2004 UTC (20 years ago) by casties
Branches: vendor
CVS tags: release
first import

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: #######################################################
   15: # internal parameters
   16: #
   17: 
   18: # program version
   19: my $version = "0.5 (15.1.2004)";
   20: 
   21: # rewrite XML file (necessary for archive date!)
   22: my $fix_xml = 1;
   23: my $xml_changed = 0;
   24: # XML namespace (not really implemented!)
   25: my $namespace = "";
   26: 
   27: # archive name (archive-path element, usually == $docdir)
   28: my $archname;
   29: # archive storage date (now)
   30: my $archdate = stime(time);
   31: 
   32: # delete "junk" files before archiving
   33: my $delete_junk_files = 1;
   34: 
   35: # delete data files after archiving
   36: my $delete_data_files = 1;
   37: 
   38: 
   39: #######################################################
   40: # external programs
   41: #
   42: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   43: if (! -x $archprog) {
   44:     print "ABORT: TSM client program '$archprog' missing!!\n";
   45:     exit 1;
   46: }
   47: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
   48: if (! -x $checkprog) {
   49:     print "ABORT: meta data checking program '$checkprog' missing!!\n";
   50:     exit 1;
   51: }
   52: # log file for archiver
   53: my $log_file = "/var/log/mpiwg-archiver.log";
   54: if (! open LOG, ">>$log_file") {
   55:     print "ABORT: unable to write log file '$log_file'!!\n";
   56:     exit 1;
   57: }
   58: 
   59: #######################################################
   60: # check parameters that were passed to the program
   61: #
   62: if ($#ARGV < 0) {
   63:     print "ABORT: no document directory given!\n";
   64:     exit 1;
   65: }
   66: my $docdir = $ARGV[0];
   67: # strip trailing slashes
   68: $docdir =~ s/\/$//;
   69: if (! -d $docdir) {
   70:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   71:     exit 1;
   72: }
   73: if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) {
   74:     $delete_data_files = 0;
   75: }
   76: 
   77: my $metafile = "$docdir/index.meta";
   78: if (! -f $metafile) {
   79:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   80:     exit 1;
   81: }
   82: 
   83: #######################################################
   84: # internal variables
   85: #
   86: 
   87: # number of errors
   88: my $errcnt = 0;
   89: # number of warnings
   90: my $warncnt = 0;
   91: 
   92: #######################################################
   93: # subroutines
   94: #
   95: 
   96: #
   97: # $files = read_resource_meta($rootnode)
   98: #
   99: # checks general resource meta information and reads the list of files
  100: #
  101: sub read_resource_meta {
  102:     my ($rootnode) = @_;
  103:     my %files;
  104:     #
  105:     # archive path
  106:     #
  107:     # get archive-path
  108:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
  109:     if (! $archname) {
  110: 	print "ABORT: archive-name element missing!!\n";
  111: 	exit 1;
  112:     }
  113: 
  114:     #
  115:     # files
  116:     #
  117:     my @filenodes = $rootnode->findnodes('child::file');
  118:     foreach my $fn (@filenodes) {
  119: 	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
  120: 	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
  121: 	print "FILE: ($path)$name\n";
  122: 	my $f = ($path) ? "$path/$name" : "$name";
  123: 	$files{$f} = $name;
  124:     }
  125: 
  126:     #
  127:     # archive-storage-date
  128:     #
  129:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
  130:     if ($stordatenode) {
  131: 	print "WARNING: archive storage date exists! Resource already archived?\n";
  132: 	$warncnt++;
  133: 	# delete old date
  134: 	$stordatenode->removeChildNodes;
  135:     } else {
  136: 	# create new storage date node
  137: 	$stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
  138: 	# move after archive-path
  139: 	$rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
  140:     }
  141:     $stordatenode->appendTextNode($archdate);
  142:     $xml_changed++;
  143:     return \%files;
  144: }
  145: 
  146: 
  147: #
  148: # $%files = run_archive
  149: #
  150: # runs the archiver program on $docdir and returns a list of archived files
  151: #
  152: sub run_archive {
  153:     my %files;
  154:     print LOG "START archiver $version $archdate\n";
  155:     my $archcmd = $archprog;
  156:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
  157:     $archcmd .= " -description='$archname'";
  158:     $archcmd .= " '$docdir/'";
  159: 
  160:     print LOG "CMD: $archcmd\n";
  161:     if (open ARCH, "$archcmd 2>&1 |") {
  162: 	while (<ARCH>) {
  163: 	    chomp;
  164: 	    print LOG "ARCH: $_\n";
  165: 	    if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
  166: 		print "  ARCH: file '$1'\n";
  167: 		$files{$1} = "ok";
  168: 	    }
  169: 	    if (/^Archive processing of .* finished without failure./) {
  170: 		print "  ARCH: OK\n";
  171: 	    }
  172: 	}
  173:     } else {
  174: 	print "ABORT: unable to start archive command '$archcmd'!!\n";
  175: 	exit 1;
  176:     }
  177: 
  178:     return \%files;
  179: }
  180: 
  181: 
  182: #
  183: # check_files(\%files_to_archive, \%archived_files)
  184: #
  185: # compares the list of archived and to be archived files
  186: #
  187: sub check_files {
  188:     my ($to_archive, $archived) = @_;
  189: 
  190:     my $nt = scalar keys %$to_archive;
  191:     my $na = scalar keys %$archived;
  192: 
  193:     foreach my $ft (sort keys %$to_archive) {
  194: 	my $fp = "$docdir/$ft";
  195: 	#print "  fp: $fp\n";
  196: 	if ($$archived{$fp}) {
  197: 	    print "DEBUG: $ft archived OK\n";
  198: 	    $$archived{$fp} = "OK";
  199: 	} else {
  200: 	    print "ERROR: file '$ft' missing from archive!\n";
  201: 	    $errcnt++;
  202: 	}
  203:     }
  204: 
  205:     foreach my $fa (sort keys %$archived) {
  206: 	if ($$archived{$fa} ne "OK") {
  207: 	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
  208: 	    if ($MPIWGStor::index_files{$fn}) {
  209: 		print "DEBUG: $fa ignored\n";
  210: 		$na--;
  211: 	    } else {
  212: 		print "WARNING: $fa archived but not in list!\n";
  213: 		$warncnt++;
  214: 	    }
  215: 	}
  216:     }
  217: 
  218:     if ($nt > $na) {
  219: 	print "WARNING: less files were archived ($na vs. $nt)!\n";
  220:     } elsif ($na > $nt) {
  221: 	print "WARNING: more files were archived ($na vs. $nt)!\n";
  222:     }
  223: 
  224: }
  225: 
  226: 
  227: #
  228: # delete_files(\%files)
  229: #
  230: # deletes the files from the list (of absolute files) and their directories
  231: # if they are empty
  232: #
  233: sub delete_files {
  234:     my ($files) = @_;
  235:     my %dirs;
  236: 
  237:     foreach my $f (sort keys %$files) {
  238: 	my ($fn, $fp) = MPIWGStor::split_file_path($f);
  239: 	# collect all unique directories
  240:         if ($fp && (! $dirs{$fp})) {
  241: 	    $dirs{$fp} = $fp;
  242: 	}
  243: 	# don't delete index files
  244: 	next if ($MPIWGStor::index_files{$fn});
  245: 	# no file no delete
  246: 	next unless (-f $f);
  247: 	# delete files
  248: 	if (unlink $f) {
  249: 	    print "INFO: remove $f ($fn)\n";
  250: 	} else {
  251: 	    print "ERROR: unable to delete $f!\n";
  252: 	    $errcnt++;
  253: 	}
  254:     }
  255:     # try to delete all empty directories
  256:     my @dirkeys = sort keys %dirs;
  257:     # starting at the end to get to the subdirectories first
  258:     for (my $i = $#dirkeys; $i >= 0; $i--) {
  259: 	my $d = $dirkeys[$i];
  260: 	# dont't remove document dir (shouldn't be empty anyway)
  261: 	next if ($d eq $docdir);
  262: 	if (-d $d) {
  263: 	    print "INFO: remove dir $d\n";
  264: 	    rmdir $d;
  265: 	}
  266:     }
  267: }
  268: 
  269: 
  270: #
  271: # delete_all_files(\%files, $dir)
  272: #
  273: # deletes all files with names from the list %files
  274: # in the directory $dir and its subdirectories 
  275: #
  276: sub delete_all_files {
  277:     my ($files, $dir) = @_;
  278: 
  279:     if (! opendir DIR, $dir) {
  280: 	print "ERROR: unable to read directory $dir!\n";
  281: 	$errcnt++;
  282: 	return;
  283:     }
  284:     my @fl = readdir DIR;
  285:     closedir DIR;
  286: 
  287:     foreach my $f (@fl) {
  288: 	next if ($f =~ /^\.{1,2}$/);
  289: 	if ($$files{$f}) {
  290: 	    # $f is in the file list
  291: 	    if (-f "$dir/$f") {
  292: 		# $f is a file
  293: 		if (unlink "$dir/$f") {
  294: 		    print "INFO: removed $f\n";
  295: 		} else {
  296: 		    print "ERROR: unable to delete $f!\n";
  297: 		    $errcnt++;
  298: 		}
  299: 	    } elsif (-d _) {
  300: 		# $f is a directory (unlink won't work)
  301: 		if ((system 'rm', '-r', "$dir/$f") == 0) {
  302: 		    print "INFO: removed directory $f\n";
  303: 		} else {
  304: 		    print "ERROR: unable to delete directory $f!\n";
  305: 		    $errcnt++;
  306: 		}
  307: 	    } else {
  308: 		print "ERROR: funny object $dir/$f!\n";
  309: 		$errcnt++;
  310: 	    }
  311: 	} else {
  312: 	    # $f is not in the list
  313: 	    if (-d "$dir/$f") {
  314: 		# recurse into directories
  315: 		print "DEBUG: enter $dir/$f\n";
  316: 		delete_all_files($files, "$dir/$f");
  317: 	    }
  318: 	}
  319:     }
  320: }
  321: 
  322: 
  323: #######################################################
  324: # main
  325: #
  326: 
  327: print "START: archiver $version at $archdate\n";
  328: 
  329: # make shure the right user is running this program
  330: my $user = getlogin;
  331: #if (($user ne "archive")||($user ne "root")) {
  332: #    logger("ABORT", "you ($user) must be archive or root user to run this program!");
  333: #    exit 1;
  334: #}
  335: 
  336: # use metacheck first
  337: if (system("$checkprog $docdir >/dev/null") == 0) {
  338:     print "INFO: resource '$docdir' check OK\n";
  339: } else {
  340:     print "ABORT: resource '$docdir' check failed!!\n";
  341:     exit 1;
  342: }
  343: 
  344: # read index.meta file
  345: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
  346: 
  347: # check file and add archive date
  348: my $files_to_archive = read_resource_meta($rootnode);
  349: 
  350: print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
  351: 
  352: # check for .archived file
  353: if (-f "$docdir/.archived") {
  354:     if (unlink "$docdir/.archived") {
  355: 	print "WARNING: existing .archived file has been removed! Resource already archived?\n";
  356: 	$warncnt++;
  357:     } else {
  358: 	print "ERROR: unable to remove existing .archived file!\n";
  359: 	$errcnt++;
  360:     }
  361: }
  362: 
  363: # remove junk files
  364: if ($delete_junk_files) {
  365:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
  366: }
  367: 
  368: # write new index.meta
  369: if ($errcnt > 0) {
  370:     print "ABORT: there were errors!\n";
  371:     exit 1;
  372: } else {
  373:     if ($fix_xml) {
  374: 	MPIWGStor::write_xml($document, $metafile);
  375:     }
  376: }
  377: 
  378: # start archiving
  379: my $archived_files = run_archive;
  380: 
  381: print "INFO: ", scalar keys %$archived_files, " files archived\n";
  382: 
  383: # check list of archived files
  384: check_files($files_to_archive, $archived_files);
  385: 
  386: # delete files if all went OK
  387: if ($errcnt == 0) {
  388:     system("touch", "$docdir/.archived");
  389:     # remove junk files (again)
  390:     if ($delete_junk_files) {
  391: 	delete_all_files(\%MPIWGStor::junk_files, $docdir);
  392:     }
  393:     # remove archived files
  394:     if ($delete_data_files) {
  395: 	delete_files($archived_files);
  396:     }
  397: }
  398: 
  399: print "INFO: $warncnt warnings\n";
  400: print "INFO: $errcnt errors\n";
  401: my $num_archfiles = %$archived_files + 1;
  402: if ($errcnt > 0) {
  403:     print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n";
  404:     exit 1;
  405: } else {
  406:     print "DONE: $num_archfiles files archived at ", stime(time), "\n";
  407: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>