File:  [Repository] / foxridge-archiver / unarchiver.pl
Revision 1.1.1.1 (vendor branch): download - view: text, annotated - select for diffs - revision graph
Thu Jun 17 15:58:42 2004 UTC (20 years ago) by casties
Branches: vendor
CVS tags: release
first import

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: 
    7: # MPIWG libraries
    8: use lib '/usr/local/mpiwg/archive';
    9: use MPIWGStor;
   10: 
   11: # make output unbuffered
   12: $|=1;
   13: 
   14: 
   15: #######################################################
   16: # internal parameters
   17: #
   18: 
   19: # program version
   20: my $version = "0.1 (24.9.2003)";
   21: 
   22: # read command line parameters
   23: my $args = parseargs;
   24: 
   25: # debug level
   26: $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
   27: 
   28: # rewrite XML file (necessary for archive date!)
   29: my $fix_xml = 1;
   30: my $xml_changed = 0;
   31: # XML namespace (not really implemented!)
   32: my $namespace = "";
   33: 
   34: # archive name (archive-path element, usually == $docdir)
   35: my $archname;
   36: # archive storage date
   37: my $archdate;
   38: 
   39: #######################################################
   40: # external programs
   41: #
   42: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   43: if (! -x $archprog) {
   44:     logger("ABORT", "TSM client program '$archprog' missing!!");
   45:     exit 1;
   46: }
   47: my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
   48: if (! -x $checkprog) {
   49:     logger("ABORT", "archive checking program '$checkprog' missing!!");
   50:     exit 1;
   51: }
   52: # log file for archiver
   53: my $log_file = "/var/tmp/unarchiver.log";
   54: if (! open LOG, ">>$log_file") {
   55:     logger("ABORT", "unable to write log file '$log_file'!!");
   56:     exit 1;
   57: }
   58: 
   59: #######################################################
   60: # check parameters that were passed to the program
   61: #
   62: my $docdir = $$args{'path'};
   63: if (! $docdir) {
   64:     print "ABORT: no document directory given!\n";
   65:     exit 1;
   66: }
   67: # strip trailing slashes
   68: $docdir =~ s/\/$//;
   69: if (! -d $docdir) {
   70:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   71:     exit 1;
   72: }
   73: 
   74: my $metafile = "$docdir/index.meta";
   75: if (! -f $metafile) {
   76:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   77:     exit 1;
   78: }
   79: 
   80: 
   81: #######################################################
   82: # internal variables
   83: #
   84: 
   85: # number of errors
   86: my $errcnt = 0;
   87: # number of warnings
   88: my $warncnt = 0;
   89: 
   90: #######################################################
   91: # subroutines
   92: #
   93: 
   94: 
   95: #
   96: # $files = read_resource_meta($rootnode)
   97: #
   98: # checks general resource meta information and reads the list of files
   99: #
  100: sub read_resource_meta {
  101:     my ($rootnode) = @_;
  102:     my %files;
  103:     #
  104:     # archive path
  105:     #
  106:     # get archive-path
  107:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
  108:     if (! $archname) {
  109: 	logger("ABORT", "archive-name element missing!!");
  110: 	exit 1;
  111:     }
  112: 
  113:     #
  114:     # files
  115:     #
  116:     my @filenodes = $rootnode->findnodes('child::file');
  117:     foreach my $fn (@filenodes) {
  118: 	my $name = sstrip($fn->findvalue('child::name'));
  119: 	my $path = sstrip($fn->findvalue('child::path'));
  120: 	logger("DEBUG", "FILE: ($path)$name");
  121: 	my $f = ($path) ? "$path/$name" : "$name";
  122: 	$files{$f} = [$name];
  123:     }
  124: 
  125:     #
  126:     # dirs
  127:     #
  128:     my @dirnodes = $rootnode->findnodes('child::dir');
  129:     foreach my $fn (@dirnodes) {
  130: 	my $name = sstrip($fn->findvalue('child::name'));
  131: 	my $path = sstrip($fn->findvalue('child::path'));
  132: 	logger("DEBUG", "DIR: ($path)$name");
  133: 	my $f = ($path) ? "$path/$name" : "$name";
  134: 	$files{$f} = [$name];
  135:     }
  136: 
  137:     #
  138:     # archive-storage-date
  139:     #
  140:     my $archdate = $rootnode->find('child::archive-storage-date');
  141:     if ($archdate) {
  142: 	logger("INFO", "archive storage date: $archdate");
  143:     } else {
  144: 	logger("ERROR", "archive storage date missing!");
  145: 	$errcnt++;
  146:     }
  147: 
  148:     #
  149:     # archive-recall-date
  150:     #
  151:     my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
  152:     if ($recalldatenode) {
  153: 	print "INFO: archive recall date exists!\n";
  154: 	# delete old date
  155: 	$recalldatenode->removeChildNodes;
  156:     } else {
  157: 	# create new storage date node
  158: 	$recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
  159: 	# move after archive-path
  160: 	$rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
  161:     }
  162:     $recalldatenode->appendTextNode(scalar localtime);
  163:     $xml_changed++;
  164: 
  165:     return \%files;
  166: }
  167: 
  168: 
  169: #
  170: # $%files = run_retrieve
  171: #
  172: # runs the retriever program on $docdir and returns a list of archived files
  173: #
  174: # Sample output:
  175: # Retrieving          17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
  176: #
  177: sub run_retrieve {
  178:     my %files;
  179:     print LOG "START unarchive $version ", scalar localtime, "\n";
  180:     my $archcmd = $archprog;
  181:     $archcmd .= " retrieve -subdir=yes -replace=all";
  182:     $archcmd .= " -description='$archname'";
  183:     $archcmd .= " '$docdir/'";
  184: 
  185:     my $archcnt = 0;
  186:     print LOG "CMD: $archcmd\n";
  187:     if (open ARCH, "$archcmd 2>&1 |") {
  188: 	while (<ARCH>) {
  189: 	    chomp;
  190: 	    print LOG "ARCH: $_\n";
  191: 	    if (/
  192: 		Retrieving
  193: 		\s+([\d,]+)    # size
  194: 		\s+(\S+)       # file name
  195: 		\s+\[Done\]
  196: 		/x) {
  197: 		my $size = $1;
  198: 		my $file = $2;
  199: 		$size =~ s/,//g;
  200: 		logger("DEBUG", "  RETRIEVE: file '$file'");
  201: 		$archcnt++;
  202: 		if ($files{$file}) {
  203: 		    logger("WARNING", "file $file seems to be archived multiple times.");
  204: 		    $warncnt++;
  205: 		} 
  206: 		$files{$file} = [$size];
  207: 	    }
  208: 	}
  209: 	logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
  210:     } else {
  211: 	logger("ABORT", "unable to start archive command '$archcmd'!!");
  212: 	exit 1;
  213:     }
  214:     return \%files;
  215: }
  216: 
  217: 
  218: #
  219: # check_files(\%files_to_retrieve, \%retrieved_files)
  220: #
  221: # compares the list of archived and retrieved files
  222: #
  223: sub check_files {
  224:     my ($to_retrieve, $retrieved) = @_;
  225: 
  226:     my $nt = scalar keys %$to_retrieve;
  227:     my $na = scalar keys %$retrieved;
  228: 
  229:     foreach my $ft (sort keys %$to_retrieve) {
  230: 	my $fp = "$docdir/$ft";
  231: 	#logger("DEBUG", "  fp: $fp");
  232: 	if ($$retrieved{$fp}) {
  233: 	    logger("DEBUG", "$ft retrieved OK");
  234: 	    $$retrieved{$fp}->[1] = "OK";
  235: 	} else {
  236: 	    logger("ERROR", "file entry '$ft' missing from archive!");
  237: 	    $errcnt++;
  238: 	}
  239:     }
  240: 
  241:     foreach my $fa (sort keys %$retrieved) {
  242: 	if (! $$retrieved{$fa}->[1]) {
  243: 	    my ($fn, $fp) = split_file_path($fa);
  244: 	    if ($index_files{$fn}) {
  245: 		logger("DEBUG", "$fa ignored");
  246: 		$na--;
  247: 	    } else {
  248: 		logger("WARNING", "$fa retrieved but not in list!");
  249: 		$warncnt++;
  250: 	    }
  251: 	}
  252:     }
  253: 
  254:     if ($nt > $na) {
  255: 	logger("WARNING", "less files were retrieved ($na vs. $nt)!");
  256: 	$warncnt++;
  257:     } elsif ($na > $nt) {
  258: 	logger("WARNING", "more files were retrieved ($na vs. $nt)!");
  259: 	$warncnt++;
  260:     }
  261: 
  262: }
  263: 
  264: 
  265: 
  266: #######################################################
  267: # main
  268: #
  269: 
  270: logger("INFO", "unarchiver $version");
  271: 
  272: # make shure the right user is running this program
  273: my $user = getlogin;
  274: if (($user ne "archive")&&($user ne "root")) {
  275:     logger("ABORT", "you must be archive or root user to run this program!");
  276:     exit 1;
  277: }
  278: 
  279: # use checkarchive first
  280: if (system("$checkprog $docdir >/dev/null") == 0) {
  281:     logger("INFO", "archive '$docdir' check OK");
  282: } else {
  283:     logger("ABORT", "archive '$docdir' check failed!!");
  284:     exit 1;
  285: }
  286: 
  287: # read index.meta file
  288: my ($document, $rootnode) = read_xml($metafile);
  289: 
  290: # check index file
  291: my $archived_files = read_resource_meta($rootnode);
  292: my $num_archived_files = scalar keys %$archived_files;
  293: 
  294: # check for .archived file
  295: if (-f "$docdir/.archived") {
  296:     logger("INFO", ".archived file exists.");
  297: } else {
  298:     logger("WARNING", "no .archived file!");
  299:     $warncnt++;
  300: }
  301: 
  302: logger("INFO", "$num_archived_files files to retrieve.");
  303: 
  304: # retrieve
  305: my $retrieved_files = run_retrieve;
  306: 
  307: my $num_arch_files = (scalar keys %$retrieved_files);
  308: if ($num_arch_files == 0) {
  309:     logger("ABORT", "no files retrieved!!");
  310:     exit 1;
  311: }
  312: logger("INFO", "$num_arch_files files retrieved");
  313: 
  314: # check list of archived files
  315: check_files($archived_files, $retrieved_files);
  316: 
  317: # rewrite index.meta file
  318: write_xml($document, $metafile);
  319: 
  320: logger("INFO", "$warncnt warnings");
  321: logger("INFO", "$errcnt errors");
  322: if ($errcnt == 0) {
  323:     logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved");
  324: } else {
  325:     logger("ABORT", "there were $errcnt errors!!");
  326:     exit 1;
  327: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>