#!/usr/local/bin/perl -w use strict; use XML::LibXML; # MPIWG libraries use lib '/usr/local/mpiwg/archive'; use MPIWGStor; # make output unbuffered $|=1; ####################################################### # internal parameters # # program version my $version = "0.1 (24.9.2003)"; # read command line parameters my $args = parseargs; # debug level $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0; # rewrite XML file (necessary for archive date!) my $fix_xml = 1; my $xml_changed = 0; # XML namespace (not really implemented!) my $namespace = ""; # archive name (archive-path element, usually == $docdir) my $archname; # archive storage date my $archdate; ####################################################### # external programs # my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; if (! -x $archprog) { logger("ABORT", "TSM client program '$archprog' missing!!"); exit 1; } my $checkprog = "/usr/local/mpiwg/archive/archivecheck"; if (! -x $checkprog) { logger("ABORT", "archive checking program '$checkprog' missing!!"); exit 1; } # log file for archiver my $log_file = "/var/tmp/unarchiver.log"; if (! open LOG, ">>$log_file") { logger("ABORT", "unable to write log file '$log_file'!!"); exit 1; } ####################################################### # check parameters that were passed to the program # my $docdir = $$args{'path'}; if (! $docdir) { print "ABORT: no document directory given!\n"; exit 1; } # strip trailing slashes $docdir =~ s/\/$//; if (! -d $docdir) { print "ABORT: document directory \'$docdir\' doesn't exist!\n"; exit 1; } my $metafile = "$docdir/index.meta"; if (! -f $metafile) { print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; exit 1; } ####################################################### # internal variables # # number of errors my $errcnt = 0; # number of warnings my $warncnt = 0; ####################################################### # subroutines # # # $files = read_resource_meta($rootnode) # # checks general resource meta information and reads the list of files # sub read_resource_meta { my ($rootnode) = @_; my %files; # # archive path # # get archive-path $archname = sstrip($rootnode->findvalue('child::archive-path')); if (! $archname) { logger("ABORT", "archive-name element missing!!"); exit 1; } # # files # my @filenodes = $rootnode->findnodes('child::file'); foreach my $fn (@filenodes) { my $name = sstrip($fn->findvalue('child::name')); my $path = sstrip($fn->findvalue('child::path')); logger("DEBUG", "FILE: ($path)$name"); my $f = ($path) ? "$path/$name" : "$name"; $files{$f} = [$name]; } # # dirs # my @dirnodes = $rootnode->findnodes('child::dir'); foreach my $fn (@dirnodes) { my $name = sstrip($fn->findvalue('child::name')); my $path = sstrip($fn->findvalue('child::path')); logger("DEBUG", "DIR: ($path)$name"); my $f = ($path) ? "$path/$name" : "$name"; $files{$f} = [$name]; } # # archive-storage-date # my $archdate = $rootnode->find('child::archive-storage-date'); if ($archdate) { logger("INFO", "archive storage date: $archdate"); } else { logger("ERROR", "archive storage date missing!"); $errcnt++; } # # archive-recall-date # my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1); if ($recalldatenode) { print "INFO: archive recall date exists!\n"; # delete old date $recalldatenode->removeChildNodes; } else { # create new storage date node $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date"); # move after archive-path $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1)); } $recalldatenode->appendTextNode(scalar localtime); $xml_changed++; return \%files; } # # $%files = run_retrieve # # runs the retriever program on $docdir and returns a list of archived files # # Sample output: # Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] # sub run_retrieve { my %files; print LOG "START unarchive $version ", scalar localtime, "\n"; my $archcmd = $archprog; $archcmd .= " retrieve -subdir=yes -replace=all"; $archcmd .= " -description='$archname'"; $archcmd .= " '$docdir/'"; my $archcnt = 0; print LOG "CMD: $archcmd\n"; if (open ARCH, "$archcmd 2>&1 |") { while () { chomp; print LOG "ARCH: $_\n"; if (/ Retrieving \s+([\d,]+) # size \s+(\S+) # file name \s+\[Done\] /x) { my $size = $1; my $file = $2; $size =~ s/,//g; logger("DEBUG", " RETRIEVE: file '$file'"); $archcnt++; if ($files{$file}) { logger("WARNING", "file $file seems to be archived multiple times."); $warncnt++; } $files{$file} = [$size]; } } logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files."); } else { logger("ABORT", "unable to start archive command '$archcmd'!!"); exit 1; } return \%files; } # # check_files(\%files_to_retrieve, \%retrieved_files) # # compares the list of archived and retrieved files # sub check_files { my ($to_retrieve, $retrieved) = @_; my $nt = scalar keys %$to_retrieve; my $na = scalar keys %$retrieved; foreach my $ft (sort keys %$to_retrieve) { my $fp = "$docdir/$ft"; #logger("DEBUG", " fp: $fp"); if ($$retrieved{$fp}) { logger("DEBUG", "$ft retrieved OK"); $$retrieved{$fp}->[1] = "OK"; } else { logger("ERROR", "file entry '$ft' missing from archive!"); $errcnt++; } } foreach my $fa (sort keys %$retrieved) { if (! $$retrieved{$fa}->[1]) { my ($fn, $fp) = split_file_path($fa); if ($index_files{$fn}) { logger("DEBUG", "$fa ignored"); $na--; } else { logger("WARNING", "$fa retrieved but not in list!"); $warncnt++; } } } if ($nt > $na) { logger("WARNING", "less files were retrieved ($na vs. $nt)!"); $warncnt++; } elsif ($na > $nt) { logger("WARNING", "more files were retrieved ($na vs. $nt)!"); $warncnt++; } } ####################################################### # main # logger("INFO", "unarchiver $version"); # make shure the right user is running this program my $user = getlogin; if (($user ne "archive")&&($user ne "root")) { logger("ABORT", "you must be archive or root user to run this program!"); exit 1; } # use checkarchive first if (system("$checkprog $docdir >/dev/null") == 0) { logger("INFO", "archive '$docdir' check OK"); } else { logger("ABORT", "archive '$docdir' check failed!!"); exit 1; } # read index.meta file my ($document, $rootnode) = read_xml($metafile); # check index file my $archived_files = read_resource_meta($rootnode); my $num_archived_files = scalar keys %$archived_files; # check for .archived file if (-f "$docdir/.archived") { logger("INFO", ".archived file exists."); } else { logger("WARNING", "no .archived file!"); $warncnt++; } logger("INFO", "$num_archived_files files to retrieve."); # retrieve my $retrieved_files = run_retrieve; my $num_arch_files = (scalar keys %$retrieved_files); if ($num_arch_files == 0) { logger("ABORT", "no files retrieved!!"); exit 1; } logger("INFO", "$num_arch_files files retrieved"); # check list of archived files check_files($archived_files, $retrieved_files); # rewrite index.meta file write_xml($document, $metafile); logger("INFO", "$warncnt warnings"); logger("INFO", "$errcnt errors"); if ($errcnt == 0) { logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved"); } else { logger("ABORT", "there were $errcnt errors!!"); exit 1; }