Mercurial > hg > foxridge-archiver
diff unarchiver.pl @ 0:30497c6a3eca
Initial revision
author | casties |
---|---|
date | Thu, 17 Jun 2004 17:58:42 +0200 |
parents | |
children | c4e6fc065b6d |
line wrap: on
line diff
#!/usr/local/bin/perl -w
#
# unarchiver.pl -- retrieve an archived document directory from TSM storage.
#
# Overview of what this script does:
#   1. validates the document directory given as the 'path' argument,
#   2. runs the external archive checking program on it,
#   3. reads the list of expected files/dirs from <docdir>/index.meta,
#   4. runs the TSM client ("dsmc retrieve") and parses its output,
#   5. compares expected and retrieved files,
#   6. writes index.meta back with an updated archive-recall-date.
#
# The helpers parseargs, logger, sstrip, split_file_path, read_xml and
# write_xml as well as the globals $debug and %index_files are not defined
# in this file; presumably they are exported by MPIWGStor -- confirm there.
#

use strict;

use XML::LibXML;

# MPIWG libraries
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# make output unbuffered
$| = 1;


#######################################################
# internal parameters
#

# program version
my $version = "0.1 (24.9.2003)";

# read command line parameters
my $args = parseargs();

# debug level
$debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;

# rewrite XML file (necessary for archive date!)
my $fix_xml = 1;
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";

# archive name (archive-path element, usually == $docdir)
my $archname;
# archive storage date
my $archdate;

#######################################################
# external programs
#
my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
if (! -x $archprog) {
    logger("ABORT", "TSM client program '$archprog' missing!!");
    exit 1;
}
my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
if (! -x $checkprog) {
    logger("ABORT", "archive checking program '$checkprog' missing!!");
    exit 1;
}
# log file for archiver (opened for appending)
my $log_file = "/var/tmp/unarchiver.log";
my $log_fh;
if (! open($log_fh, '>>', $log_file)) {
    logger("ABORT", "unable to write log file '$log_file'!!");
    exit 1;
}

#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
if (! $docdir) {
    print "ABORT: no document directory given!\n";
    exit 1;
}
# strip all trailing slashes
$docdir =~ s{/+$}{};
if (! -d $docdir) {
    print "ABORT: document directory \'$docdir\' doesn't exist!\n";
    exit 1;
}

my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
    print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
    exit 1;
}


#######################################################
# internal variables
#

# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;

#######################################################
# subroutines
#


#
# $quoted = _shell_quote($string)
#
# returns $string wrapped in single quotes so that /bin/sh passes it on as
# exactly one literal word; embedded single quotes are written as '\''.
#
sub _shell_quote {
    my ($str) = @_;
    $str =~ s/'/'\\''/g;
    return "'$str'";
}


#
# $files = read_resource_meta($rootnode)
#
# checks general resource meta information and reads the list of files.
#
# Sets the file-level $archname and $archdate, replaces (or creates) the
# archive-recall-date element below $rootnode with the current local time
# and increments $xml_changed.
#
# Returns a reference to a hash keyed by "path/name" (or just "name" when
# the path is empty) for every file and dir child element.
# Exits the program if the archive-path element is missing or empty.
#
sub read_resource_meta {
    my ($rootnode) = @_;
    my %files;

    #
    # archive path
    #
    $archname = sstrip($rootnode->findvalue('child::archive-path'));
    if (! $archname) {
        logger("ABORT", "archive-path element missing!!");
        exit 1;
    }

    #
    # files, then dirs (both are recorded the same way)
    #
    foreach my $kind ('file', 'dir') {
        my $label = uc $kind;
        foreach my $node ($rootnode->findnodes("child::$kind")) {
            my $name = sstrip($node->findvalue('child::name'));
            my $path = sstrip($node->findvalue('child::path'));
            logger("DEBUG", "$label: ($path)$name");
            my $f = ($path) ? "$path/$name" : "$name";
            $files{$f} = [$name];
        }
    }

    #
    # archive-storage-date
    #
    # assign the file-level variable (no 'my' here, which would shadow it)
    $archdate = sstrip($rootnode->findvalue('child::archive-storage-date'));
    if ($archdate) {
        logger("INFO", "archive storage date: $archdate");
    } else {
        logger("ERROR", "archive storage date missing!");
        $errcnt++;
    }

    #
    # archive-recall-date
    #
    my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
    if ($recalldatenode) {
        print "INFO: archive recall date exists!\n";
        # delete old date
        $recalldatenode->removeChildNodes;
    } else {
        # create new recall date node (appended as last child)
        $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
        # move it directly after archive-storage-date if that element exists;
        # otherwise it simply stays the last child
        my $storagedatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
        if ($storagedatenode) {
            $rootnode->insertAfter($recalldatenode, $storagedatenode);
        }
    }
    $recalldatenode->appendTextNode(scalar localtime);
    $xml_changed++;

    return \%files;
}


#
# $files = run_retrieve
#
# runs the retriever program on $docdir and returns a reference to a hash
# of retrieved files (file name => [size]) parsed from the program output.
# All output lines of the program are copied to the log file.
# Exits the program if the command can not be started.
#
# Sample output:
# Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
#
sub run_retrieve {
    my %files;
    print {$log_fh} "START unarchive $version ", scalar localtime, "\n";

    # the command goes through the shell (needed for 2>&1), so every
    # variable part is quoted to stay a single literal argument
    my $archcmd = $archprog;
    $archcmd .= " retrieve -subdir=yes -replace=all";
    $archcmd .= " -description=" . _shell_quote($archname);
    $archcmd .= " " . _shell_quote("$docdir/");

    my $archcnt = 0;
    print {$log_fh} "CMD: $archcmd\n";
    my $arch;
    if (! open($arch, '-|', "$archcmd 2>&1")) {
        logger("ABORT", "unable to start archive command '$archcmd'!!");
        exit 1;
    }
    while (my $line = <$arch>) {
        chomp $line;
        print {$log_fh} "ARCH: $line\n";
        if ($line =~ /
            Retrieving
            \s+([\d,]+)  # size
            \s+(\S+)     # file name
            \s+\[Done\]
            /x) {
            my $size = $1;
            my $file = $2;
            # size is printed with thousands separators
            $size =~ s/,//g;
            logger("DEBUG", "  RETRIEVE: file '$file'");
            $archcnt++;
            if ($files{$file}) {
                logger("WARNING", "file $file seems to be archived multiple times.");
                $warncnt++;
            }
            $files{$file} = [$size];
        }
    }
    # closing the pipe waits for the command and sets $? to its status
    if (! close($arch)) {
        my $status = $? >> 8;
        print {$log_fh} "EXIT: $status\n";
        logger("WARNING", "archive command exited with status $status!");
        $warncnt++;
    }
    logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");

    return \%files;
}


#
# check_files(\%files_to_retrieve, \%retrieved_files)
#
# compares the list of archived and retrieved files.
#
# Keys of %files_to_retrieve are relative to $docdir, keys of
# %retrieved_files are looked up as "$docdir/<key>". Every expected file
# that was not retrieved counts as an error; retrieved files that were not
# expected count as a warning unless their file name is in %index_files.
# Matched entries in %retrieved_files get "OK" stored in slot 1.
#
sub check_files {
    my ($to_retrieve, $retrieved) = @_;

    my $nt = scalar keys %$to_retrieve;
    my $na = scalar keys %$retrieved;

    foreach my $ft (sort keys %$to_retrieve) {
        my $fp = "$docdir/$ft";
        #logger("DEBUG", "  fp: $fp");
        if ($$retrieved{$fp}) {
            logger("DEBUG", "$ft retrieved OK");
            $$retrieved{$fp}->[1] = "OK";
        } else {
            logger("ERROR", "file entry '$ft' missing from archive!");
            $errcnt++;
        }
    }

    foreach my $fa (sort keys %$retrieved) {
        # entries marked above were expected
        next if $$retrieved{$fa}->[1];
        my ($fn) = split_file_path($fa);
        if ($index_files{$fn}) {
            # index files are not counted as retrieved files
            logger("DEBUG", "$fa ignored");
            $na--;
        } else {
            logger("WARNING", "$fa retrieved but not in list!");
            $warncnt++;
        }
    }

    if ($nt > $na) {
        logger("WARNING", "less files were retrieved ($na vs. $nt)!");
        $warncnt++;
    } elsif ($na > $nt) {
        logger("WARNING", "more files were retrieved ($na vs. $nt)!");
        $warncnt++;
    }

}



#######################################################
# main
#

logger("INFO", "unarchiver $version");

# make sure the right user is running this program
# (getlogin returns undef without a controlling terminal, so fall back to
# the name of the real user id)
my $user = getlogin || getpwuid($<) || "";
if (($user ne "archive") && ($user ne "root")) {
    logger("ABORT", "you must be archive or root user to run this program!");
    exit 1;
}

# use checkarchive first
if (system("$checkprog " . _shell_quote($docdir) . " >/dev/null") == 0) {
    logger("INFO", "archive '$docdir' check OK");
} else {
    logger("ABORT", "archive '$docdir' check failed!!");
    exit 1;
}

# read index.meta file
my ($document, $rootnode) = read_xml($metafile);

# check index file
my $archived_files = read_resource_meta($rootnode);
my $num_archived_files = scalar keys %$archived_files;

# check for .archived file
if (-f "$docdir/.archived") {
    logger("INFO", ".archived file exists.");
} else {
    logger("WARNING", "no .archived file!");
    $warncnt++;
}

logger("INFO", "$num_archived_files files to retrieve.");

# retrieve
my $retrieved_files = run_retrieve();

my $num_arch_files = (scalar keys %$retrieved_files);
if ($num_arch_files == 0) {
    logger("ABORT", "no files retrieved!!");
    exit 1;
}
logger("INFO", "$num_arch_files files retrieved");

# check list of archived files
check_files($archived_files, $retrieved_files);

# rewrite index.meta file (read_resource_meta always updates the recall
# date, so $xml_changed is set whenever we get here)
if ($fix_xml && $xml_changed) {
    write_xml($document, $metafile);
}

logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
if ($errcnt == 0) {
    logger("DONE", "$num_arch_files archived files retrieved");
} else {
    logger("ABORT", "there were $errcnt errors!!");
    exit 1;
}