--- foxridge-archiver/unarchiver.pl 2004/06/17 15:58:42 1.1 +++ foxridge-archiver/unarchiver.pl 2005/09/20 17:30:34 1.5 @@ -3,6 +3,7 @@ use strict; use XML::LibXML; +use FileHandle; # MPIWG libraries use lib '/usr/local/mpiwg/archive'; @@ -17,7 +18,7 @@ $|=1; # # program version -my $version = "0.1 (24.9.2003)"; +my $version = "0.3.1 (21.6.2005)"; # read command line parameters my $args = parseargs; @@ -55,6 +56,7 @@ if (! open LOG, ">>$log_file") { logger("ABORT", "unable to write log file '$log_file'!!"); exit 1; } +LOG->autoflush(1); ####################################################### # check parameters that were passed to the program @@ -77,7 +79,6 @@ if (! -f $metafile) { exit 1; } - ####################################################### # internal variables # @@ -91,6 +92,14 @@ my $warncnt = 0; # subroutines # +# construct document's parent dir +sub get_parent { + my ($dirname) = @_; + my $dirparent = $dirname; + $dirparent =~ s!/[^/]+$!!; + return $dirparent; +} + # # $files = read_resource_meta($rootnode) @@ -167,22 +176,37 @@ sub read_resource_meta { # -# $%files = run_retrieve +# $num_files = run_retrieve($docdir, $docmount, \%files) # -# runs the retriever program on $docdir and returns a list of archived files +# Runs the retriever program on $docdir and returns the number of unarchived files. +# All filenames are put in %files. +# $docmount is the mount point of the doc partition in cases when the new mount point +# is different. # # Sample output: -# Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] -# +# (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] +# Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done] sub run_retrieve { - my %files; - print LOG "START unarchive $version ", scalar localtime, "\n"; + my ($archdir, $archmount, $files) = @_; + my $archparent; + if ($archmount eq $archdir) { + # no explicit mount point + $archparent = get_parent($archdir); + } else { + # destination dir is mount point + $archparent = $archmount; + } + logger("INFO", "looking for archives in $archmount..."); + + print LOG "START unarchive $version on ", scalar localtime, "\n"; my $archcmd = $archprog; $archcmd .= " retrieve -subdir=yes -replace=all"; - $archcmd .= " -description='$archname'"; - $archcmd .= " '$docdir/'"; + $archcmd .= " -description='$archname'"; # archive name + $archcmd .= " '$archmount/'"; # archive mount point + $archcmd .= " '$archparent/'"; # destination dir name my $archcnt = 0; + my $numfiles = 0; print LOG "CMD: $archcmd\n"; if (open ARCH, "$archcmd 2>&1 |") { while () { @@ -192,6 +216,8 @@ sub run_retrieve { Retrieving \s+([\d,]+) # size \s+(\S+) # file name + \s+--> + \s+(\S+) # destination file name \s+\[Done\] /x) { my $size = $1; @@ -199,19 +225,20 @@ sub run_retrieve { $size =~ s/,//g; logger("DEBUG", " RETRIEVE: file '$file'"); $archcnt++; - if ($files{$file}) { + if ($$files{$file}) { logger("WARNING", "file $file seems to be archived multiple times."); $warncnt++; } - $files{$file} = [$size]; + $$files{$file} = [$size]; } } - logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files."); + $numfiles = (scalar keys %$files); + logger("INFO", "$archcnt archives of $numfiles files (in $archmount)."); } else { logger("ABORT", "unable to start archive command '$archcmd'!!"); exit 1; } - return \%files; + return $numfiles; } @@ -271,16 +298,16 @@ logger("INFO", "unarchiver $version"); # make shure the right user is running this program my $user = getlogin; -if (($user ne "archive")&&($user ne "root")) { +if (($user)&&($user ne "archive")&&($user ne "root")) { logger("ABORT", "you must be archive or root user to run this program!"); exit 1; } # use checkarchive first if (system("$checkprog $docdir >/dev/null") == 0) { - logger("INFO", "archive '$docdir' check OK"); + logger("INFO", "archive \"$docdir\" check OK"); } else { - logger("ABORT", "archive '$docdir' check failed!!"); + logger("ABORT", "archive \"$docdir\" check failed!!"); exit 1; } @@ -301,18 +328,37 @@ if (-f "$docdir/.archived") { logger("INFO", "$num_archived_files files to retrieve."); +# save current index.meta +park_file($metafile); + # retrieve -my $retrieved_files = run_retrieve; +my %retrieved_files = (); +my $archcnt = 0; + +if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { + # TSM needs two different paths because of historical mount points :-( + # try the new one first + $archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files); + if ($archcnt == 0) { + # and then the old one + $archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files); + } +} else { + # otherwise we assume that it works + $archcnt += run_retrieve($docdir, $docdir, \%retrieved_files); +} + +# restore current index.meta +unpark_file($metafile); -my $num_arch_files = (scalar keys %$retrieved_files); -if ($num_arch_files == 0) { +if ($archcnt == 0) { logger("ABORT", "no files retrieved!!"); exit 1; } -logger("INFO", "$num_arch_files files retrieved"); +logger("INFO", "$archcnt files retrieved"); # check list of archived files -check_files($archived_files, $retrieved_files); +check_files($archived_files, \%retrieved_files); # rewrite index.meta file write_xml($document, $metafile); @@ -320,7 +366,7 @@ write_xml($document, $metafile); logger("INFO", "$warncnt warnings"); logger("INFO", "$errcnt errors"); if ($errcnt == 0) { - logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved"); + logger("DONE", "$archcnt archived files retrieved"); } else { logger("ABORT", "there were $errcnt errors!!"); exit 1;