version 1.1, 2004/06/17 15:58:42
|
version 1.9, 2017/03/16 17:00:43
|
Line 1
|
Line 1
|
#!/usr/local/bin/perl -w |
#!/usr/bin/perl -w |
|
|
use strict; |
use strict; |
|
|
use XML::LibXML; |
use XML::LibXML; |
|
use FileHandle; |
|
|
# MPIWG libraries |
# MPIWG libraries |
use lib '/usr/local/mpiwg/archive'; |
use lib '/usr/local/mpiwg/archive'; |
Line 17 $|=1;
|
Line 18 $|=1;
|
# |
# |
|
|
# program version |
# program version |
my $version = "0.1 (24.9.2003)"; |
my $version = "0.3.4 (11.8.2009)"; |
|
|
# read command line parameters |
# read command line parameters |
my $args = parseargs; |
my $args = parseargs; |
Line 55 if (! open LOG, ">>$log_file") {
|
Line 56 if (! open LOG, ">>$log_file") {
|
logger("ABORT", "unable to write log file '$log_file'!!"); |
logger("ABORT", "unable to write log file '$log_file'!!"); |
exit 1; |
exit 1; |
} |
} |
|
LOG->autoflush(1); |
|
|
####################################################### |
####################################################### |
# check parameters that were passed to the program |
# check parameters that were passed to the program |
Line 77 if (! -f $metafile) {
|
Line 79 if (! -f $metafile) {
|
exit 1; |
exit 1; |
} |
} |
|
|
|
|
####################################################### |
####################################################### |
# internal variables |
# internal variables |
# |
# |
Line 91 my $warncnt = 0;
|
Line 92 my $warncnt = 0;
|
# subroutines |
# subroutines |
# |
# |
|
|
|
# construct document's parent dir
#
# $dirparent = get_parent($dirname)
#
# Returns $dirname with its last path component removed, e.g.
# "/a/b/c" gives "/a/b". Trailing slashes are ignored, so "/a/b/c/"
# also gives "/a/b". A single top-level component ("/foo") gives the
# empty string; a name without any removable component (e.g. "/" or
# "foo") is returned unchanged.
sub get_parent {
    my ($dirname) = @_;
    my $dirparent = $dirname;
    # drop trailing slashes first (but keep a bare "/"), otherwise
    # "/a/b/" would not match below and come back unchanged
    $dirparent =~ s!(?<=[^/])/+$!!;
    # cut off the last path component
    $dirparent =~ s!/[^/]+$!!;
    return $dirparent;
}
|
|
|
|
# |
# |
# $files = read_resource_meta($rootnode) |
# $files = read_resource_meta($rootnode) |
Line 167 sub read_resource_meta {
|
Line 176 sub read_resource_meta {
|
|
|
|
|
# |
# |
# \%files = run_retrieve |
# $num_files = run_retrieve($docdir, $docmount, \%files) |
# |
# |
# runs the retriever program on $docdir and returns a list of archived files |
# Runs the retriever program on $docdir and returns the number of unarchived files. |
|
# All filenames are put in %files. |
|
# $docmount is the mount point of the doc partition in cases when the new mount point |
|
# is different. |
# |
# |
# Sample output: |
# Sample output: |
# Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] |
# (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] |
# |
# Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done] |
sub run_retrieve { |
sub run_retrieve { |
my %files; |
my ($archdir, $archmount, $files) = @_; |
print LOG "START unarchive $version ", scalar localtime, "\n"; |
my $archparent; |
|
if ($archmount eq $archdir) { |
|
# no explicit mount point |
|
$archparent = get_parent($archdir); |
|
} else { |
|
# destination dir is mount point |
|
$archparent = $archmount; |
|
} |
|
logger("INFO", "looking for archives in $archmount..."); |
|
|
|
print LOG "START unarchive $version on ", scalar localtime, "\n"; |
my $archcmd = $archprog; |
my $archcmd = $archprog; |
$archcmd .= " retrieve -subdir=yes -replace=all"; |
$archcmd .= " retrieve -subdir=yes -replace=all -ifnewer"; |
$archcmd .= " -description='$archname'"; |
$archcmd .= " -description='$archname'"; # archive name |
$archcmd .= " '$docdir/'"; |
$archcmd .= " '$archmount/'"; # archive mount point |
|
$archcmd .= " '$archparent/'"; # destination dir name |
|
|
|
logger('INFO', "querying TSM server for $archmount, please wait..."); |
|
|
my $archcnt = 0; |
my $archcnt = 0; |
|
my $numfiles = 0; |
print LOG "CMD: $archcmd\n"; |
print LOG "CMD: $archcmd\n"; |
if (open ARCH, "$archcmd 2>&1 |") { |
if (open ARCH, "$archcmd 2>&1 |") { |
while (<ARCH>) { |
while (<ARCH>) { |
Line 192 sub run_retrieve {
|
Line 218 sub run_retrieve {
|
Retrieving |
Retrieving |
\s+([\d,]+) # size |
\s+([\d,]+) # size |
\s+(\S+) # file name |
\s+(\S+) # file name |
|
\s+--> |
|
\s+(\S+) # destination file name |
\s+\[Done\] |
\s+\[Done\] |
/x) { |
/x) { |
my $size = $1; |
my $size = $1; |
Line 199 sub run_retrieve {
|
Line 227 sub run_retrieve {
|
$size =~ s/,//g; |
$size =~ s/,//g; |
logger("DEBUG", " RETRIEVE: file '$file'"); |
logger("DEBUG", " RETRIEVE: file '$file'"); |
$archcnt++; |
$archcnt++; |
if ($files{$file}) { |
if ($$files{$file}) { |
logger("WARNING", "file $file seems to be archived multiple times."); |
logger("WARNING", "file $file seems to be archived multiple times."); |
$warncnt++; |
$warncnt++; |
} |
} |
$files{$file} = [$size]; |
$$files{$file} = [$size]; |
} |
} |
} |
} |
logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files."); |
$numfiles = (scalar keys %$files); |
|
logger("INFO", "$archcnt archives of $numfiles files (in $archmount)."); |
} else { |
} else { |
logger("ABORT", "unable to start archive command '$archcmd'!!"); |
logger("ABORT", "unable to start archive command '$archcmd'!!"); |
exit 1; |
exit 1; |
} |
} |
return \%files; |
return $numfiles; |
} |
} |
|
|
|
|
Line 270 sub check_files {
|
Line 299 sub check_files {
|
logger("INFO", "unarchiver $version"); |
logger("INFO", "unarchiver $version"); |
|
|
# make sure the right user is running this program |
# make sure the right user is running this program |
my $user = getlogin; |
my $user = getlogin || getpwuid($<); |
if (($user ne "archive")&&($user ne "root")) { |
if (($user)&&($user ne "archive")&&($user ne "root")) { |
logger("ABORT", "you must be archive or root user to run this program!"); |
logger("ABORT", "you must be archive or root user to run this program!"); |
exit 1; |
exit 1; |
} |
} |
|
|
# use checkarchive first |
# use checkarchive first |
if (system("$checkprog $docdir >/dev/null") == 0) { |
if (system("$checkprog $docdir >/dev/null") == 0) { |
logger("INFO", "archive '$docdir' check OK"); |
logger("INFO", "archive \"$docdir\" check OK"); |
} else { |
} else { |
logger("ABORT", "archive '$docdir' check failed!!"); |
logger("ABORT", "archive \"$docdir\" check failed!!"); |
exit 1; |
exit 1; |
} |
} |
|
|
Line 301 if (-f "$docdir/.archived") {
|
Line 330 if (-f "$docdir/.archived") {
|
|
|
logger("INFO", "$num_archived_files files to retrieve."); |
logger("INFO", "$num_archived_files files to retrieve."); |
|
|
|
# save current index.meta |
|
park_file($metafile); |
|
|
# retrieve |
# retrieve |
my $retrieved_files = run_retrieve; |
my %retrieved_files = (); |
|
my $archcnt = 0; |
|
|
|
if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
|
# TSM needs two different paths because of historical mount points :-( |
|
# try the new one first |
|
$archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files); |
|
if ($archcnt == 0) { |
|
# and then the old one |
|
$archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files); |
|
} |
|
} else { |
|
# otherwise we assume that it works |
|
$archcnt += run_retrieve($docdir, $docdir, \%retrieved_files); |
|
} |
|
|
|
# restore current index.meta |
|
unpark_file($metafile); |
|
|
my $num_arch_files = (scalar keys %$retrieved_files); |
if ($archcnt == 0) { |
if ($num_arch_files == 0) { |
|
logger("ABORT", "no files retrieved!!"); |
logger("ABORT", "no files retrieved!!"); |
exit 1; |
exit 1; |
} |
} |
logger("INFO", "$num_arch_files files retrieved"); |
logger("INFO", "$archcnt files retrieved"); |
|
|
# check list of archived files |
# check list of archived files |
check_files($archived_files, $retrieved_files); |
check_files($archived_files, \%retrieved_files); |
|
|
# rewrite index.meta file |
# rewrite index.meta file |
write_xml($document, $metafile); |
write_xml($document, $metafile); |
Line 320 write_xml($document, $metafile);
|
Line 368 write_xml($document, $metafile);
|
logger("INFO", "$warncnt warnings"); |
logger("INFO", "$warncnt warnings"); |
logger("INFO", "$errcnt errors"); |
logger("INFO", "$errcnt errors"); |
if ($errcnt == 0) { |
if ($errcnt == 0) { |
logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved"); |
logger("DONE", "$archcnt archived files retrieved"); |
} else { |
} else { |
logger("ABORT", "there were $errcnt errors!!"); |
logger("ABORT", "there were $errcnt errors!!"); |
exit 1; |
exit 1; |