diff unarchiver.pl @ 0:30497c6a3eca

Initial revision
author casties
date Thu, 17 Jun 2004 17:58:42 +0200
parents
children c4e6fc065b6d
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/unarchiver.pl	Thu Jun 17 17:58:42 2004 +0200
@@ -0,0 +1,327 @@
+#!/usr/local/bin/perl -w
+
+use strict;
+
+use XML::LibXML;
+
+# MPIWG libraries
+use lib '/usr/local/mpiwg/archive';
+use MPIWGStor;
+
+# make output unbuffered
+$|=1;
+
+
+#######################################################
+# internal parameters
+#
+
+# program version
+my $version = "0.1 (24.9.2003)";
+
+# read command line parameters
+my $args = parseargs;
+
+# debug level
+$debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
+
+# rewrite XML file (necessary for archive date!)
+my $fix_xml = 1;
+my $xml_changed = 0;
+# XML namespace (not really implemented!)
+my $namespace = "";
+
+# archive name (archive-path element, usually == $docdir)
+my $archname;
+# archive storage date
+my $archdate;
+
+#######################################################
+# external programs
+#
+my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
+if (! -x $archprog) {
+    logger("ABORT", "TSM client program '$archprog' missing!!");
+    exit 1;
+}
+my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
+if (! -x $checkprog) {
+    logger("ABORT", "archive checking program '$checkprog' missing!!");
+    exit 1;
+}
+# log file for archiver
+my $log_file = "/var/tmp/unarchiver.log";
+if (! open LOG, ">>$log_file") {
+    logger("ABORT", "unable to write log file '$log_file'!!");
+    exit 1;
+}
+
+#######################################################
+# check parameters that were passed to the program
+#
+my $docdir = $$args{'path'};
+if (! $docdir) {
+    print "ABORT: no document directory given!\n";
+    exit 1;
+}
+# strip trailing slashes
+$docdir =~ s/\/$//;
+if (! -d $docdir) {
+    print "ABORT: document directory \'$docdir\' doesn't exist!\n";
+    exit 1;
+}
+
+my $metafile = "$docdir/index.meta";
+if (! -f $metafile) {
+    print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
+    exit 1;
+}
+
+
+#######################################################
+# internal variables
+#
+
+# number of errors
+my $errcnt = 0;
+# number of warnings
+my $warncnt = 0;
+
+#######################################################
+# subroutines
+#
+
+
+#
+# $files = read_resource_meta($rootnode)
+#
+# checks general resource meta information and reads the list of files
+#
+sub read_resource_meta {
+    my ($rootnode) = @_;
+    my %files;
+    #
+    # archive path
+    #
+    # get archive-path
+    $archname = sstrip($rootnode->findvalue('child::archive-path'));
+    if (! $archname) {
+	logger("ABORT", "archive-name element missing!!");
+	exit 1;
+    }
+
+    #
+    # files
+    #
+    my @filenodes = $rootnode->findnodes('child::file');
+    foreach my $fn (@filenodes) {
+	my $name = sstrip($fn->findvalue('child::name'));
+	my $path = sstrip($fn->findvalue('child::path'));
+	logger("DEBUG", "FILE: ($path)$name");
+	my $f = ($path) ? "$path/$name" : "$name";
+	$files{$f} = [$name];
+    }
+
+    #
+    # dirs
+    #
+    my @dirnodes = $rootnode->findnodes('child::dir');
+    foreach my $fn (@dirnodes) {
+	my $name = sstrip($fn->findvalue('child::name'));
+	my $path = sstrip($fn->findvalue('child::path'));
+	logger("DEBUG", "DIR: ($path)$name");
+	my $f = ($path) ? "$path/$name" : "$name";
+	$files{$f} = [$name];
+    }
+
+    #
+    # archive-storage-date
+    #
+    my $archdate = $rootnode->find('child::archive-storage-date');
+    if ($archdate) {
+	logger("INFO", "archive storage date: $archdate");
+    } else {
+	logger("ERROR", "archive storage date missing!");
+	$errcnt++;
+    }
+
+    #
+    # archive-recall-date
+    #
+    my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
+    if ($recalldatenode) {
+	print "INFO: archive recall date exists!\n";
+	# delete old date
+	$recalldatenode->removeChildNodes;
+    } else {
+	# create new storage date node
+	$recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
+	# move after archive-path
+	$rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
+    }
+    $recalldatenode->appendTextNode(scalar localtime);
+    $xml_changed++;
+
+    return \%files;
+}
+
+
+#
+# $%files = run_retrieve
+#
+# runs the retriever program on $docdir and returns a list of archived files
+#
+# Sample output:
+# Retrieving          17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
+#
+sub run_retrieve {
+    my %files;
+    print LOG "START unarchive $version ", scalar localtime, "\n";
+    my $archcmd = $archprog;
+    $archcmd .= " retrieve -subdir=yes -replace=all";
+    $archcmd .= " -description='$archname'";
+    $archcmd .= " '$docdir/'";
+
+    my $archcnt = 0;
+    print LOG "CMD: $archcmd\n";
+    if (open ARCH, "$archcmd 2>&1 |") {
+	while (<ARCH>) {
+	    chomp;
+	    print LOG "ARCH: $_\n";
+	    if (/
+		Retrieving
+		\s+([\d,]+)    # size
+		\s+(\S+)       # file name
+		\s+\[Done\]
+		/x) {
+		my $size = $1;
+		my $file = $2;
+		$size =~ s/,//g;
+		logger("DEBUG", "  RETRIEVE: file '$file'");
+		$archcnt++;
+		if ($files{$file}) {
+		    logger("WARNING", "file $file seems to be archived multiple times.");
+		    $warncnt++;
+		} 
+		$files{$file} = [$size];
+	    }
+	}
+	logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
+    } else {
+	logger("ABORT", "unable to start archive command '$archcmd'!!");
+	exit 1;
+    }
+    return \%files;
+}
+
+
+#
+# check_files(\%files_to_retrieve, \%retrieved_files)
+#
+# compares the list of archived and retrieved files
+#
+sub check_files {
+    my ($to_retrieve, $retrieved) = @_;
+
+    my $nt = scalar keys %$to_retrieve;
+    my $na = scalar keys %$retrieved;
+
+    foreach my $ft (sort keys %$to_retrieve) {
+	my $fp = "$docdir/$ft";
+	#logger("DEBUG", "  fp: $fp");
+	if ($$retrieved{$fp}) {
+	    logger("DEBUG", "$ft retrieved OK");
+	    $$retrieved{$fp}->[1] = "OK";
+	} else {
+	    logger("ERROR", "file entry '$ft' missing from archive!");
+	    $errcnt++;
+	}
+    }
+
+    foreach my $fa (sort keys %$retrieved) {
+	if (! $$retrieved{$fa}->[1]) {
+	    my ($fn, $fp) = split_file_path($fa);
+	    if ($index_files{$fn}) {
+		logger("DEBUG", "$fa ignored");
+		$na--;
+	    } else {
+		logger("WARNING", "$fa retrieved but not in list!");
+		$warncnt++;
+	    }
+	}
+    }
+
+    if ($nt > $na) {
+	logger("WARNING", "less files were retrieved ($na vs. $nt)!");
+	$warncnt++;
+    } elsif ($na > $nt) {
+	logger("WARNING", "more files were retrieved ($na vs. $nt)!");
+	$warncnt++;
+    }
+
+}
+
+
+
+#######################################################
+# main
+#
+
+logger("INFO", "unarchiver $version");
+
+# make shure the right user is running this program
+my $user = getlogin;
+if (($user ne "archive")&&($user ne "root")) {
+    logger("ABORT", "you must be archive or root user to run this program!");
+    exit 1;
+}
+
+# use checkarchive first
+if (system("$checkprog $docdir >/dev/null") == 0) {
+    logger("INFO", "archive '$docdir' check OK");
+} else {
+    logger("ABORT", "archive '$docdir' check failed!!");
+    exit 1;
+}
+
+# read index.meta file
+my ($document, $rootnode) = read_xml($metafile);
+
+# check index file
+my $archived_files = read_resource_meta($rootnode);
+my $num_archived_files = scalar keys %$archived_files;
+
+# check for .archived file
+if (-f "$docdir/.archived") {
+    logger("INFO", ".archived file exists.");
+} else {
+    logger("WARNING", "no .archived file!");
+    $warncnt++;
+}
+
+logger("INFO", "$num_archived_files files to retrieve.");
+
+# retrieve
+my $retrieved_files = run_retrieve;
+
+my $num_arch_files = (scalar keys %$retrieved_files);
+if ($num_arch_files == 0) {
+    logger("ABORT", "no files retrieved!!");
+    exit 1;
+}
+logger("INFO", "$num_arch_files files retrieved");
+
+# check list of archived files
+check_files($archived_files, $retrieved_files);
+
+# rewrite index.meta file
+write_xml($document, $metafile);
+
+logger("INFO", "$warncnt warnings");
+logger("INFO", "$errcnt errors");
+if ($errcnt == 0) {
+    logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved");
+} else {
+    logger("ABORT", "there were $errcnt errors!!");
+    exit 1;
+}