changeset 18:fdf4ceb36db1

fixed problem with dir names in metacheck; new version of metacheck defaults to not change index file; new version of archiver uses new version of metacheck
author casties
date Tue, 20 Sep 2005 19:24:57 +0200
parents 6c5c7743acb1
children 320c4b93bf39
files MPIWGStor.pm archivemany.sh archiver.pl metacheck.pl
diffstat 4 files changed, 210 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/MPIWGStor.pm	Wed Aug 31 12:50:00 2005 +0200
+++ b/MPIWGStor.pm	Tue Sep 20 19:24:57 2005 +0200
@@ -140,12 +140,17 @@
 # file name (the last path element) and path
 #
 sub split_file_path {
-    my ($fn) = @_;
+    my ($fn, $nodot) = @_;
 
     if ($fn =~ /^(.*)\/([^\/]+)$/) {
         return ($2, $1);
+    } 
+    # only file name
+    if ($nodot) {
+	return ($fn, '');
+    } else {
+	return ($fn, '.');
     }
-    return ($fn, '.'); # only file name
 }
 
 
--- a/archivemany.sh	Wed Aug 31 12:50:00 2005 +0200
+++ b/archivemany.sh	Tue Sep 20 19:24:57 2005 +0200
@@ -2,6 +2,8 @@
 
 VERSION="archivemany V0.1 (14.6.2005 ROC)"
 
+#set -x
+
 if [ -z "$1$2" ]
 then
     echo $VERSION
@@ -12,6 +14,7 @@
 
 BASEDIR="$1"
 DIRLIST="$2"
+PARAM="$3"
 
 if [ ! -d $BASEDIR ]
 then
@@ -30,7 +33,7 @@
     if [ -d "$BASEDIR/$DIR" ]
     then
 	echo "archiving $DIR..."
-	if /usr/local/mpiwg/archive/archiver "$BASEDIR/$DIR"
+	if /usr/local/mpiwg/archive/archiver "$PARAM" "$BASEDIR/$DIR"
 	then
 	    echo "  done on `date`"
 	else
--- a/archiver.pl	Wed Aug 31 12:50:00 2005 +0200
+++ b/archiver.pl	Tue Sep 20 19:24:57 2005 +0200
@@ -16,25 +16,40 @@
 #
 
 # program version
-my $version = "0.6 (ROC 23.3.2005)";
+my $version = "0.7 (ROC 20.9.2005)";
 
 # short help
-my $shorthelp = "MPIWG archiver $version\nuse:\n  archiver [options] docpath\noptions:\n  -premigrate  don't delete archived files\n";
+my $help = "MPIWG archiver $version
+use:  archiver [options] docpath
+options:
+  -debug  show debugging info
+  -premigrate  don't delete archived files
+  -force  archive even if already archived
+";
 
 # read command line parameters
 my $args = MPIWGStor::parseargs;
+if (! scalar(%$args)) {
+    print $help, "\n";
+    exit 1;
+}
 
 # debug level
-my $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
+$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
+
+# force archiving
+my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
 
 # rewrite XML file (necessary for archive date!)
 my $fix_xml = 1;
 my $xml_changed = 0;
+
 # XML namespace (not really implemented!)
 my $namespace = "";
 
 # archive name (archive-path element, usually == $docdir)
 my $archname;
+
 # archive storage date (now)
 my $archdate = stime(time);
 
@@ -44,43 +59,6 @@
 # delete data files after archiving
 my $delete_data_files = 1;
 
-
-#######################################################
-# external programs
-#
-my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
-if (! -x $archprog) {
-    logger('ABORT', "TSM client program '$archprog' missing!!\n");
-    exit 1;
-}
-my $checkprog = "/usr/local/mpiwg/archive/metacheck";
-if (! -x $checkprog) {
-    logger('ABORT', "meta data checking program '$checkprog' missing!!\n");
-    exit 1;
-}
-# log file for archiver
-my $log_file = "/var/log/mpiwg-archiver.log";
-if (! open LOG, ">>$log_file") {
-    logger('ABORT', "unable to write log file '$log_file'!!\n");
-    exit 1;
-}
-
-#######################################################
-# check parameters that were passed to the program
-#
-if ($#ARGV < 0) {
-    print $shorthelp;
-    exit 1;
-}
-my $docdir = $$args{'path'};
-# strip double slashes
-$docdir =~ s/\/\//\//;
-# strip trailing slashes
-$docdir =~ s/\/+$//;
-if (! -d $docdir) {
-    logger('ABORT', "document directory \'$docdir\' doesn't exist!\n");
-    exit 1;
-}
 # don't delete archived files with "-premigrate"
 if (exists $$args{'premigrate'}) {
     $delete_data_files = not $$args{'premigrate'};
@@ -89,9 +67,44 @@
     logger('INFO', "going to remove successfully archived files from disk");
 }
 
+
+#######################################################
+# external programs
+#
+my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
+if (! -x $archprog) {
+    logger('ABORT', "TSM client program '$archprog' missing!");
+    exit 1;
+}
+my $checkprog = "/usr/local/mpiwg/archive/metacheck";
+if (! -x $checkprog) {
+    logger('ABORT', "meta data checking program '$checkprog' missing!");
+    exit 1;
+}
+# log file for archiver
+my $log_file = "/var/log/mpiwg-archiver.log";
+if (! open LOG, ">>$log_file") {
+    logger('ABORT', "unable to write log file '$log_file'!");
+    exit 1;
+}
+
+#######################################################
+# check parameters that were passed to the program
+#
+
+my $docdir = $$args{'path'};
+# strip double slashes
+$docdir =~ s/\/\//\//;
+# strip trailing slashes
+$docdir =~ s/\/+$//;
+if (! -d $docdir) {
+    logger('ABORT', "document directory \'$docdir\' doesn't exist!");
+    exit 1;
+}
+
 my $metafile = "$docdir/index.meta";
 if (! -f $metafile) {
-    logger('ABORT', "metadata index file \'$metafile\' doesn't exist!\n");
+    logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
     exit 1;
 }
 
@@ -122,7 +135,7 @@
     # get archive-path
     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
     if (! $archname) {
-	logger('ABORT', "archive-name element missing!!\n");
+	logger('ABORT', "archive-name element missing!");
 	exit 1;
     }
 
@@ -133,7 +146,7 @@
     foreach my $fn (@filenodes) {
 	my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
 	my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
-	logger('DEBUG', "FILE ($path)$name\n");
+	logger('DEBUG', "FILE ($path)$name");
 	my $f = ($path) ? "$path/$name" : "$name";
 	$files{$f} = $name;
     }
@@ -143,7 +156,7 @@
     #
     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
     if ($stordatenode) {
-	logger('WARNING', "archive storage date exists! Resource already archived?\n");
+	logger('WARNING', "archive storage date exists! Resource already archived?");
 	$warncnt++;
 	# delete old date
 	$stordatenode->removeChildNodes;
@@ -186,7 +199,7 @@
 	    }
 	}
     } else {
-	logger('ABORT', "unable to start archive command '$archcmd'!!\n");
+	logger('ABORT', "unable to start archive command '$archcmd'!!");
 	exit 1;
     }
 
@@ -209,10 +222,10 @@
 	my $fp = "$docdir/$ft";
 	#print "  fp: $fp\n";
 	if ($$archived{$fp}) {
-	    logger('DEBUG', "$ft archived OK\n");
+	    logger('DEBUG', "$ft archived OK");
 	    $$archived{$fp} = "OK";
 	} else {
-	    logger('ERROR', "file '$ft' missing from archive!\n");
+	    logger('ERROR', "file '$ft' missing from archive!");
 	    $errcnt++;
 	}
     }
@@ -221,19 +234,19 @@
 	if ($$archived{$fa} ne "OK") {
 	    my ($fn, $fp) = MPIWGStor::split_file_path($fa);
 	    if ($MPIWGStor::index_files{$fn}) {
-		logger('DEBUG', "$fa ignored\n");
+		logger('DEBUG', "$fa ignored");
 		$na--;
 	    } else {
-		logger('WARNING', "$fa archived but not in list!\n");
+		logger('WARNING', "$fa archived but not in list!");
 		$warncnt++;
 	    }
 	}
     }
 
     if ($nt > $na) {
-	logger('WARNING', "less files were archived ($na vs. $nt)!\n");
+	logger('WARNING', "less files were archived ($na vs. $nt)!");
     } elsif ($na > $nt) {
-	logger('WARNING', "more files were archived ($na vs. $nt)!\n");
+	logger('WARNING', "more files were archived ($na vs. $nt)!");
     }
 
 }
@@ -261,9 +274,9 @@
 	next unless (-f $f);
 	# delete files
 	if (unlink $f) {
-	    logger('INFO', "remove $f ($fn)\n");
+	    logger('INFO', "remove $f ($fn)");
 	} else {
-	    logger('ERROR', "unable to delete $f!\n");
+	    logger('ERROR', "unable to delete $f!");
 	    $errcnt++;
 	}
     }
@@ -275,7 +288,7 @@
 	# dont't remove document dir (shouldn't be empty anyway)
 	next if ($d eq $docdir);
 	if (-d $d) {
-	    logger('INFO', "remove dir $d\n");
+	    logger('INFO', "remove dir $d");
 	    rmdir $d;
 	}
     }
@@ -292,7 +305,7 @@
     my ($files, $dir) = @_;
 
     if (! opendir DIR, $dir) {
-	logger('ERROR', "unable to read directory $dir!\n");
+	logger('ERROR', "unable to read directory $dir!");
 	$errcnt++;
 	return;
     }
@@ -306,28 +319,28 @@
 	    if (-f "$dir/$f") {
 		# $f is a file
 		if (unlink "$dir/$f") {
-		    logger('INFO', "removed $f\n");
+		    logger('INFO', "removed $f");
 		} else {
-		    logger('ERROR', "unable to delete $f!\n");
+		    logger('ERROR', "unable to delete $f!");
 		    $errcnt++;
 		}
 	    } elsif (-d _) {
 		# $f is a directory (unlink won't work)
 		if ((system 'rm', '-r', "$dir/$f") == 0) {
-		    logger('INFO', "removed directory $f\n");
+		    logger('INFO', "removed directory $f");
 		} else {
-		    logger('ERROR', "unable to delete directory $f!\n");
+		    logger('ERROR', "unable to delete directory $f!");
 		    $errcnt++;
 		}
 	    } else {
-		logger('ERROR', "funny object $dir/$f!\n");
+		logger('ERROR', "funny object $dir/$f!");
 		$errcnt++;
 	    }
 	} else {
 	    # $f is not in the list
 	    if (-d "$dir/$f") {
 		# recurse into directories
-		logger('DEBUG', "enter $dir/$f\n");
+		logger('DEBUG', "enter $dir/$f");
 		delete_all_files($files, "$dir/$f");
 	    }
 	}
@@ -339,22 +352,55 @@
 # main
 #
 
-logger('START', "archiver $version at $archdate\n");
+logger('START', "archiver $version at $archdate");
 
 # make shure the right user is running this program
 my $user = getlogin;
-#if (($user ne "archive")||($user ne "root")) {
-#    logger("ABORT", "you ($user) must be archive or root user to run this program!");
-#    exit 1;
-#}
+if (($user ne "archive")&&($user ne "root")) {
+    logger("ABORT", "you ($user) must be archive or root user to run this program!");
+    exit 1;
+}
+
+# check for .archived file
+if (-f "$docdir/.archived") {
+    if (not $force_archive) {
+	logger('ABORT', "already archived! (.archived file exists)");
+	exit 1;
+    } else {
+	logger('WARNING', "resource already archived? (.archived file exists)");
+	$warncnt++;
+    }
+}
 
 # use metacheck first
-if (system("$checkprog $docdir >/dev/null") == 0) {
-    logger('INFO', "resource '$docdir' check OK\n");
+if (open CHECK, "$checkprog -add-files $docdir |") {
+    my @errors;
+    my $msg;
+    while (<CHECK>) {
+	chomp;
+	if (/^ERROR/) {
+	    push @errors, $_;
+	}
+	$msg = $_;
+    }
+    if ($msg =~ /^DONE/) {
+	logger('DEBUG', "checking index file: $msg");
+	logger('INFO', "resource '$docdir' check OK");
+    } else {
+	logger('DEBUG', "errors checking index file:\n    " . join("\n    ", @errors) . "\n    $msg");
+	logger('ABORT', "resource '$docdir' check failed!");
+	exit 1;
+    }
 } else {
-    logger('ABORT', "resource '$docdir' check failed!!\n");
+    logger('ABORT', "unable to run $checkprog");
     exit 1;
 }
+# if (system("$checkprog $docdir >/dev/null") == 0) {
+#     logger('INFO', "resource '$docdir' check OK");
+# } else {
+#     logger('ABORT', "resource '$docdir' check failed!!");
+#     exit 1;
+# }
 
 # read index.meta file
 my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
@@ -362,15 +408,15 @@
 # check file and add archive date
 my $files_to_archive = read_resource_meta($rootnode);
 
-print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
+logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
 
-# check for .archived file
+# remove .archived file
 if (-f "$docdir/.archived") {
     if (unlink "$docdir/.archived") {
-	logger('WARNING', "existing .archived file has been removed! Resource already archived?\n");
+	logger('WARNING', "existing .archived file has been removed!");
 	$warncnt++;
     } else {
-	logger('ERROR', "unable to remove existing .archived file!\n");
+	logger('ERROR', "unable to remove existing .archived file!");
 	$errcnt++;
     }
 }
@@ -382,7 +428,7 @@
 
 # write new index.meta
 if ($errcnt > 0) {
-    logger('ABORT', "there were errors!\n");
+    logger('ABORT', "there were errors!");
     exit 1;
 } else {
     if ($fix_xml) {
@@ -394,7 +440,7 @@
 my $archived_files = run_archive();
 my $num_archfiles = scalar keys %$archived_files;
 
-logger('INFO', "$num_archfiles files archived\n");
+logger('INFO', "$num_archfiles files archived");
 
 # check list of archived files
 check_files($files_to_archive, $archived_files);
@@ -412,8 +458,8 @@
     }
 }
 
-logger('INFO', "$warncnt warnings\n");
-logger('INFO', "$errcnt errors\n");
+logger('INFO', "$warncnt warnings");
+logger('INFO', "$errcnt errors");
 if ($errcnt > 0) {
     logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
     exit 1;
--- a/metacheck.pl	Wed Aug 31 12:50:00 2005 +0200
+++ b/metacheck.pl	Tue Sep 20 19:24:57 2005 +0200
@@ -14,16 +14,35 @@
 #
 
 # program version
-my $version = "0.5.3 (8.7.2004)";
+my $version = "0.6.0 (20.9.2005)";
+my $help = 
+"use: metacheck [options] docdir
+options:
+  -debug  show debugging info
+  -dry-run  simulate, dont'do anything
+  -checkonly  leave existing index file untouched
+  -add-files  add file tags for missing files
+  -replace  rewrite index file to match current files
+";
+logger("INFO", "metacheck $version");
 
 # read command line parameters
 my $args = MPIWGStor::parseargs;
+if (! scalar(%$args)) {
+    print $help, "\n";
+    exit 1;
+}
 
 # debug level
 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
 
+# simulate action only
+my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
+logger('DEBUG', "dry-run: $dry_run");
+
 # check only or fix index file also
-my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
+my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
+
 # add file tags for missing files
 my $fix_files = ! $check_only;
 # add tags for file size and date
@@ -32,6 +51,31 @@
 my $fix_dirs = ! $check_only;
 # rewrite XML file (necessary for fix_files and fix_dirs)
 my $fix_xml = ! $check_only;
+# rewrite complete index file
+my $do_rewrite = 0;
+
+# add file tags for missing files
+if (exists $$args{'add-files'}) {
+    $check_only = 0;
+    $fix_files = 1;
+    $fix_dirs = 1;
+    $fix_xml = 1;
+    $do_rewrite = 0;
+    logger('DEBUG', "add-files: true");
+}
+
+# completely rewrite index file
+if (exists $$args{'replace'}) {
+    $check_only = 0;
+    $fix_files = 1;
+    $fix_dirs = 1;
+    $fix_xml = 1;
+    $do_rewrite = 1;
+    logger('DEBUG', "replace: true");
+}
+logger('DEBUG', "checkonly: $check_only");
+
+
 my $xml_changed = 0;
 # XML namespace (not really implemented!)
 my $namespace = "";
@@ -150,18 +194,25 @@
 	}
 	# check with dirs on filesystem 
 	my $fn;
-	if ($dirpath) {
+	if ($dirpath && ($dirpath ne '.')) {
 	    $fn = "$dirpath/$dirname";
 	} else {
 	    $fn = "$dirname";
 	}
-        #logger("dir: \"$dirname\", \"$dirpath\"");
+        #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
 	if ($$fsdirs{$fn}) {
 	    #logger("  OK ($$fsdirs{$fn})");
 	    $okdirs{$fn} = $dirname;
 	} else {
-	    logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
-	    $errcnt++;
+	    if ($do_rewrite) {
+		# remove dir tag
+		logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
+		$dirnode->unbindNode();
+		$warncnt++;
+	    } else {
+		logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
+		$errcnt++;
+	    }
 	}
     }
     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
@@ -171,7 +222,7 @@
 	foreach my $f (sort keys %$fsdirs) {
 	    # was this dir missing?
 	    if (! $okdirs{$f}) {
-		my ($name, $path) = split_file_path($f);
+		my ($name, $path) = split_file_path($f, 1);
 		# name must be valid
 		if (! valid_dir_name($name)) {
 		    $path = "." unless ($path);
@@ -262,8 +313,15 @@
 		$xml_changed++;
 	    }
 	} else {
-	    logger("ERROR", "file $filename (in $filepath/) missing on disk!");
-	    $errcnt++;
+	    if ($do_rewrite) {
+		# remove file tag
+		logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
+		$filenode->unbindNode();
+		$warncnt++;
+	    } else {
+		logger("ERROR", "file $filename (in $filepath/) missing on disk!");
+		$errcnt++;
+	    }
 	}
     }
     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
@@ -271,7 +329,7 @@
 	# number of file tags and files don't match
 	# iterate through all files
 	foreach my $f (sort keys %$fsfiles) {
-	    my ($name, $path) = split_file_path($f);
+	    my ($name, $path) = split_file_path($f, 1);
 	    # was this file missing?
 	    if (! $okfiles{$f}) {
 		# is an ignoreable file?
@@ -390,15 +448,13 @@
 # main
 #
 
-logger("INFO", "metacheck $version");
-    
 my ($document, $rootnode) = read_xml($metafile);
 
 check_resource_meta($rootnode);
 
 my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
 logger("INFO", "$fnum files on FS");
-#foreach (keys %files) {logger("  file ($_): $files{$_}");}
+#foreach (keys %dirs) {logger('DEBUG', "  dir ($_): $dirs{$_}");}
 
 check_files($rootnode, \%files);
 check_dirs($rootnode, \%dirs);
@@ -406,11 +462,16 @@
 logger("INFO", "$warncnt warnings");
 logger("INFO", "$errcnt errors");
 if ($errcnt > 0) {
-    logger("ABORT", "there were errors!");
+    logger("ABORT", "there were $errcnt errors!");
     exit 1;
 } else {
     if ($fix_xml) {
-	write_xml($document, $metafile);
+	if ($dry_run) {
+	    logger('INFO', "would write $metafile");
+	    logger('DEBUG', $document->toString(1));
+	} else {
+	    write_xml($document, $metafile);
+	}
     }
     logger("DONE", "index file checked successfully!");
 }