#!/usr/bin/perl -w
use strict;
use XML::LibXML;
use lib '/usr/local/mpiwg/archive';
use MPIWGStor;
# make output unbuffered
$|=1;
#######################################################
# internal parameters
#
# program version
my $version = "0.6.0 (20.9.2005)";
my $help =
"use: metacheck [options] docdir
options:
-debug show debugging info
-dry-run simulate, dont'do anything
-checkonly leave existing index file untouched
-add-files add file tags for missing files
-replace rewrite index file to match current files
";
logger("INFO", "metacheck $version");
# read command line parameters
my $args = MPIWGStor::parseargs;
if (! scalar(%$args)) {
print $help, "\n";
exit 1;
}
# debug level
$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
# simulate action only
my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
logger('DEBUG', "dry-run: $dry_run");
# check only or fix index file also
my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
# add file tags for missing files
my $fix_files = ! $check_only;
# add tags for file size and date
my $fix_fs_meta = 1;
# add dir tags for missing directories
my $fix_dirs = ! $check_only;
# rewrite XML file (necessary for fix_files and fix_dirs)
my $fix_xml = ! $check_only;
# rewrite complete index file
my $do_rewrite = 0;
# add file tags for missing files
if (exists $$args{'add-files'}) {
$check_only = 0;
$fix_files = 1;
$fix_dirs = 1;
$fix_xml = 1;
$do_rewrite = 0;
logger('DEBUG', "add-files: true");
}
# completely rewrite index file
if (exists $$args{'replace'}) {
$check_only = 0;
$fix_files = 1;
$fix_dirs = 1;
$fix_xml = 1;
$do_rewrite = 1;
logger('DEBUG', "replace: true");
}
logger('DEBUG', "checkonly: $check_only");
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";
#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
if (! $docdir) {
logger("ABORT", "no document directory given!");
exit 1;
}
# strip double slashes
$docdir =~ s/\/\//\//;
# strip trailing slashes
$docdir =~ s/\/$//;
if (! -d $docdir) {
logger("ABORT", "document directory \'$docdir\' doesn't exist!");
exit 1;
}
my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
exit 1;
}
#######################################################
# internal variables
#
# all files in the document directory tree
my %files;
# all directories in the document directory tree
my %dirs;
# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;
#######################################################
# subroutines
#
#
# fs_read_files($realdir, $docdir, \%files, \%dirs)
#
# reads all files and directories below $realdir and puts the
# files in %files and directories in %dirs
# $docdir is only for recursion, it should be empty when called
# from outside
#
sub fs_read_files {
my ($directory, $docdir, $files, $dirs) = @_;
my $cnt = 0;
if (! opendir DIR, $directory) {
return 0;
}
my @dirfiles = readdir DIR;
foreach my $fn (@dirfiles) {
# ignore names starting with a dot
next if ($fn =~ /^\./);
# ignore other silly files
next if ($junk_files{$fn});
$cnt++;
my $f = "$directory/$fn";
my $docf = ($docdir) ? "$docdir/$fn" : $fn;
#logger("fs_file: \"$f\"");
if (-f $f) {
#logger(" is file");
my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks)
= stat(_);
$$files{$docf} = [$fn, $size, $mtime];
} elsif (-d _) {
#logger(" is dir");
$$dirs{$docf} = $fn;
# recurse into directory
$cnt += fs_read_files($f, $docf, $files, $dirs);
}
}
return $cnt;
}
#
# check_dirs($rootnode, \%dirs)
#
# reads all dir elements under $rootnode and compares with the directory list
# in %dirs
#
sub check_dirs {
my ($root, $fsdirs) = @_;
#
# iterate all dir tags
#
my @dirnodes = $root->findnodes('child::dir');
my %okdirs;
foreach my $dirnode (@dirnodes) {
my $dirname = sstrip($dirnode->find('child::name'));
my $dirpath = sstrip($dirnode->find('child::path'));
my $description = sstrip($dirnode->find('child::description'));
# name must be valid
if (! valid_dir_name($dirname)) {
if ($do_rewrite) {
logger("WARNING", "directory name ($dirpath) $dirname in index file invalid!");
$warncnt++;
} else {
logger("ERROR", "directory name ($dirpath) $dirname invalid!");
$errcnt++;
}
}
# description can be present
if (! $description) {
logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
$warncnt++;
}
# check with dirs on filesystem
my $fn;
if ($dirpath && ($dirpath ne '.')) {
$fn = "$dirpath/$dirname";
} else {
$fn = "$dirname";
}
#logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
if ($$fsdirs{$fn}) {
#logger(" OK ($$fsdirs{$fn})");
$okdirs{$fn} = $dirname;
} else {
if ($do_rewrite) {
# remove dir tag
logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
$dirnode->unbindNode();
$warncnt++;
} else {
logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
$errcnt++;
}
}
}
#logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
# number of dir tags and dirs don't match
# iterate through all dirs
foreach my $f (sort keys %$fsdirs) {
# was this dir missing?
if (! $okdirs{$f}) {
my ($name, $path) = split_file_path($f, 1);
# name must be valid
if (! valid_dir_name($name)) {
$path = "." unless ($path);
logger("ERROR", "directory name $name (in $path/) invalid!");
$errcnt++;
next;
}
if ($fix_dirs) {
# add missing dir tag
my $dir_node = $root->addNewChild($namespace, "dir");
$xml_changed++;
# add name
my $name_node = $dir_node->addNewChild($namespace, "name");
$name_node->appendTextNode($name);
# add path
if ($path) {
my $path_node = $dir_node->addNewChild($namespace, "path");
$path_node->appendTextNode($path);
}
logger("INFO", "directory $f to be added to index file!");
} else {
logger("ERROR", "directory $f missing in index file!");
$errcnt++;
}
}
}
}
}
#
# check_files($rootnode, \%files)
#
# reads all file elements under $rootnode and compares with the file list
# in %files
#
sub check_files {
my ($root, $fsfiles) = @_;
#
# iterate all file tags
#
my @filenodes = $root->findnodes('child::file');
my %okfiles;
foreach my $filenode (@filenodes) {
my $filename = sstrip($filenode->find('child::name'));
my $filepath = sstrip($filenode->find('child::path'));
my $filesize = sstrip($filenode->find('child::size'));
my $filedate = sstrip($filenode->find('child::date'));
# name must be valid
if (! valid_file_name($filename)) {
if ($do_rewrite) {
logger("WARNING", "file name ($filepath)$filename in index file invalid!");
$warncnt++;
} else {
logger("ERROR", "file name ($filepath)$filename invalid!");
$errcnt++;
}
}
my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
#logger("file: \"$filename\", \"$filepath\"");
if ($$fsfiles{$fn}) {
#logger(" OK ($$fsfiles{$fn})");
$okfiles{$fn} = $filename;
# check file size and date
if ($filesize) {
if ($filesize != $$fsfiles{$fn}->[1]) {
logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
$warncnt++;
}
}
# file date
if ($filedate) {
if ($filedate ne stime($$fsfiles{$fn}->[2])) {
logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
$warncnt++;
}
}
# update file size and date
if ($fix_fs_meta) {
# delete size and date
foreach my $n ($filenode->findnodes('child::size')) {
$filenode->removeChild($n);
}
foreach my $n ($filenode->findnodes('child::date')) {
$filenode->removeChild($n);
}
# add new size and date
my $node = $filenode->addNewChild($namespace, "size");
$node->appendTextNode($$fsfiles{$fn}->[1]);
$node = $filenode->addNewChild($namespace, "date");
$node->appendTextNode(stime($$fsfiles{$fn}->[2]));
$xml_changed++;
}
} else {
if ($do_rewrite) {
# remove file tag
logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
$filenode->unbindNode();
$warncnt++;
} else {
logger("ERROR", "file $filename (in $filepath/) missing on disk!");
$errcnt++;
}
}
}
#logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
# number of file tags and files don't match
# iterate through all files
foreach my $f (sort keys %$fsfiles) {
my ($name, $path) = split_file_path($f, 1);
# was this file missing?
if (! $okfiles{$f}) {
# is an ignoreable file?
if ($index_files{$name}) {
next;
}
# name must be valid
if (! valid_dir_name($name)) {
$path = "." unless ($path);
logger("ERROR", "file name $name (in $path/) invalid!");
$errcnt++;
next;
}
if ($fix_files) {
# add missing file tag
my $file_node = $root->addNewChild($namespace, "file");
$xml_changed++;
# add name
my $name_node = $file_node->addNewChild($namespace, "name");
$name_node->appendTextNode($name);
# add path
if ($path) {
my $path_node = $file_node->addNewChild($namespace, "path");
$path_node->appendTextNode($path);
}
# add size
my $size_node = $file_node->addNewChild($namespace, "size");
$size_node->appendTextNode($$fsfiles{$f}->[1]);
# add date
my $date_node = $file_node->addNewChild($namespace, "date");
$date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
logger("INFO", "file $f to be added to index file!");
} else {
logger("ERROR", "file $f missing in index file!");
$errcnt++;
}
}
}
}
}
#
# check_resource_meta($rootnode)
#
# checks general resource meta information
#
sub check_resource_meta {
my ($rootnode) = @_;
#
# description
#
my $description = $rootnode->findvalue('child::description');
if (! $description) {
logger("ERROR", "resource description element missing!");
$errcnt++;
}
#
# name
#
my $name = sstrip($rootnode->findvalue('child::name'));
if ($name) {
my ($dirname, $dirpath) = split_file_path($docdir);
if ($dirname ne $name) {
logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
$errcnt++;
}
} else {
logger("ERROR", "resource name element missing!");
$errcnt++;
}
#
# archive path
#
my $realpath;
# get real path
if ($docdir =~ /^\//) {
# docdir is absolute
$realpath = $docdir;
} else {
# docdir is relative -- try with the shell
if (open PWDCMD, "cd $docdir ; pwd|") {
$realpath = <PWDCMD>;
chomp $realpath;
}
}
if (! $realpath) {
logger("ERROR", "unable to check real archive path!");
$errcnt++;
return;
}
# get archive-path
my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
if ($archnode) {
my $arch = sstrip($archnode->textContent);
if ($arch ne $realpath) {
logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
$warncnt++;
# correct archive-path
$archnode->removeChildNodes;
$archnode->appendTextNode($realpath);
$xml_changed++;
}
} else {
# add archive-path
$archnode = $rootnode->addNewChild($namespace, "archive-path");
$archnode->appendTextNode($realpath);
$xml_changed++;
}
}
#######################################################
# main
#
my ($document, $rootnode) = read_xml($metafile);
check_resource_meta($rootnode);
my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
logger("INFO", "$fnum files on FS");
#foreach (keys %dirs) {logger('DEBUG', " dir ($_): $dirs{$_}");}
check_files($rootnode, \%files);
check_dirs($rootnode, \%dirs);
logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
if ($errcnt > 0) {
logger("ABORT", "there were $errcnt errors!");
exit 1;
} else {
if ($fix_xml) {
if ($dry_run) {
logger('INFO', "would write $metafile");
logger('DEBUG', $document->toString(1));
} else {
write_xml($document, $metafile);
}
}
logger("DONE", "index file checked successfully!");
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>