#!/usr/local/bin/perl -w

use strict;

use XML::LibXML;

use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# make output unbuffered
$|=1;

#######################################################
# internal parameters
#

# program version
my $version = "0.5.2 (7.1.2004)";

# read command line parameters
my $args = MPIWGStor::parseargs();

# debug level
# NOTE(review): $debug is not declared in this file; presumably it is a
# variable exported by MPIWGStor (otherwise "use strict" would reject it).
$debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;

# check only or fix index file also
my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
# add file tags for missing files
my $fix_files = ! $check_only;
# add tags for file size and date
my $fix_fs_meta = 1;
# add dir tags for missing directories
my $fix_dirs = ! $check_only;
# rewrite XML file (necessary for fix_files and fix_dirs)
my $fix_xml = ! $check_only;
# number of modifications made to the in-memory XML document
my $xml_changed = 0;
# XML namespace (not really implemented!)
my $namespace = "";


#######################################################
# check parameters that were passed to the program
#
my $docdir = $$args{'path'};
if (! $docdir) {
    logger("ABORT", "no document directory given!");
    exit 1;
}
# strip all trailing slashes (not just the last one)
$docdir =~ s{/+$}{};
if (! -d $docdir) {
    logger("ABORT", "document directory '$docdir' doesn't exist!");
    exit 1;
}

my $metafile = "$docdir/index.meta";
if (! -f $metafile) {
    logger("ABORT", "metadata index file '$metafile' doesn't exist!");
    exit 1;
}

#######################################################
# internal variables
#

# all files in the document directory tree
my %files;
# all directories in the document directory tree
my %dirs;
# number of errors
my $errcnt = 0;
# number of warnings
my $warncnt = 0;

#######################################################
# subroutines
#

#
# $cnt = fs_read_files($realdir, $docdir, \%files, \%dirs)
#
# reads all files and directories below $realdir and puts the
# files in %files and directories in %dirs.
# $docdir is only for recursion, it should be empty when called
# from outside.
#
# %files maps the path relative to the start directory to an array
# reference [name, size, mtime]; %dirs maps the relative path to the
# directory name.
#
# Returns the number of entries found (files and directories,
# recursively). Names starting with a dot and names that are set in
# %junk_files are skipped. A directory that cannot be opened is logged
# as a warning and counts as empty.
#
sub fs_read_files {
    my ($directory, $docdir, $files, $dirs) = @_;
    my $cnt = 0;

    # a lexical handle keeps recursive calls from clobbering each other
    my $dh;
    if (! opendir $dh, $directory) {
        logger("WARNING", "unable to read directory '$directory': $!");
        $warncnt++;
        return 0;
    }
    my @dirfiles = readdir $dh;
    closedir $dh;

    foreach my $fn (@dirfiles) {
        # ignore names starting with a dot
        next if ($fn =~ /^\./);
        # ignore other silly files
        next if ($junk_files{$fn});

        $cnt++;
        my $f = "$directory/$fn";
        my $docf = ($docdir) ? "$docdir/$fn" : $fn;
        if (-f $f) {
            # reuse the stat buffer filled by -f; 7 = size, 9 = mtime
            my ($size, $mtime) = (stat(_))[7, 9];
            $$files{$docf} = [$fn, $size, $mtime];
        }
        elsif (-d _) {
            $$dirs{$docf} = $fn;
            # recurse into directory
            $cnt += fs_read_files($f, $docf, $files, $dirs);
        }
    }
    return $cnt;
}


#
# check_dirs($rootnode, \%dirs)
#
# reads all dir elements under $rootnode and compares with the directory
# list in %dirs.
#
# Every dir element is checked for a valid name, a description (missing
# descriptions are only warnings) and for its existence on disk.
# Directories on disk that have no dir element are added to the document
# when $fix_dirs is set and reported as errors otherwise.
# Updates $errcnt, $warncnt and $xml_changed.
#
sub check_dirs {
    my ($root, $fsdirs) = @_;

    #
    # iterate all dir tags
    #
    my @dirnodes = $root->findnodes('child::dir');
    my %okdirs;
    foreach my $dirnode (@dirnodes) {
        my $dirname = sstrip($dirnode->find('child::name'));
        my $dirpath = sstrip($dirnode->find('child::path'));
        my $description = sstrip($dirnode->find('child::description'));
        # name must be valid
        if (! valid_dir_name($dirname)) {
            logger("ERROR", "directory name ($dirpath) $dirname invalid!");
            $errcnt++;
        }
        # description can be present
        if (! $description) {
            logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
            $warncnt++;
        }
        # check with dirs on filesystem
        my $fn = ($dirpath) ? "$dirpath/$dirname" : "$dirname";
        # use exists: the stored value is the directory name, which may
        # be a false string like "0"
        if (exists $$fsdirs{$fn}) {
            $okdirs{$fn} = $dirname;
        }
        else {
            logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
            $errcnt++;
        }
    }

    # %okdirs only holds keys of %$fsdirs, so equal counts mean that
    # every directory on disk has a dir tag
    return if ((scalar keys %$fsdirs) == (scalar keys %okdirs));

    # number of dir tags and dirs don't match
    # iterate through all dirs
    foreach my $f (sort keys %$fsdirs) {
        # was this dir missing?
        next if (exists $okdirs{$f});

        my ($name, $path) = split_file_path($f);
        # name must be valid
        if (! valid_dir_name($name)) {
            $path = "." unless ($path);
            logger("ERROR", "directory name $name (in $path/) invalid!");
            $errcnt++;
            next;
        }
        if ($fix_dirs) {
            # add missing dir tag
            my $dir_node = $root->addNewChild($namespace, "dir");
            $xml_changed++;
            # add name
            my $name_node = $dir_node->addNewChild($namespace, "name");
            $name_node->appendTextNode($name);
            # add path
            if ($path) {
                my $path_node = $dir_node->addNewChild($namespace, "path");
                $path_node->appendTextNode($path);
            }
            logger("INFO", "directory $f to be added to index file!");
        }
        else {
            logger("ERROR", "directory $f missing in index file!");
            $errcnt++;
        }
    }
}


#
# check_files($rootnode, \%files)
#
# reads all file elements under $rootnode and compares with the file list
# in %files.
#
# Every file element is checked for a valid name and for its existence
# on disk; a recorded size or date that differs from the file on disk is
# a warning. When $fix_fs_meta is set the size and date elements are
# replaced with the current values. Files on disk that have no file
# element (and are not set in %index_files) are added to the document
# when $fix_files is set and reported as errors otherwise.
# Updates $errcnt, $warncnt and $xml_changed.
#
sub check_files {
    my ($root, $fsfiles) = @_;

    #
    # iterate all file tags
    #
    my @filenodes = $root->findnodes('child::file');
    my %okfiles;
    foreach my $filenode (@filenodes) {
        my $filename = sstrip($filenode->find('child::name'));
        my $filepath = sstrip($filenode->find('child::path'));
        my $filesize = sstrip($filenode->find('child::size'));
        my $filedate = sstrip($filenode->find('child::date'));
        # name must be valid
        if (! valid_file_name($filename)) {
            logger("ERROR", "file name ($filepath)$filename invalid!");
            $errcnt++;
        }
        my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
        if (! exists $$fsfiles{$fn}) {
            logger("ERROR", "file $filename (in $filepath/) missing on disk!");
            $errcnt++;
            next;
        }

        $okfiles{$fn} = $filename;
        my $fssize = $$fsfiles{$fn}->[1];
        my $fsdate = stime($$fsfiles{$fn}->[2]);
        # check file size
        if ($filesize && $filesize != $fssize) {
            logger("WARNING", "size of file $fn changed: $filesize to $fssize");
            $warncnt++;
        }
        # check file date
        if ($filedate && $filedate ne $fsdate) {
            # pass one complete message string like every other logger
            # call in this file
            logger("WARNING", "date of file $fn changed: $filedate to $fsdate");
            $warncnt++;
        }
        # update file size and date
        if ($fix_fs_meta) {
            # delete size and date
            foreach my $n ($filenode->findnodes('child::size')) {
                $filenode->removeChild($n);
            }
            foreach my $n ($filenode->findnodes('child::date')) {
                $filenode->removeChild($n);
            }
            # add new size and date
            my $node = $filenode->addNewChild($namespace, "size");
            $node->appendTextNode($fssize);
            $node = $filenode->addNewChild($namespace, "date");
            $node->appendTextNode($fsdate);
            $xml_changed++;
        }
    }

    # %okfiles only holds keys of %$fsfiles, so equal counts mean that
    # every file on disk has a file tag
    return if ((scalar keys %$fsfiles) == (scalar keys %okfiles));

    # number of file tags and files don't match
    # iterate through all files
    foreach my $f (sort keys %$fsfiles) {
        # was this file missing?
        next if (exists $okfiles{$f});

        my ($name, $path) = split_file_path($f);
        # is an ignoreable file?
        next if ($index_files{$name});
        # name must be valid; use the same check that is applied to the
        # file tags above, so that a file added here is not rejected on
        # the next run
        if (! valid_file_name($name)) {
            $path = "." unless ($path);
            logger("ERROR", "file name $name (in $path/) invalid!");
            $errcnt++;
            next;
        }
        if ($fix_files) {
            # add missing file tag
            my $file_node = $root->addNewChild($namespace, "file");
            $xml_changed++;
            # add name
            my $name_node = $file_node->addNewChild($namespace, "name");
            $name_node->appendTextNode($name);
            # add path
            if ($path) {
                my $path_node = $file_node->addNewChild($namespace, "path");
                $path_node->appendTextNode($path);
            }
            # add size
            my $size_node = $file_node->addNewChild($namespace, "size");
            $size_node->appendTextNode($$fsfiles{$f}->[1]);
            # add date
            my $date_node = $file_node->addNewChild($namespace, "date");
            $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
            logger("INFO", "file $f to be added to index file!");
        }
        else {
            logger("ERROR", "file $f missing in index file!");
            $errcnt++;
        }
    }
}

#
# check_resource_meta($rootnode)
#
# checks general resource meta information:
# - a description element must be present
# - the name element must match the name of the document directory
# - the archive-path element must contain the real path of the document
#   directory; it is corrected (with a warning) or added if necessary
# Updates $errcnt, $warncnt and $xml_changed.
#
sub check_resource_meta {
    my ($rootnode) = @_;

    #
    # description
    #
    my $description = $rootnode->findvalue('child::description');
    if (! $description) {
        logger("ERROR", "resource description element missing!");
        $errcnt++;
    }
    #
    # name
    #
    my $name = sstrip($rootnode->findvalue('child::name'));
    if ($name) {
        my ($dirname, $dirpath) = split_file_path($docdir);
        if ($dirname ne $name) {
            logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
            $errcnt++;
        }
    }
    else {
        logger("ERROR", "resource name element missing!");
        $errcnt++;
    }
    #
    # archive path
    #
    my $realpath;
    # get real path
    if ($docdir =~ /^\//) {
        # docdir is absolute
        $realpath = $docdir;
    }
    else {
        # docdir is relative -- try with the shell.
        # The directory is handed over as positional parameter $1 so it
        # is never parsed as shell code, and pwd only runs if cd worked.
        if (open my $pwdcmd, '-|', 'sh', '-c', 'cd -- "$1" && pwd', 'sh', $docdir) {
            $realpath = <$pwdcmd>;
            close $pwdcmd;
            chomp $realpath if (defined $realpath);
        }
    }
    if (! $realpath) {
        logger("ERROR", "unable to check real archive path!");
        $errcnt++;
        return;
    }
    # get archive-path
    my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
    if ($archnode) {
        my $arch = sstrip($archnode->textContent);
        if ($arch ne $realpath) {
            logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
            $warncnt++;
            # correct archive-path
            $archnode->removeChildNodes;
            $archnode->appendTextNode($realpath);
            $xml_changed++;
        }
    }
    else {
        # add archive-path
        $archnode = $rootnode->addNewChild($namespace, "archive-path");
        $archnode->appendTextNode($realpath);
        $xml_changed++;
    }
}


#######################################################
# main
#

logger("INFO", "metacheck $version");

my ($document, $rootnode) = read_xml($metafile);

check_resource_meta($rootnode);

my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
logger("INFO", "$fnum files on FS");

check_files($rootnode, \%files);
check_dirs($rootnode, \%dirs);

logger("INFO", "$warncnt warnings");
logger("INFO", "$errcnt errors");
if ($errcnt > 0) {
    logger("ABORT", "there were errors!");
    exit 1;
}
else {
    # the index file is only rewritten when no errors were found
    if ($fix_xml) {
        write_xml($document, $metafile);
    }
    logger("DONE", "index file checked successfully!");
}