--- foxridge-archiver/metacheck.pl 2004/07/08 17:30:05 1.3 +++ foxridge-archiver/metacheck.pl 2017/03/16 17:00:43 1.6 @@ -1,4 +1,4 @@ -#!/usr/local/bin/perl -w +#!/usr/bin/perl -w use strict; use XML::LibXML; @@ -14,16 +14,35 @@ $|=1; # # program version -my $version = "0.5.3 (8.7.2004)"; +my $version = "0.6.0 (20.9.2005)"; +my $help = +"use: metacheck [options] docdir +options: + -debug show debugging info + -dry-run simulate, dont'do anything + -checkonly leave existing index file untouched + -add-files add file tags for missing files + -replace rewrite index file to match current files +"; +logger("INFO", "metacheck $version"); # read command line parameters my $args = MPIWGStor::parseargs; +if (! scalar(%$args)) { + print $help, "\n"; + exit 1; +} # debug level $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; +# simulate action only +my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0; +logger('DEBUG', "dry-run: $dry_run"); + # check only or fix index file also -my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0; +my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1; + # add file tags for missing files my $fix_files = ! $check_only; # add tags for file size and date @@ -32,6 +51,31 @@ my $fix_fs_meta = 1; my $fix_dirs = ! $check_only; # rewrite XML file (necessary for fix_files and fix_dirs) my $fix_xml = ! $check_only; +# rewrite complete index file +my $do_rewrite = 0; + +# add file tags for missing files +if (exists $$args{'add-files'}) { + $check_only = 0; + $fix_files = 1; + $fix_dirs = 1; + $fix_xml = 1; + $do_rewrite = 0; + logger('DEBUG', "add-files: true"); +} + +# completely rewrite index file +if (exists $$args{'replace'}) { + $check_only = 0; + $fix_files = 1; + $fix_dirs = 1; + $fix_xml = 1; + $do_rewrite = 1; + logger('DEBUG', "replace: true"); +} +logger('DEBUG', "checkonly: $check_only"); + + my $xml_changed = 0; # XML namespace (not really implemented!) my $namespace = ""; @@ -140,8 +184,13 @@ sub check_dirs { my $description = sstrip($dirnode->find('child::description')); # name must be valid if (! valid_dir_name($dirname)) { - logger("ERROR", "directory name ($dirpath) $dirname invalid!"); - $errcnt++; + if ($do_rewrite) { + logger("WARNING", "directory name ($dirpath) $dirname in index file invalid!"); + $warncnt++; + } else { + logger("ERROR", "directory name ($dirpath) $dirname invalid!"); + $errcnt++; + } } # description can be present if (! $description) { @@ -150,18 +199,25 @@ sub check_dirs { } # check with dirs on filesystem my $fn; - if ($dirpath) { + if ($dirpath && ($dirpath ne '.')) { $fn = "$dirpath/$dirname"; } else { $fn = "$dirname"; } - #logger("dir: \"$dirname\", \"$dirpath\""); + #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\""); if ($$fsdirs{$fn}) { #logger(" OK ($$fsdirs{$fn})"); $okdirs{$fn} = $dirname; } else { - logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!"); - $errcnt++; + if ($do_rewrite) { + # remove dir tag + logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!"); + $dirnode->unbindNode(); + $warncnt++; + } else { + logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!"); + $errcnt++; + } } } #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), ""); @@ -171,7 +227,7 @@ sub check_dirs { foreach my $f (sort keys %$fsdirs) { # was this dir missing? if (! $okdirs{$f}) { - my ($name, $path) = split_file_path($f); + my ($name, $path) = split_file_path($f, 1); # name must be valid if (! valid_dir_name($name)) { $path = "." unless ($path); @@ -223,8 +279,13 @@ sub check_files { my $filedate = sstrip($filenode->find('child::date')); # name must be valid if (! valid_file_name($filename)) { - logger("ERROR", "file name ($filepath)$filename invalid!"); - $errcnt++; + if ($do_rewrite) { + logger("WARNING", "file name ($filepath)$filename in index file invalid!"); + $warncnt++; + } else { + logger("ERROR", "file name ($filepath)$filename invalid!"); + $errcnt++; + } } my $fn = ($filepath) ? "$filepath/$filename" : "$filename"; #logger("file: \"$filename\", \"$filepath\""); @@ -262,8 +323,15 @@ sub check_files { $xml_changed++; } } else { - logger("ERROR", "file $filename (in $filepath/) missing on disk!"); - $errcnt++; + if ($do_rewrite) { + # remove file tag + logger("WARNING", "file $filename (in $filepath/) no longer on disk!"); + $filenode->unbindNode(); + $warncnt++; + } else { + logger("ERROR", "file $filename (in $filepath/) missing on disk!"); + $errcnt++; + } } } #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), ""); @@ -271,7 +339,7 @@ sub check_files { # number of file tags and files don't match # iterate through all files foreach my $f (sort keys %$fsfiles) { - my ($name, $path) = split_file_path($f); + my ($name, $path) = split_file_path($f, 1); # was this file missing? if (! $okfiles{$f}) { # is an ignoreable file? @@ -390,15 +458,13 @@ sub check_resource_meta { # main # -logger("INFO", "metacheck $version"); - my ($document, $rootnode) = read_xml($metafile); check_resource_meta($rootnode); my $fnum = fs_read_files($docdir, "", \%files, \%dirs); logger("INFO", "$fnum files on FS"); -#foreach (keys %files) {logger(" file ($_): $files{$_}");} +#foreach (keys %dirs) {logger('DEBUG', " dir ($_): $dirs{$_}");} check_files($rootnode, \%files); check_dirs($rootnode, \%dirs); @@ -406,11 +472,16 @@ check_dirs($rootnode, \%dirs); logger("INFO", "$warncnt warnings"); logger("INFO", "$errcnt errors"); if ($errcnt > 0) { - logger("ABORT", "there were errors!"); + logger("ABORT", "there were $errcnt errors!"); exit 1; } else { if ($fix_xml) { - write_xml($document, $metafile); + if ($dry_run) { + logger('INFO', "would write $metafile"); + logger('DEBUG', $document->toString(1)); + } else { + write_xml($document, $metafile); + } } logger("DONE", "index file checked successfully!"); }