File:  [Repository] / foxridge-archiver / metacheck.pl
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Tue Sep 20 17:24:57 2005 UTC (18 years, 9 months ago) by casties
Branches: MAIN
CVS tags: HEAD
fixed problem with dir names in metacheck
new version of metacheck defaults to not change index file
new version of archiver uses new version of metacheck

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: use XML::LibXML;
    5: 
    6: use lib '/usr/local/mpiwg/archive';
    7: use MPIWGStor;
    8: 
    9: # make output unbuffered
   10: $|=1;
   11: 
   12: #######################################################
   13: # internal parameters
   14: #
   15: 
   16: # program version
   17: my $version = "0.6.0 (20.9.2005)";
   18: my $help = 
   19: "use: metacheck [options] docdir
   20: options:
   21:   -debug  show debugging info
   22:   -dry-run  simulate, dont'do anything
   23:   -checkonly  leave existing index file untouched
   24:   -add-files  add file tags for missing files
   25:   -replace  rewrite index file to match current files
   26: ";
   27: logger("INFO", "metacheck $version");
   28: 
   29: # read command line parameters
   30: my $args = MPIWGStor::parseargs;
   31: if (! scalar(%$args)) {
   32:     print $help, "\n";
   33:     exit 1;
   34: }
   35: 
   36: # debug level
   37: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   38: 
   39: # simulate action only
   40: my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
   41: logger('DEBUG', "dry-run: $dry_run");
   42: 
   43: # check only or fix index file also
   44: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
   45: 
   46: # add file tags for missing files
   47: my $fix_files = ! $check_only;
   48: # add tags for file size and date
   49: my $fix_fs_meta = 1;
   50: # add dir tags for missing directories
   51: my $fix_dirs = ! $check_only;
   52: # rewrite XML file (necessary for fix_files and fix_dirs)
   53: my $fix_xml = ! $check_only;
   54: # rewrite complete index file
   55: my $do_rewrite = 0;
   56: 
   57: # add file tags for missing files
   58: if (exists $$args{'add-files'}) {
   59:     $check_only = 0;
   60:     $fix_files = 1;
   61:     $fix_dirs = 1;
   62:     $fix_xml = 1;
   63:     $do_rewrite = 0;
   64:     logger('DEBUG', "add-files: true");
   65: }
   66: 
   67: # completely rewrite index file
   68: if (exists $$args{'replace'}) {
   69:     $check_only = 0;
   70:     $fix_files = 1;
   71:     $fix_dirs = 1;
   72:     $fix_xml = 1;
   73:     $do_rewrite = 1;
   74:     logger('DEBUG', "replace: true");
   75: }
   76: logger('DEBUG', "checkonly: $check_only");
   77: 
   78: 
   79: my $xml_changed = 0;
   80: # XML namespace (not really implemented!)
   81: my $namespace = "";
   82: 
   83: 
   84: #######################################################
   85: # check parameters that were passed to the program
   86: #
   87: my $docdir = $$args{'path'};
   88: if (! $docdir) {
   89:     logger("ABORT", "no document directory given!");
   90:     exit 1;
   91: }
   92: # strip double slashes
   93: $docdir =~ s/\/\//\//;
   94: # strip trailing slashes
   95: $docdir =~ s/\/$//;
   96: if (! -d $docdir) {
   97:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
   98:     exit 1;
   99: }
  100: 
  101: my $metafile = "$docdir/index.meta";
  102: if (! -f $metafile) {
  103:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
  104:     exit 1;
  105: }
  106: 
  107: #######################################################
  108: # internal variables
  109: #
  110: 
  111: # all files in the document directory tree
  112: my %files;
  113: # all directories in the document directory tree
  114: my %dirs;
  115: # number of errors
  116: my $errcnt = 0;
  117: # number of warnings
  118: my $warncnt = 0;
  119: 
  120: #######################################################
  121: # subroutines
  122: #
  123: 
  124: #
  125: # fs_read_files($realdir, $docdir, \%files, \%dirs)
  126: #
  127: # reads all files and directories below $realdir and puts the
  128: # files in %files and directories in %dirs
  129: # $docdir is only for recursion, it should be empty when called 
  130: # from outside
  131: #
  132: sub fs_read_files {
  133:     my ($directory, $docdir, $files, $dirs) = @_;    
  134:     my $cnt = 0;
  135: 
  136:     if (! opendir DIR, $directory) {
  137: 	return 0;
  138:     }
  139:     my @dirfiles = readdir DIR;
  140:     foreach my $fn (@dirfiles) {
  141: 	# ignore names starting with a dot
  142: 	next if ($fn =~ /^\./);
  143: 	# ignore other silly files
  144: 	next if ($junk_files{$fn});
  145: 
  146: 	$cnt++;
  147: 	my $f = "$directory/$fn";
  148: 	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
  149: 	#logger("fs_file: \"$f\"");
  150: 	if (-f $f) {
  151: 	    #logger("  is file");
  152: 	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
  153:                       $atime,$mtime,$ctime,$blksize,$blocks)
  154:                           = stat(_); 
  155: 	    $$files{$docf} = [$fn, $size, $mtime];
  156: 	} elsif (-d _) {
  157: 	    #logger("  is dir");
  158: 	    $$dirs{$docf} = $fn;
  159: 	    # recurse into directory
  160: 	    $cnt += fs_read_files($f, $docf, $files, $dirs);
  161: 	}
  162:     }
  163:     return $cnt;
  164: }
  165: 
  166: 
  167: #
  168: # check_dirs($rootnode, \%dirs)
  169: #
  170: # reads all dir elements under $rootnode and compares with the directory list
  171: # in %dirs
  172: #
  173: sub check_dirs {
  174:     my ($root, $fsdirs) = @_;
  175: 
  176:     #
  177:     # iterate all dir tags
  178:     #
  179:     my @dirnodes = $root->findnodes('child::dir');
  180:     my %okdirs;
  181:     foreach my $dirnode (@dirnodes) {
  182: 	my $dirname = sstrip($dirnode->find('child::name'));
  183: 	my $dirpath = sstrip($dirnode->find('child::path'));
  184: 	my $description = sstrip($dirnode->find('child::description'));
  185: 	# name must be valid
  186: 	if (! valid_dir_name($dirname)) {
  187: 	    logger("ERROR", "directory name ($dirpath) $dirname invalid!");
  188: 	    $errcnt++;
  189: 	}
  190: 	# description can be present
  191: 	if (! $description) {
  192: 	    logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
  193: 	    $warncnt++;
  194: 	}
  195: 	# check with dirs on filesystem 
  196: 	my $fn;
  197: 	if ($dirpath && ($dirpath ne '.')) {
  198: 	    $fn = "$dirpath/$dirname";
  199: 	} else {
  200: 	    $fn = "$dirname";
  201: 	}
  202:         #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
  203: 	if ($$fsdirs{$fn}) {
  204: 	    #logger("  OK ($$fsdirs{$fn})");
  205: 	    $okdirs{$fn} = $dirname;
  206: 	} else {
  207: 	    if ($do_rewrite) {
  208: 		# remove dir tag
  209: 		logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
  210: 		$dirnode->unbindNode();
  211: 		$warncnt++;
  212: 	    } else {
  213: 		logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
  214: 		$errcnt++;
  215: 	    }
  216: 	}
  217:     }
  218:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
  219:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
  220: 	# number of dir tags and dirs don't match
  221: 	# iterate through all dirs
  222: 	foreach my $f (sort keys %$fsdirs) {
  223: 	    # was this dir missing?
  224: 	    if (! $okdirs{$f}) {
  225: 		my ($name, $path) = split_file_path($f, 1);
  226: 		# name must be valid
  227: 		if (! valid_dir_name($name)) {
  228: 		    $path = "." unless ($path);
  229: 		    logger("ERROR", "directory name $name (in $path/) invalid!");
  230: 		    $errcnt++;
  231: 		    next;
  232: 		}
  233: 		if ($fix_dirs) {
  234: 		    # add missing dir tag
  235: 		    my $dir_node = $root->addNewChild($namespace, "dir");
  236: 		    $xml_changed++;
  237: 		    # add name
  238: 		    my $name_node = $dir_node->addNewChild($namespace, "name");
  239: 		    $name_node->appendTextNode($name);
  240: 		    # add path
  241: 		    if ($path) {
  242: 			my $path_node = $dir_node->addNewChild($namespace, "path");
  243: 			$path_node->appendTextNode($path);
  244: 		    }
  245: 		    logger("INFO", "directory $f to be added to index file!");
  246: 		} else {
  247: 		    logger("ERROR", "directory $f missing in index file!");
  248: 		    $errcnt++;
  249: 		}
  250: 	    }
  251: 	}
  252:     }
  253: }
  254: 
  255: 
  256: #
  257: # check_files($rootnode, \%files)
  258: #
  259: # reads all file elements under $rootnode and compares with the file list
  260: # in %files
  261: #
  262: sub check_files {
  263:     my ($root, $fsfiles) = @_;
  264: 
  265:     #
  266:     # iterate all file tags
  267:     #
  268:     my @filenodes = $root->findnodes('child::file');
  269:     my %okfiles;
  270:     foreach my $filenode (@filenodes) {
  271: 	my $filename = sstrip($filenode->find('child::name'));
  272: 	my $filepath = sstrip($filenode->find('child::path'));
  273: 	my $filesize = sstrip($filenode->find('child::size'));
  274: 	my $filedate = sstrip($filenode->find('child::date'));
  275: 	# name must be valid
  276: 	if (! valid_file_name($filename)) {
  277: 	    logger("ERROR", "file name ($filepath)$filename invalid!");
  278: 	    $errcnt++;
  279: 	}
  280: 	my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
  281:         #logger("file: \"$filename\", \"$filepath\"");
  282: 	if ($$fsfiles{$fn}) {
  283: 	    #logger("  OK ($$fsfiles{$fn})");
  284: 	    $okfiles{$fn} = $filename;
  285: 	    # check file size and date
  286: 	    if ($filesize) {
  287: 		if ($filesize != $$fsfiles{$fn}->[1]) {
  288: 		    logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
  289: 		    $warncnt++;
  290: 		}
  291: 	    }
  292: 	    # file date
  293: 	    if ($filedate) {
  294: 		if ($filedate ne stime($$fsfiles{$fn}->[2])) {
  295: 		    logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
  296: 		    $warncnt++;
  297: 		}
  298: 	    }
  299: 	    # update file size and date
  300: 	    if ($fix_fs_meta) {
  301: 		# delete size and date
  302: 		foreach my $n ($filenode->findnodes('child::size')) {
  303: 		    $filenode->removeChild($n);
  304: 		}
  305: 		foreach my $n ($filenode->findnodes('child::date')) {
  306: 		    $filenode->removeChild($n);
  307: 		}
  308: 		# add new size and date
  309: 		my $node = $filenode->addNewChild($namespace, "size");
  310: 		$node->appendTextNode($$fsfiles{$fn}->[1]);
  311: 		$node = $filenode->addNewChild($namespace, "date");
  312: 		$node->appendTextNode(stime($$fsfiles{$fn}->[2]));
  313: 		$xml_changed++;
  314: 	    }
  315: 	} else {
  316: 	    if ($do_rewrite) {
  317: 		# remove file tag
  318: 		logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
  319: 		$filenode->unbindNode();
  320: 		$warncnt++;
  321: 	    } else {
  322: 		logger("ERROR", "file $filename (in $filepath/) missing on disk!");
  323: 		$errcnt++;
  324: 	    }
  325: 	}
  326:     }
  327:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
  328:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
  329: 	# number of file tags and files don't match
  330: 	# iterate through all files
  331: 	foreach my $f (sort keys %$fsfiles) {
  332: 	    my ($name, $path) = split_file_path($f, 1);
  333: 	    # was this file missing?
  334: 	    if (! $okfiles{$f}) {
  335: 		# is an ignoreable file?
  336: 		if ($index_files{$name}) {
  337: 		    next;
  338: 		}
  339: 		# name must be valid
  340: 		if (! valid_dir_name($name)) {
  341: 		    $path = "." unless ($path);
  342: 		    logger("ERROR", "file name $name (in $path/) invalid!");
  343: 		    $errcnt++;
  344: 		    next;
  345: 		}
  346: 		if ($fix_files) {
  347: 		    # add missing file tag
  348: 		    my $file_node = $root->addNewChild($namespace, "file");
  349: 		    $xml_changed++;
  350: 		    # add name
  351: 		    my $name_node = $file_node->addNewChild($namespace, "name");
  352: 		    $name_node->appendTextNode($name);
  353: 		    # add path
  354: 		    if ($path) {
  355: 			my $path_node = $file_node->addNewChild($namespace, "path");
  356: 			$path_node->appendTextNode($path);
  357: 		    }
  358: 		    # add size
  359: 		    my $size_node = $file_node->addNewChild($namespace, "size");
  360: 		    $size_node->appendTextNode($$fsfiles{$f}->[1]);
  361: 		    # add date
  362: 		    my $date_node = $file_node->addNewChild($namespace, "date");
  363: 		    $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
  364: 		    logger("INFO", "file $f to be added to index file!");
  365: 		} else {
  366: 		    logger("ERROR", "file $f missing in index file!");
  367: 		    $errcnt++;
  368: 		}
  369: 	    }
  370: 	}
  371:     }
  372: }
  373: 
  374: #
  375: # check_resource_meta($rootnode)
  376: #
  377: # checks general resource meta information
  378: #
  379: sub check_resource_meta {
  380:     my ($rootnode) = @_;
  381: 
  382:     #
  383:     # description
  384:     #
  385:     my $description = $rootnode->findvalue('child::description');
  386:     if (! $description) {
  387: 	logger("ERROR", "resource description element missing!");
  388: 	$errcnt++;
  389:     }
  390:     #
  391:     # name
  392:     #
  393:     my $name = sstrip($rootnode->findvalue('child::name'));
  394:     if ($name) {
  395: 	my ($dirname, $dirpath) = split_file_path($docdir);
  396: 	if ($dirname ne $name) {
  397: 	    logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
  398: 	    $errcnt++;
  399: 	}
  400:     } else {
  401: 	logger("ERROR", "resource name element missing!");
  402: 	$errcnt++;
  403:     }
  404:     #
  405:     # archive path
  406:     #
  407:     my $realpath;
  408:     # get real path
  409:     if ($docdir =~ /^\//) {
  410: 	# docdir is absolute
  411: 	$realpath = $docdir;
  412:     } else {
  413: 	# docdir is relative -- try with the shell
  414: 	if (open PWDCMD, "cd $docdir ; pwd|") {
  415: 	    $realpath = <PWDCMD>;
  416: 	    chomp $realpath;
  417: 	}
  418:     }
  419:     if (! $realpath) {
  420: 	logger("ERROR", "unable to check real archive path!");
  421: 	$errcnt++;
  422: 	return;
  423:     }
  424:     # get archive-path
  425:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
  426:     if ($archnode) {
  427: 	my $arch = sstrip($archnode->textContent);
  428: 	if ($arch ne $realpath) {
  429: 	    logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
  430: 	    $warncnt++;
  431: 	    # correct archive-path
  432: 	    $archnode->removeChildNodes;
  433: 	    $archnode->appendTextNode($realpath);
  434: 	    $xml_changed++;
  435: 	}
  436:     } else {
  437: 	# add archive-path
  438: 	$archnode = $rootnode->addNewChild($namespace, "archive-path");
  439: 	$archnode->appendTextNode($realpath);
  440: 	$xml_changed++;
  441:     }
  442: 
  443: }
  444: 
  445:     
  446: 
  447: #######################################################
  448: # main
  449: #
  450: 
  451: my ($document, $rootnode) = read_xml($metafile);
  452: 
  453: check_resource_meta($rootnode);
  454: 
  455: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
  456: logger("INFO", "$fnum files on FS");
  457: #foreach (keys %dirs) {logger('DEBUG', "  dir ($_): $dirs{$_}");}
  458: 
  459: check_files($rootnode, \%files);
  460: check_dirs($rootnode, \%dirs);
  461: 
  462: logger("INFO", "$warncnt warnings");
  463: logger("INFO", "$errcnt errors");
  464: if ($errcnt > 0) {
  465:     logger("ABORT", "there were $errcnt errors!");
  466:     exit 1;
  467: } else {
  468:     if ($fix_xml) {
  469: 	if ($dry_run) {
  470: 	    logger('INFO', "would write $metafile");
  471: 	    logger('DEBUG', $document->toString(1));
  472: 	} else {
  473: 	    write_xml($document, $metafile);
  474: 	}
  475:     }
  476:     logger("DONE", "index file checked successfully!");
  477: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>