File:  [Repository] / foxridge-archiver / metacheck.pl
Revision 1.5: download - view: text, annotated - select for diffs - revision graph
Tue Oct 4 14:36:32 2005 UTC (18 years, 8 months ago) by casties
Branches: MAIN
CVS tags: HEAD
-replace deals with invalid filenames in index files now

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: use XML::LibXML;
    5: 
    6: use lib '/usr/local/mpiwg/archive';
    7: use MPIWGStor;
    8: 
    9: # make output unbuffered
   10: $|=1;
   11: 
   12: #######################################################
   13: # internal parameters
   14: #
   15: 
   16: # program version
   17: my $version = "0.6.0 (20.9.2005)";
   18: my $help = 
   19: "use: metacheck [options] docdir
   20: options:
   21:   -debug  show debugging info
   22:   -dry-run  simulate, dont'do anything
   23:   -checkonly  leave existing index file untouched
   24:   -add-files  add file tags for missing files
   25:   -replace  rewrite index file to match current files
   26: ";
   27: logger("INFO", "metacheck $version");
   28: 
   29: # read command line parameters
   30: my $args = MPIWGStor::parseargs;
   31: if (! scalar(%$args)) {
   32:     print $help, "\n";
   33:     exit 1;
   34: }
   35: 
   36: # debug level
   37: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   38: 
   39: # simulate action only
   40: my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
   41: logger('DEBUG', "dry-run: $dry_run");
   42: 
   43: # check only or fix index file also
   44: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
   45: 
   46: # add file tags for missing files
   47: my $fix_files = ! $check_only;
   48: # add tags for file size and date
   49: my $fix_fs_meta = 1;
   50: # add dir tags for missing directories
   51: my $fix_dirs = ! $check_only;
   52: # rewrite XML file (necessary for fix_files and fix_dirs)
   53: my $fix_xml = ! $check_only;
   54: # rewrite complete index file
   55: my $do_rewrite = 0;
   56: 
   57: # add file tags for missing files
   58: if (exists $$args{'add-files'}) {
   59:     $check_only = 0;
   60:     $fix_files = 1;
   61:     $fix_dirs = 1;
   62:     $fix_xml = 1;
   63:     $do_rewrite = 0;
   64:     logger('DEBUG', "add-files: true");
   65: }
   66: 
   67: # completely rewrite index file
   68: if (exists $$args{'replace'}) {
   69:     $check_only = 0;
   70:     $fix_files = 1;
   71:     $fix_dirs = 1;
   72:     $fix_xml = 1;
   73:     $do_rewrite = 1;
   74:     logger('DEBUG', "replace: true");
   75: }
   76: logger('DEBUG', "checkonly: $check_only");
   77: 
   78: 
   79: my $xml_changed = 0;
   80: # XML namespace (not really implemented!)
   81: my $namespace = "";
   82: 
   83: 
   84: #######################################################
   85: # check parameters that were passed to the program
   86: #
   87: my $docdir = $$args{'path'};
   88: if (! $docdir) {
   89:     logger("ABORT", "no document directory given!");
   90:     exit 1;
   91: }
   92: # strip double slashes
   93: $docdir =~ s/\/\//\//;
   94: # strip trailing slashes
   95: $docdir =~ s/\/$//;
   96: if (! -d $docdir) {
   97:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
   98:     exit 1;
   99: }
  100: 
  101: my $metafile = "$docdir/index.meta";
  102: if (! -f $metafile) {
  103:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
  104:     exit 1;
  105: }
  106: 
  107: #######################################################
  108: # internal variables
  109: #
  110: 
  111: # all files in the document directory tree
  112: my %files;
  113: # all directories in the document directory tree
  114: my %dirs;
  115: # number of errors
  116: my $errcnt = 0;
  117: # number of warnings
  118: my $warncnt = 0;
  119: 
  120: #######################################################
  121: # subroutines
  122: #
  123: 
  124: #
  125: # fs_read_files($realdir, $docdir, \%files, \%dirs)
  126: #
  127: # reads all files and directories below $realdir and puts the
  128: # files in %files and directories in %dirs
  129: # $docdir is only for recursion, it should be empty when called 
  130: # from outside
  131: #
  132: sub fs_read_files {
  133:     my ($directory, $docdir, $files, $dirs) = @_;    
  134:     my $cnt = 0;
  135: 
  136:     if (! opendir DIR, $directory) {
  137: 	return 0;
  138:     }
  139:     my @dirfiles = readdir DIR;
  140:     foreach my $fn (@dirfiles) {
  141: 	# ignore names starting with a dot
  142: 	next if ($fn =~ /^\./);
  143: 	# ignore other silly files
  144: 	next if ($junk_files{$fn});
  145: 
  146: 	$cnt++;
  147: 	my $f = "$directory/$fn";
  148: 	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
  149: 	#logger("fs_file: \"$f\"");
  150: 	if (-f $f) {
  151: 	    #logger("  is file");
  152: 	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
  153:                       $atime,$mtime,$ctime,$blksize,$blocks)
  154:                           = stat(_); 
  155: 	    $$files{$docf} = [$fn, $size, $mtime];
  156: 	} elsif (-d _) {
  157: 	    #logger("  is dir");
  158: 	    $$dirs{$docf} = $fn;
  159: 	    # recurse into directory
  160: 	    $cnt += fs_read_files($f, $docf, $files, $dirs);
  161: 	}
  162:     }
  163:     return $cnt;
  164: }
  165: 
  166: 
  167: #
  168: # check_dirs($rootnode, \%dirs)
  169: #
  170: # reads all dir elements under $rootnode and compares with the directory list
  171: # in %dirs
  172: #
  173: sub check_dirs {
  174:     my ($root, $fsdirs) = @_;
  175: 
  176:     #
  177:     # iterate all dir tags
  178:     #
  179:     my @dirnodes = $root->findnodes('child::dir');
  180:     my %okdirs;
  181:     foreach my $dirnode (@dirnodes) {
  182: 	my $dirname = sstrip($dirnode->find('child::name'));
  183: 	my $dirpath = sstrip($dirnode->find('child::path'));
  184: 	my $description = sstrip($dirnode->find('child::description'));
  185: 	# name must be valid
  186: 	if (! valid_dir_name($dirname)) {
  187: 	    if ($do_rewrite) {
  188: 		logger("WARNING", "directory name ($dirpath) $dirname in index file invalid!");
  189: 		$warncnt++;
  190: 	    } else {
  191: 		logger("ERROR", "directory name ($dirpath) $dirname invalid!");
  192: 		$errcnt++;
  193: 	    }
  194: 	}
  195: 	# description can be present
  196: 	if (! $description) {
  197: 	    logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
  198: 	    $warncnt++;
  199: 	}
  200: 	# check with dirs on filesystem 
  201: 	my $fn;
  202: 	if ($dirpath && ($dirpath ne '.')) {
  203: 	    $fn = "$dirpath/$dirname";
  204: 	} else {
  205: 	    $fn = "$dirname";
  206: 	}
  207:         #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
  208: 	if ($$fsdirs{$fn}) {
  209: 	    #logger("  OK ($$fsdirs{$fn})");
  210: 	    $okdirs{$fn} = $dirname;
  211: 	} else {
  212: 	    if ($do_rewrite) {
  213: 		# remove dir tag
  214: 		logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
  215: 		$dirnode->unbindNode();
  216: 		$warncnt++;
  217: 	    } else {
  218: 		logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
  219: 		$errcnt++;
  220: 	    }
  221: 	}
  222:     }
  223:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
  224:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
  225: 	# number of dir tags and dirs don't match
  226: 	# iterate through all dirs
  227: 	foreach my $f (sort keys %$fsdirs) {
  228: 	    # was this dir missing?
  229: 	    if (! $okdirs{$f}) {
  230: 		my ($name, $path) = split_file_path($f, 1);
  231: 		# name must be valid
  232: 		if (! valid_dir_name($name)) {
  233: 		    $path = "." unless ($path);
  234: 		    logger("ERROR", "directory name $name (in $path/) invalid!");
  235: 		    $errcnt++;
  236: 		    next;
  237: 		}
  238: 		if ($fix_dirs) {
  239: 		    # add missing dir tag
  240: 		    my $dir_node = $root->addNewChild($namespace, "dir");
  241: 		    $xml_changed++;
  242: 		    # add name
  243: 		    my $name_node = $dir_node->addNewChild($namespace, "name");
  244: 		    $name_node->appendTextNode($name);
  245: 		    # add path
  246: 		    if ($path) {
  247: 			my $path_node = $dir_node->addNewChild($namespace, "path");
  248: 			$path_node->appendTextNode($path);
  249: 		    }
  250: 		    logger("INFO", "directory $f to be added to index file!");
  251: 		} else {
  252: 		    logger("ERROR", "directory $f missing in index file!");
  253: 		    $errcnt++;
  254: 		}
  255: 	    }
  256: 	}
  257:     }
  258: }
  259: 
  260: 
  261: #
  262: # check_files($rootnode, \%files)
  263: #
  264: # reads all file elements under $rootnode and compares with the file list
  265: # in %files
  266: #
  267: sub check_files {
  268:     my ($root, $fsfiles) = @_;
  269: 
  270:     #
  271:     # iterate all file tags
  272:     #
  273:     my @filenodes = $root->findnodes('child::file');
  274:     my %okfiles;
  275:     foreach my $filenode (@filenodes) {
  276: 	my $filename = sstrip($filenode->find('child::name'));
  277: 	my $filepath = sstrip($filenode->find('child::path'));
  278: 	my $filesize = sstrip($filenode->find('child::size'));
  279: 	my $filedate = sstrip($filenode->find('child::date'));
  280: 	# name must be valid
  281: 	if (! valid_file_name($filename)) {
  282: 	    if ($do_rewrite) {
  283: 		logger("WARNING", "file name ($filepath)$filename in index file invalid!");
  284: 		$warncnt++;
  285: 	    } else {
  286: 		logger("ERROR", "file name ($filepath)$filename invalid!");
  287: 		$errcnt++;
  288: 	    }
  289: 	}
  290: 	my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
  291:         #logger("file: \"$filename\", \"$filepath\"");
  292: 	if ($$fsfiles{$fn}) {
  293: 	    #logger("  OK ($$fsfiles{$fn})");
  294: 	    $okfiles{$fn} = $filename;
  295: 	    # check file size and date
  296: 	    if ($filesize) {
  297: 		if ($filesize != $$fsfiles{$fn}->[1]) {
  298: 		    logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
  299: 		    $warncnt++;
  300: 		}
  301: 	    }
  302: 	    # file date
  303: 	    if ($filedate) {
  304: 		if ($filedate ne stime($$fsfiles{$fn}->[2])) {
  305: 		    logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
  306: 		    $warncnt++;
  307: 		}
  308: 	    }
  309: 	    # update file size and date
  310: 	    if ($fix_fs_meta) {
  311: 		# delete size and date
  312: 		foreach my $n ($filenode->findnodes('child::size')) {
  313: 		    $filenode->removeChild($n);
  314: 		}
  315: 		foreach my $n ($filenode->findnodes('child::date')) {
  316: 		    $filenode->removeChild($n);
  317: 		}
  318: 		# add new size and date
  319: 		my $node = $filenode->addNewChild($namespace, "size");
  320: 		$node->appendTextNode($$fsfiles{$fn}->[1]);
  321: 		$node = $filenode->addNewChild($namespace, "date");
  322: 		$node->appendTextNode(stime($$fsfiles{$fn}->[2]));
  323: 		$xml_changed++;
  324: 	    }
  325: 	} else {
  326: 	    if ($do_rewrite) {
  327: 		# remove file tag
  328: 		logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
  329: 		$filenode->unbindNode();
  330: 		$warncnt++;
  331: 	    } else {
  332: 		logger("ERROR", "file $filename (in $filepath/) missing on disk!");
  333: 		$errcnt++;
  334: 	    }
  335: 	}
  336:     }
  337:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
  338:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
  339: 	# number of file tags and files don't match
  340: 	# iterate through all files
  341: 	foreach my $f (sort keys %$fsfiles) {
  342: 	    my ($name, $path) = split_file_path($f, 1);
  343: 	    # was this file missing?
  344: 	    if (! $okfiles{$f}) {
  345: 		# is an ignoreable file?
  346: 		if ($index_files{$name}) {
  347: 		    next;
  348: 		}
  349: 		# name must be valid
  350: 		if (! valid_dir_name($name)) {
  351: 		    $path = "." unless ($path);
  352: 		    logger("ERROR", "file name $name (in $path/) invalid!");
  353: 		    $errcnt++;
  354: 		    next;
  355: 		}
  356: 		if ($fix_files) {
  357: 		    # add missing file tag
  358: 		    my $file_node = $root->addNewChild($namespace, "file");
  359: 		    $xml_changed++;
  360: 		    # add name
  361: 		    my $name_node = $file_node->addNewChild($namespace, "name");
  362: 		    $name_node->appendTextNode($name);
  363: 		    # add path
  364: 		    if ($path) {
  365: 			my $path_node = $file_node->addNewChild($namespace, "path");
  366: 			$path_node->appendTextNode($path);
  367: 		    }
  368: 		    # add size
  369: 		    my $size_node = $file_node->addNewChild($namespace, "size");
  370: 		    $size_node->appendTextNode($$fsfiles{$f}->[1]);
  371: 		    # add date
  372: 		    my $date_node = $file_node->addNewChild($namespace, "date");
  373: 		    $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
  374: 		    logger("INFO", "file $f to be added to index file!");
  375: 		} else {
  376: 		    logger("ERROR", "file $f missing in index file!");
  377: 		    $errcnt++;
  378: 		}
  379: 	    }
  380: 	}
  381:     }
  382: }
  383: 
  384: #
  385: # check_resource_meta($rootnode)
  386: #
  387: # checks general resource meta information
  388: #
  389: sub check_resource_meta {
  390:     my ($rootnode) = @_;
  391: 
  392:     #
  393:     # description
  394:     #
  395:     my $description = $rootnode->findvalue('child::description');
  396:     if (! $description) {
  397: 	logger("ERROR", "resource description element missing!");
  398: 	$errcnt++;
  399:     }
  400:     #
  401:     # name
  402:     #
  403:     my $name = sstrip($rootnode->findvalue('child::name'));
  404:     if ($name) {
  405: 	my ($dirname, $dirpath) = split_file_path($docdir);
  406: 	if ($dirname ne $name) {
  407: 	    logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
  408: 	    $errcnt++;
  409: 	}
  410:     } else {
  411: 	logger("ERROR", "resource name element missing!");
  412: 	$errcnt++;
  413:     }
  414:     #
  415:     # archive path
  416:     #
  417:     my $realpath;
  418:     # get real path
  419:     if ($docdir =~ /^\//) {
  420: 	# docdir is absolute
  421: 	$realpath = $docdir;
  422:     } else {
  423: 	# docdir is relative -- try with the shell
  424: 	if (open PWDCMD, "cd $docdir ; pwd|") {
  425: 	    $realpath = <PWDCMD>;
  426: 	    chomp $realpath;
  427: 	}
  428:     }
  429:     if (! $realpath) {
  430: 	logger("ERROR", "unable to check real archive path!");
  431: 	$errcnt++;
  432: 	return;
  433:     }
  434:     # get archive-path
  435:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
  436:     if ($archnode) {
  437: 	my $arch = sstrip($archnode->textContent);
  438: 	if ($arch ne $realpath) {
  439: 	    logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
  440: 	    $warncnt++;
  441: 	    # correct archive-path
  442: 	    $archnode->removeChildNodes;
  443: 	    $archnode->appendTextNode($realpath);
  444: 	    $xml_changed++;
  445: 	}
  446:     } else {
  447: 	# add archive-path
  448: 	$archnode = $rootnode->addNewChild($namespace, "archive-path");
  449: 	$archnode->appendTextNode($realpath);
  450: 	$xml_changed++;
  451:     }
  452: 
  453: }
  454: 
  455:     
  456: 
  457: #######################################################
  458: # main
  459: #
  460: 
  461: my ($document, $rootnode) = read_xml($metafile);
  462: 
  463: check_resource_meta($rootnode);
  464: 
  465: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
  466: logger("INFO", "$fnum files on FS");
  467: #foreach (keys %dirs) {logger('DEBUG', "  dir ($_): $dirs{$_}");}
  468: 
  469: check_files($rootnode, \%files);
  470: check_dirs($rootnode, \%dirs);
  471: 
  472: logger("INFO", "$warncnt warnings");
  473: logger("INFO", "$errcnt errors");
  474: if ($errcnt > 0) {
  475:     logger("ABORT", "there were $errcnt errors!");
  476:     exit 1;
  477: } else {
  478:     if ($fix_xml) {
  479: 	if ($dry_run) {
  480: 	    logger('INFO', "would write $metafile");
  481: 	    logger('DEBUG', $document->toString(1));
  482: 	} else {
  483: 	    write_xml($document, $metafile);
  484: 	}
  485:     }
  486:     logger("DONE", "index file checked successfully!");
  487: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>