File:  [Repository] / foxridge-archiver / metacheck.pl
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Thu Jul 8 17:27:59 2004 UTC (19 years, 11 months ago) by casties
Branches: MAIN
CVS tags: HEAD
small fix for // in pathnames

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: use XML::LibXML;
    5: 
    6: use lib '/usr/local/mpiwg/archive';
    7: use MPIWGStor;
    8: 
    9: # make output unbuffered
   10: $|=1;
   11: 
   12: #######################################################
   13: # internal parameters
   14: #
   15: 
   16: # program version
   17: my $version = "0.5.2 (7.1.2004)";
   18: 
   19: # read command line parameters
   20: my $args = MPIWGStor::parseargs;
   21: 
   22: # debug level
   23: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   24: 
   25: # check only or fix index file also
   26: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
   27: # add file tags for missing files
   28: my $fix_files = ! $check_only;
   29: # add tags for file size and date
   30: my $fix_fs_meta = 1;
   31: # add dir tags for missing directories
   32: my $fix_dirs = ! $check_only;
   33: # rewrite XML file (necessary for fix_files and fix_dirs)
   34: my $fix_xml = ! $check_only;
   35: my $xml_changed = 0;
   36: # XML namespace (not really implemented!)
   37: my $namespace = "";
   38: 
   39: 
   40: #######################################################
   41: # check parameters that were passed to the program
   42: #
   43: my $docdir = $$args{'path'};
   44: if (! $docdir) {
   45:     logger("ABORT", "no document directory given!");
   46:     exit 1;
   47: }
   48: # strip double slashes
   49: $docdir =~ s/\/\//\//;
   50: # strip trailing slashes
   51: $docdir =~ s/\/$//;
   52: if (! -d $docdir) {
   53:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
   54:     exit 1;
   55: }
   56: 
   57: my $metafile = "$docdir/index.meta";
   58: if (! -f $metafile) {
   59:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
   60:     exit 1;
   61: }
   62: 
   63: #######################################################
   64: # internal variables
   65: #
   66: 
   67: # all files in the document directory tree
   68: my %files;
   69: # all directories in the document directory tree
   70: my %dirs;
   71: # number of errors
   72: my $errcnt = 0;
   73: # number of warnings
   74: my $warncnt = 0;
   75: 
   76: #######################################################
   77: # subroutines
   78: #
   79: 
   80: #
   81: # fs_read_files($realdir, $docdir, \%files, \%dirs)
   82: #
   83: # reads all files and directories below $realdir and puts the
   84: # files in %files and directories in %dirs
   85: # $docdir is only for recursion, it should be empty when called 
   86: # from outside
   87: #
   88: sub fs_read_files {
   89:     my ($directory, $docdir, $files, $dirs) = @_;    
   90:     my $cnt = 0;
   91: 
   92:     if (! opendir DIR, $directory) {
   93: 	return 0;
   94:     }
   95:     my @dirfiles = readdir DIR;
   96:     foreach my $fn (@dirfiles) {
   97: 	# ignore names starting with a dot
   98: 	next if ($fn =~ /^\./);
   99: 	# ignore other silly files
  100: 	next if ($junk_files{$fn});
  101: 
  102: 	$cnt++;
  103: 	my $f = "$directory/$fn";
  104: 	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
  105: 	#logger("fs_file: \"$f\"");
  106: 	if (-f $f) {
  107: 	    #logger("  is file");
  108: 	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
  109:                       $atime,$mtime,$ctime,$blksize,$blocks)
  110:                           = stat(_); 
  111: 	    $$files{$docf} = [$fn, $size, $mtime];
  112: 	} elsif (-d _) {
  113: 	    #logger("  is dir");
  114: 	    $$dirs{$docf} = $fn;
  115: 	    # recurse into directory
  116: 	    $cnt += fs_read_files($f, $docf, $files, $dirs);
  117: 	}
  118:     }
  119:     return $cnt;
  120: }
  121: 
  122: 
  123: #
  124: # check_dirs($rootnode, \%dirs)
  125: #
  126: # reads all dir elements under $rootnode and compares with the directory list
  127: # in %dirs
  128: #
  129: sub check_dirs {
  130:     my ($root, $fsdirs) = @_;
  131: 
  132:     #
  133:     # iterate all dir tags
  134:     #
  135:     my @dirnodes = $root->findnodes('child::dir');
  136:     my %okdirs;
  137:     foreach my $dirnode (@dirnodes) {
  138: 	my $dirname = sstrip($dirnode->find('child::name'));
  139: 	my $dirpath = sstrip($dirnode->find('child::path'));
  140: 	my $description = sstrip($dirnode->find('child::description'));
  141: 	# name must be valid
  142: 	if (! valid_dir_name($dirname)) {
  143: 	    logger("ERROR", "directory name ($dirpath) $dirname invalid!");
  144: 	    $errcnt++;
  145: 	}
  146: 	# description can be present
  147: 	if (! $description) {
  148: 	    logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
  149: 	    $warncnt++;
  150: 	}
  151: 	# check with dirs on filesystem 
  152: 	my $fn;
  153: 	if ($dirpath) {
  154: 	    $fn = "$dirpath/$dirname";
  155: 	} else {
  156: 	    $fn = "$dirname";
  157: 	}
  158:         #logger("dir: \"$dirname\", \"$dirpath\"");
  159: 	if ($$fsdirs{$fn}) {
  160: 	    #logger("  OK ($$fsdirs{$fn})");
  161: 	    $okdirs{$fn} = $dirname;
  162: 	} else {
  163: 	    logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
  164: 	    $errcnt++;
  165: 	}
  166:     }
  167:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
  168:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
  169: 	# number of dir tags and dirs don't match
  170: 	# iterate through all dirs
  171: 	foreach my $f (sort keys %$fsdirs) {
  172: 	    # was this dir missing?
  173: 	    if (! $okdirs{$f}) {
  174: 		my ($name, $path) = split_file_path($f);
  175: 		# name must be valid
  176: 		if (! valid_dir_name($name)) {
  177: 		    $path = "." unless ($path);
  178: 		    logger("ERROR", "directory name $name (in $path/) invalid!");
  179: 		    $errcnt++;
  180: 		    next;
  181: 		}
  182: 		if ($fix_dirs) {
  183: 		    # add missing dir tag
  184: 		    my $dir_node = $root->addNewChild($namespace, "dir");
  185: 		    $xml_changed++;
  186: 		    # add name
  187: 		    my $name_node = $dir_node->addNewChild($namespace, "name");
  188: 		    $name_node->appendTextNode($name);
  189: 		    # add path
  190: 		    if ($path) {
  191: 			my $path_node = $dir_node->addNewChild($namespace, "path");
  192: 			$path_node->appendTextNode($path);
  193: 		    }
  194: 		    logger("INFO", "directory $f to be added to index file!");
  195: 		} else {
  196: 		    logger("ERROR", "directory $f missing in index file!");
  197: 		    $errcnt++;
  198: 		}
  199: 	    }
  200: 	}
  201:     }
  202: }
  203: 
  204: 
  205: #
  206: # check_files($rootnode, \%files)
  207: #
  208: # reads all file elements under $rootnode and compares with the file list
  209: # in %files
  210: #
  211: sub check_files {
  212:     my ($root, $fsfiles) = @_;
  213: 
  214:     #
  215:     # iterate all file tags
  216:     #
  217:     my @filenodes = $root->findnodes('child::file');
  218:     my %okfiles;
  219:     foreach my $filenode (@filenodes) {
  220: 	my $filename = sstrip($filenode->find('child::name'));
  221: 	my $filepath = sstrip($filenode->find('child::path'));
  222: 	my $filesize = sstrip($filenode->find('child::size'));
  223: 	my $filedate = sstrip($filenode->find('child::date'));
  224: 	# name must be valid
  225: 	if (! valid_file_name($filename)) {
  226: 	    logger("ERROR", "file name ($filepath)$filename invalid!");
  227: 	    $errcnt++;
  228: 	}
  229: 	my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
  230:         #logger("file: \"$filename\", \"$filepath\"");
  231: 	if ($$fsfiles{$fn}) {
  232: 	    #logger("  OK ($$fsfiles{$fn})");
  233: 	    $okfiles{$fn} = $filename;
  234: 	    # check file size and date
  235: 	    if ($filesize) {
  236: 		if ($filesize != $$fsfiles{$fn}->[1]) {
  237: 		    logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
  238: 		    $warncnt++;
  239: 		}
  240: 	    }
  241: 	    # file date
  242: 	    if ($filedate) {
  243: 		if ($filedate ne stime($$fsfiles{$fn}->[2])) {
  244: 		    logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
  245: 		    $warncnt++;
  246: 		}
  247: 	    }
  248: 	    # update file size and date
  249: 	    if ($fix_fs_meta) {
  250: 		# delete size and date
  251: 		foreach my $n ($filenode->findnodes('child::size')) {
  252: 		    $filenode->removeChild($n);
  253: 		}
  254: 		foreach my $n ($filenode->findnodes('child::date')) {
  255: 		    $filenode->removeChild($n);
  256: 		}
  257: 		# add new size and date
  258: 		my $node = $filenode->addNewChild($namespace, "size");
  259: 		$node->appendTextNode($$fsfiles{$fn}->[1]);
  260: 		$node = $filenode->addNewChild($namespace, "date");
  261: 		$node->appendTextNode(stime($$fsfiles{$fn}->[2]));
  262: 		$xml_changed++;
  263: 	    }
  264: 	} else {
  265: 	    logger("ERROR", "file $filename (in $filepath/) missing on disk!");
  266: 	    $errcnt++;
  267: 	}
  268:     }
  269:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
  270:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
  271: 	# number of file tags and files don't match
  272: 	# iterate through all files
  273: 	foreach my $f (sort keys %$fsfiles) {
  274: 	    my ($name, $path) = split_file_path($f);
  275: 	    # was this file missing?
  276: 	    if (! $okfiles{$f}) {
  277: 		# is an ignoreable file?
  278: 		if ($index_files{$name}) {
  279: 		    next;
  280: 		}
  281: 		# name must be valid
  282: 		if (! valid_dir_name($name)) {
  283: 		    $path = "." unless ($path);
  284: 		    logger("ERROR", "file name $name (in $path/) invalid!");
  285: 		    $errcnt++;
  286: 		    next;
  287: 		}
  288: 		if ($fix_files) {
  289: 		    # add missing file tag
  290: 		    my $file_node = $root->addNewChild($namespace, "file");
  291: 		    $xml_changed++;
  292: 		    # add name
  293: 		    my $name_node = $file_node->addNewChild($namespace, "name");
  294: 		    $name_node->appendTextNode($name);
  295: 		    # add path
  296: 		    if ($path) {
  297: 			my $path_node = $file_node->addNewChild($namespace, "path");
  298: 			$path_node->appendTextNode($path);
  299: 		    }
  300: 		    # add size
  301: 		    my $size_node = $file_node->addNewChild($namespace, "size");
  302: 		    $size_node->appendTextNode($$fsfiles{$f}->[1]);
  303: 		    # add date
  304: 		    my $date_node = $file_node->addNewChild($namespace, "date");
  305: 		    $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
  306: 		    logger("INFO", "file $f to be added to index file!");
  307: 		} else {
  308: 		    logger("ERROR", "file $f missing in index file!");
  309: 		    $errcnt++;
  310: 		}
  311: 	    }
  312: 	}
  313:     }
  314: }
  315: 
  316: #
  317: # check_resource_meta($rootnode)
  318: #
  319: # checks general resource meta information
  320: #
  321: sub check_resource_meta {
  322:     my ($rootnode) = @_;
  323: 
  324:     #
  325:     # description
  326:     #
  327:     my $description = $rootnode->findvalue('child::description');
  328:     if (! $description) {
  329: 	logger("ERROR", "resource description element missing!");
  330: 	$errcnt++;
  331:     }
  332:     #
  333:     # name
  334:     #
  335:     my $name = sstrip($rootnode->findvalue('child::name'));
  336:     if ($name) {
  337: 	my ($dirname, $dirpath) = split_file_path($docdir);
  338: 	if ($dirname ne $name) {
  339: 	    logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
  340: 	    $errcnt++;
  341: 	}
  342:     } else {
  343: 	logger("ERROR", "resource name element missing!");
  344: 	$errcnt++;
  345:     }
  346:     #
  347:     # archive path
  348:     #
  349:     my $realpath;
  350:     # get real path
  351:     if ($docdir =~ /^\//) {
  352: 	# docdir is absolute
  353: 	$realpath = $docdir;
  354:     } else {
  355: 	# docdir is relative -- try with the shell
  356: 	if (open PWDCMD, "cd $docdir ; pwd|") {
  357: 	    $realpath = <PWDCMD>;
  358: 	    chomp $realpath;
  359: 	}
  360:     }
  361:     if (! $realpath) {
  362: 	logger("ERROR", "unable to check real archive path!");
  363: 	$errcnt++;
  364: 	return;
  365:     }
  366:     # get archive-path
  367:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
  368:     if ($archnode) {
  369: 	my $arch = sstrip($archnode->textContent);
  370: 	if ($arch ne $realpath) {
  371: 	    logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
  372: 	    $warncnt++;
  373: 	    # correct archive-path
  374: 	    $archnode->removeChildNodes;
  375: 	    $archnode->appendTextNode($realpath);
  376: 	    $xml_changed++;
  377: 	}
  378:     } else {
  379: 	# add archive-path
  380: 	$archnode = $rootnode->addNewChild($namespace, "archive-path");
  381: 	$archnode->appendTextNode($realpath);
  382: 	$xml_changed++;
  383:     }
  384: 
  385: }
  386: 
  387:     
  388: 
  389: #######################################################
  390: # main
  391: #
  392: 
  393: logger("INFO", "metacheck $version");
  394:     
  395: my ($document, $rootnode) = read_xml($metafile);
  396: 
  397: check_resource_meta($rootnode);
  398: 
  399: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
  400: logger("INFO", "$fnum files on FS");
  401: #foreach (keys %files) {logger("  file ($_): $files{$_}");}
  402: 
  403: check_files($rootnode, \%files);
  404: check_dirs($rootnode, \%dirs);
  405: 
  406: logger("INFO", "$warncnt warnings");
  407: logger("INFO", "$errcnt errors");
  408: if ($errcnt > 0) {
  409:     logger("ABORT", "there were errors!");
  410:     exit 1;
  411: } else {
  412:     if ($fix_xml) {
  413: 	write_xml($document, $metafile);
  414:     }
  415:     logger("DONE", "index file checked successfully!");
  416: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>