File:  [Repository] / foxridge-archiver / metacheck.pl
Revision 1.1: download - view: text, annotated - select for diffs - revision graph
Thu Jun 17 15:58:42 2004 UTC (20 years ago) by casties
Branches: MAIN
CVS tags: HEAD
Initial revision

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: use XML::LibXML;
    5: 
    6: use lib '/usr/local/mpiwg/archive';
    7: use MPIWGStor;
    8: 
    9: # make output unbuffered
   10: $|=1;
   11: 
   12: #######################################################
   13: # internal parameters
   14: #
   15: 
   16: # program version
   17: my $version = "0.5.2 (7.1.2004)";
   18: 
   19: # read command line parameters
   20: my $args = MPIWGStor::parseargs;
   21: 
   22: # debug level
   23: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
   24: 
   25: # check only or fix index file also
   26: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
   27: # add file tags for missing files
   28: my $fix_files = ! $check_only;
   29: # add tags for file size and date
   30: my $fix_fs_meta = 1;
   31: # add dir tags for missing directories
   32: my $fix_dirs = ! $check_only;
   33: # rewrite XML file (necessary for fix_files and fix_dirs)
   34: my $fix_xml = ! $check_only;
   35: my $xml_changed = 0;
   36: # XML namespace (not really implemented!)
   37: my $namespace = "";
   38: 
   39: 
   40: #######################################################
   41: # check parameters that were passed to the program
   42: #
   43: my $docdir = $$args{'path'};
   44: if (! $docdir) {
   45:     logger("ABORT", "no document directory given!");
   46:     exit 1;
   47: }
   48: # strip trailing slashes
   49: $docdir =~ s/\/$//;
   50: if (! -d $docdir) {
   51:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
   52:     exit 1;
   53: }
   54: 
   55: my $metafile = "$docdir/index.meta";
   56: if (! -f $metafile) {
   57:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
   58:     exit 1;
   59: }
   60: 
   61: #######################################################
   62: # internal variables
   63: #
   64: 
   65: # all files in the document directory tree
   66: my %files;
   67: # all directories in the document directory tree
   68: my %dirs;
   69: # number of errors
   70: my $errcnt = 0;
   71: # number of warnings
   72: my $warncnt = 0;
   73: 
   74: #######################################################
   75: # subroutines
   76: #
   77: 
   78: #
   79: # fs_read_files($realdir, $docdir, \%files, \%dirs)
   80: #
   81: # reads all files and directories below $realdir and puts the
   82: # files in %files and directories in %dirs
   83: # $docdir is only for recursion, it should be empty when called 
   84: # from outside
   85: #
   86: sub fs_read_files {
   87:     my ($directory, $docdir, $files, $dirs) = @_;    
   88:     my $cnt = 0;
   89: 
   90:     if (! opendir DIR, $directory) {
   91: 	return 0;
   92:     }
   93:     my @dirfiles = readdir DIR;
   94:     foreach my $fn (@dirfiles) {
   95: 	# ignore names starting with a dot
   96: 	next if ($fn =~ /^\./);
   97: 	# ignore other silly files
   98: 	next if ($junk_files{$fn});
   99: 
  100: 	$cnt++;
  101: 	my $f = "$directory/$fn";
  102: 	my $docf = ($docdir) ? "$docdir/$fn" : $fn;
  103: 	#logger("fs_file: \"$f\"");
  104: 	if (-f $f) {
  105: 	    #logger("  is file");
  106: 	    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
  107:                       $atime,$mtime,$ctime,$blksize,$blocks)
  108:                           = stat(_); 
  109: 	    $$files{$docf} = [$fn, $size, $mtime];
  110: 	} elsif (-d _) {
  111: 	    #logger("  is dir");
  112: 	    $$dirs{$docf} = $fn;
  113: 	    # recurse into directory
  114: 	    $cnt += fs_read_files($f, $docf, $files, $dirs);
  115: 	}
  116:     }
  117:     return $cnt;
  118: }
  119: 
  120: 
  121: #
  122: # check_dirs($rootnode, \%dirs)
  123: #
  124: # reads all dir elements under $rootnode and compares with the directory list
  125: # in %dirs
  126: #
  127: sub check_dirs {
  128:     my ($root, $fsdirs) = @_;
  129: 
  130:     #
  131:     # iterate all dir tags
  132:     #
  133:     my @dirnodes = $root->findnodes('child::dir');
  134:     my %okdirs;
  135:     foreach my $dirnode (@dirnodes) {
  136: 	my $dirname = sstrip($dirnode->find('child::name'));
  137: 	my $dirpath = sstrip($dirnode->find('child::path'));
  138: 	my $description = sstrip($dirnode->find('child::description'));
  139: 	# name must be valid
  140: 	if (! valid_dir_name($dirname)) {
  141: 	    logger("ERROR", "directory name ($dirpath) $dirname invalid!");
  142: 	    $errcnt++;
  143: 	}
  144: 	# description can be present
  145: 	if (! $description) {
  146: 	    logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
  147: 	    $warncnt++;
  148: 	}
  149: 	# check with dirs on filesystem 
  150: 	my $fn;
  151: 	if ($dirpath) {
  152: 	    $fn = "$dirpath/$dirname";
  153: 	} else {
  154: 	    $fn = "$dirname";
  155: 	}
  156:         #logger("dir: \"$dirname\", \"$dirpath\"");
  157: 	if ($$fsdirs{$fn}) {
  158: 	    #logger("  OK ($$fsdirs{$fn})");
  159: 	    $okdirs{$fn} = $dirname;
  160: 	} else {
  161: 	    logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
  162: 	    $errcnt++;
  163: 	}
  164:     }
  165:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
  166:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
  167: 	# number of dir tags and dirs don't match
  168: 	# iterate through all dirs
  169: 	foreach my $f (sort keys %$fsdirs) {
  170: 	    # was this dir missing?
  171: 	    if (! $okdirs{$f}) {
  172: 		my ($name, $path) = split_file_path($f);
  173: 		# name must be valid
  174: 		if (! valid_dir_name($name)) {
  175: 		    $path = "." unless ($path);
  176: 		    logger("ERROR", "directory name $name (in $path/) invalid!");
  177: 		    $errcnt++;
  178: 		    next;
  179: 		}
  180: 		if ($fix_dirs) {
  181: 		    # add missing dir tag
  182: 		    my $dir_node = $root->addNewChild($namespace, "dir");
  183: 		    $xml_changed++;
  184: 		    # add name
  185: 		    my $name_node = $dir_node->addNewChild($namespace, "name");
  186: 		    $name_node->appendTextNode($name);
  187: 		    # add path
  188: 		    if ($path) {
  189: 			my $path_node = $dir_node->addNewChild($namespace, "path");
  190: 			$path_node->appendTextNode($path);
  191: 		    }
  192: 		    logger("INFO", "directory $f to be added to index file!");
  193: 		} else {
  194: 		    logger("ERROR", "directory $f missing in index file!");
  195: 		    $errcnt++;
  196: 		}
  197: 	    }
  198: 	}
  199:     }
  200: }
  201: 
  202: 
  203: #
  204: # check_files($rootnode, \%files)
  205: #
  206: # reads all file elements under $rootnode and compares with the file list
  207: # in %files
  208: #
  209: sub check_files {
  210:     my ($root, $fsfiles) = @_;
  211: 
  212:     #
  213:     # iterate all file tags
  214:     #
  215:     my @filenodes = $root->findnodes('child::file');
  216:     my %okfiles;
  217:     foreach my $filenode (@filenodes) {
  218: 	my $filename = sstrip($filenode->find('child::name'));
  219: 	my $filepath = sstrip($filenode->find('child::path'));
  220: 	my $filesize = sstrip($filenode->find('child::size'));
  221: 	my $filedate = sstrip($filenode->find('child::date'));
  222: 	# name must be valid
  223: 	if (! valid_file_name($filename)) {
  224: 	    logger("ERROR", "file name ($filepath)$filename invalid!");
  225: 	    $errcnt++;
  226: 	}
  227: 	my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
  228:         #logger("file: \"$filename\", \"$filepath\"");
  229: 	if ($$fsfiles{$fn}) {
  230: 	    #logger("  OK ($$fsfiles{$fn})");
  231: 	    $okfiles{$fn} = $filename;
  232: 	    # check file size and date
  233: 	    if ($filesize) {
  234: 		if ($filesize != $$fsfiles{$fn}->[1]) {
  235: 		    logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
  236: 		    $warncnt++;
  237: 		}
  238: 	    }
  239: 	    # file date
  240: 	    if ($filedate) {
  241: 		if ($filedate ne stime($$fsfiles{$fn}->[2])) {
  242: 		    logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
  243: 		    $warncnt++;
  244: 		}
  245: 	    }
  246: 	    # update file size and date
  247: 	    if ($fix_fs_meta) {
  248: 		# delete size and date
  249: 		foreach my $n ($filenode->findnodes('child::size')) {
  250: 		    $filenode->removeChild($n);
  251: 		}
  252: 		foreach my $n ($filenode->findnodes('child::date')) {
  253: 		    $filenode->removeChild($n);
  254: 		}
  255: 		# add new size and date
  256: 		my $node = $filenode->addNewChild($namespace, "size");
  257: 		$node->appendTextNode($$fsfiles{$fn}->[1]);
  258: 		$node = $filenode->addNewChild($namespace, "date");
  259: 		$node->appendTextNode(stime($$fsfiles{$fn}->[2]));
  260: 		$xml_changed++;
  261: 	    }
  262: 	} else {
  263: 	    logger("ERROR", "file $filename (in $filepath/) missing on disk!");
  264: 	    $errcnt++;
  265: 	}
  266:     }
  267:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
  268:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
  269: 	# number of file tags and files don't match
  270: 	# iterate through all files
  271: 	foreach my $f (sort keys %$fsfiles) {
  272: 	    my ($name, $path) = split_file_path($f);
  273: 	    # was this file missing?
  274: 	    if (! $okfiles{$f}) {
  275: 		# is an ignoreable file?
  276: 		if ($index_files{$name}) {
  277: 		    next;
  278: 		}
  279: 		# name must be valid
  280: 		if (! valid_dir_name($name)) {
  281: 		    $path = "." unless ($path);
  282: 		    logger("ERROR", "file name $name (in $path/) invalid!");
  283: 		    $errcnt++;
  284: 		    next;
  285: 		}
  286: 		if ($fix_files) {
  287: 		    # add missing file tag
  288: 		    my $file_node = $root->addNewChild($namespace, "file");
  289: 		    $xml_changed++;
  290: 		    # add name
  291: 		    my $name_node = $file_node->addNewChild($namespace, "name");
  292: 		    $name_node->appendTextNode($name);
  293: 		    # add path
  294: 		    if ($path) {
  295: 			my $path_node = $file_node->addNewChild($namespace, "path");
  296: 			$path_node->appendTextNode($path);
  297: 		    }
  298: 		    # add size
  299: 		    my $size_node = $file_node->addNewChild($namespace, "size");
  300: 		    $size_node->appendTextNode($$fsfiles{$f}->[1]);
  301: 		    # add date
  302: 		    my $date_node = $file_node->addNewChild($namespace, "date");
  303: 		    $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
  304: 		    logger("INFO", "file $f to be added to index file!");
  305: 		} else {
  306: 		    logger("ERROR", "file $f missing in index file!");
  307: 		    $errcnt++;
  308: 		}
  309: 	    }
  310: 	}
  311:     }
  312: }
  313: 
  314: #
  315: # check_resource_meta($rootnode)
  316: #
  317: # checks general resource meta information
  318: #
  319: sub check_resource_meta {
  320:     my ($rootnode) = @_;
  321: 
  322:     #
  323:     # description
  324:     #
  325:     my $description = $rootnode->findvalue('child::description');
  326:     if (! $description) {
  327: 	logger("ERROR", "resource description element missing!");
  328: 	$errcnt++;
  329:     }
  330:     #
  331:     # name
  332:     #
  333:     my $name = sstrip($rootnode->findvalue('child::name'));
  334:     if ($name) {
  335: 	my ($dirname, $dirpath) = split_file_path($docdir);
  336: 	if ($dirname ne $name) {
  337: 	    logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
  338: 	    $errcnt++;
  339: 	}
  340:     } else {
  341: 	logger("ERROR", "resource name element missing!");
  342: 	$errcnt++;
  343:     }
  344:     #
  345:     # archive path
  346:     #
  347:     my $realpath;
  348:     # get real path
  349:     if ($docdir =~ /^\//) {
  350: 	# docdir is absolute
  351: 	$realpath = $docdir;
  352:     } else {
  353: 	# docdir is relative -- try with the shell
  354: 	if (open PWDCMD, "cd $docdir ; pwd|") {
  355: 	    $realpath = <PWDCMD>;
  356: 	    chomp $realpath;
  357: 	}
  358:     }
  359:     if (! $realpath) {
  360: 	logger("ERROR", "unable to check real archive path!");
  361: 	$errcnt++;
  362: 	return;
  363:     }
  364:     # get archive-path
  365:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
  366:     if ($archnode) {
  367: 	my $arch = sstrip($archnode->textContent);
  368: 	if ($arch ne $realpath) {
  369: 	    logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
  370: 	    $warncnt++;
  371: 	    # correct archive-path
  372: 	    $archnode->removeChildNodes;
  373: 	    $archnode->appendTextNode($realpath);
  374: 	    $xml_changed++;
  375: 	}
  376:     } else {
  377: 	# add archive-path
  378: 	$archnode = $rootnode->addNewChild($namespace, "archive-path");
  379: 	$archnode->appendTextNode($realpath);
  380: 	$xml_changed++;
  381:     }
  382: 
  383: }
  384: 
  385:     
  386: 
  387: #######################################################
  388: # main
  389: #
  390: 
  391: logger("INFO", "metacheck $version");
  392:     
  393: my ($document, $rootnode) = read_xml($metafile);
  394: 
  395: check_resource_meta($rootnode);
  396: 
  397: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
  398: logger("INFO", "$fnum files on FS");
  399: #foreach (keys %files) {logger("  file ($_): $files{$_}");}
  400: 
  401: check_files($rootnode, \%files);
  402: check_dirs($rootnode, \%dirs);
  403: 
  404: logger("INFO", "$warncnt warnings");
  405: logger("INFO", "$errcnt errors");
  406: if ($errcnt > 0) {
  407:     logger("ABORT", "there were errors!");
  408:     exit 1;
  409: } else {
  410:     if ($fix_xml) {
  411: 	write_xml($document, $metafile);
  412:     }
  413:     logger("DONE", "index file checked successfully!");
  414: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>