Annotation of foxridge-archiver/metacheck.pl, revision 1.1

1.1     ! casties     1: #!/usr/local/bin/perl -w
        !             2: 
        !             3: use strict;
        !             4: use XML::LibXML;
        !             5: 
        !             6: use lib '/usr/local/mpiwg/archive';
        !             7: use MPIWGStor;
        !             8: 
        !             9: # make output unbuffered
        !            10: $|=1;
        !            11: 
        !            12: #######################################################
        !            13: # internal parameters
        !            14: #
        !            15: 
        !            16: # program version
        !            17: my $version = "0.5.2 (7.1.2004)";
        !            18: 
        !            19: # read command line parameters
        !            20: my $args = MPIWGStor::parseargs;
        !            21: 
        !            22: # debug level
        !            23: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
        !            24: 
        !            25: # check only or fix index file also
        !            26: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
        !            27: # add file tags for missing files
        !            28: my $fix_files = ! $check_only;
        !            29: # add tags for file size and date
        !            30: my $fix_fs_meta = 1;
        !            31: # add dir tags for missing directories
        !            32: my $fix_dirs = ! $check_only;
        !            33: # rewrite XML file (necessary for fix_files and fix_dirs)
        !            34: my $fix_xml = ! $check_only;
        !            35: my $xml_changed = 0;
        !            36: # XML namespace (not really implemented!)
        !            37: my $namespace = "";
        !            38: 
        !            39: 
        !            40: #######################################################
        !            41: # check parameters that were passed to the program
        !            42: #
        !            43: my $docdir = $$args{'path'};
        !            44: if (! $docdir) {
        !            45:     logger("ABORT", "no document directory given!");
        !            46:     exit 1;
        !            47: }
        !            48: # strip trailing slashes
        !            49: $docdir =~ s/\/$//;
        !            50: if (! -d $docdir) {
        !            51:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
        !            52:     exit 1;
        !            53: }
        !            54: 
        !            55: my $metafile = "$docdir/index.meta";
        !            56: if (! -f $metafile) {
        !            57:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
        !            58:     exit 1;
        !            59: }
        !            60: 
        !            61: #######################################################
        !            62: # internal variables
        !            63: #
        !            64: 
        !            65: # all files in the document directory tree
        !            66: my %files;
        !            67: # all directories in the document directory tree
        !            68: my %dirs;
        !            69: # number of errors
        !            70: my $errcnt = 0;
        !            71: # number of warnings
        !            72: my $warncnt = 0;
        !            73: 
        !            74: #######################################################
        !            75: # subroutines
        !            76: #
        !            77: 
        !            78: #
        !            79: # fs_read_files($realdir, $docdir, \%files, \%dirs)
        !            80: #
        !            81: # reads all files and directories below $realdir and puts the
        !            82: # files in %files and directories in %dirs
        !            83: # $docdir is only for recursion, it should be empty when called 
        !            84: # from outside
        !            85: #
        !            86: sub fs_read_files {
        !            87:     my ($directory, $docdir, $files, $dirs) = @_;    
        !            88:     my $cnt = 0;
        !            89: 
        !            90:     if (! opendir DIR, $directory) {
        !            91:    return 0;
        !            92:     }
        !            93:     my @dirfiles = readdir DIR;
        !            94:     foreach my $fn (@dirfiles) {
        !            95:    # ignore names starting with a dot
        !            96:    next if ($fn =~ /^\./);
        !            97:    # ignore other silly files
        !            98:    next if ($junk_files{$fn});
        !            99: 
        !           100:    $cnt++;
        !           101:    my $f = "$directory/$fn";
        !           102:    my $docf = ($docdir) ? "$docdir/$fn" : $fn;
        !           103:    #logger("fs_file: \"$f\"");
        !           104:    if (-f $f) {
        !           105:        #logger("  is file");
        !           106:        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
        !           107:                       $atime,$mtime,$ctime,$blksize,$blocks)
        !           108:                           = stat(_); 
        !           109:        $$files{$docf} = [$fn, $size, $mtime];
        !           110:    } elsif (-d _) {
        !           111:        #logger("  is dir");
        !           112:        $$dirs{$docf} = $fn;
        !           113:        # recurse into directory
        !           114:        $cnt += fs_read_files($f, $docf, $files, $dirs);
        !           115:    }
        !           116:     }
        !           117:     return $cnt;
        !           118: }
        !           119: 
        !           120: 
        !           121: #
        !           122: # check_dirs($rootnode, \%dirs)
        !           123: #
        !           124: # reads all dir elements under $rootnode and compares with the directory list
        !           125: # in %dirs
        !           126: #
        !           127: sub check_dirs {
        !           128:     my ($root, $fsdirs) = @_;
        !           129: 
        !           130:     #
        !           131:     # iterate all dir tags
        !           132:     #
        !           133:     my @dirnodes = $root->findnodes('child::dir');
        !           134:     my %okdirs;
        !           135:     foreach my $dirnode (@dirnodes) {
        !           136:    my $dirname = sstrip($dirnode->find('child::name'));
        !           137:    my $dirpath = sstrip($dirnode->find('child::path'));
        !           138:    my $description = sstrip($dirnode->find('child::description'));
        !           139:    # name must be valid
        !           140:    if (! valid_dir_name($dirname)) {
        !           141:        logger("ERROR", "directory name ($dirpath) $dirname invalid!");
        !           142:        $errcnt++;
        !           143:    }
        !           144:    # description can be present
        !           145:    if (! $description) {
        !           146:        logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
        !           147:        $warncnt++;
        !           148:    }
        !           149:    # check with dirs on filesystem 
        !           150:    my $fn;
        !           151:    if ($dirpath) {
        !           152:        $fn = "$dirpath/$dirname";
        !           153:    } else {
        !           154:        $fn = "$dirname";
        !           155:    }
        !           156:         #logger("dir: \"$dirname\", \"$dirpath\"");
        !           157:    if ($$fsdirs{$fn}) {
        !           158:        #logger("  OK ($$fsdirs{$fn})");
        !           159:        $okdirs{$fn} = $dirname;
        !           160:    } else {
        !           161:        logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
        !           162:        $errcnt++;
        !           163:    }
        !           164:     }
        !           165:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
        !           166:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
        !           167:    # number of dir tags and dirs don't match
        !           168:    # iterate through all dirs
        !           169:    foreach my $f (sort keys %$fsdirs) {
        !           170:        # was this dir missing?
        !           171:        if (! $okdirs{$f}) {
        !           172:        my ($name, $path) = split_file_path($f);
        !           173:        # name must be valid
        !           174:        if (! valid_dir_name($name)) {
        !           175:            $path = "." unless ($path);
        !           176:            logger("ERROR", "directory name $name (in $path/) invalid!");
        !           177:            $errcnt++;
        !           178:            next;
        !           179:        }
        !           180:        if ($fix_dirs) {
        !           181:            # add missing dir tag
        !           182:            my $dir_node = $root->addNewChild($namespace, "dir");
        !           183:            $xml_changed++;
        !           184:            # add name
        !           185:            my $name_node = $dir_node->addNewChild($namespace, "name");
        !           186:            $name_node->appendTextNode($name);
        !           187:            # add path
        !           188:            if ($path) {
        !           189:            my $path_node = $dir_node->addNewChild($namespace, "path");
        !           190:            $path_node->appendTextNode($path);
        !           191:            }
        !           192:            logger("INFO", "directory $f to be added to index file!");
        !           193:        } else {
        !           194:            logger("ERROR", "directory $f missing in index file!");
        !           195:            $errcnt++;
        !           196:        }
        !           197:        }
        !           198:    }
        !           199:     }
        !           200: }
        !           201: 
        !           202: 
        !           203: #
        !           204: # check_files($rootnode, \%files)
        !           205: #
        !           206: # reads all file elements under $rootnode and compares with the file list
        !           207: # in %files
        !           208: #
        !           209: sub check_files {
        !           210:     my ($root, $fsfiles) = @_;
        !           211: 
        !           212:     #
        !           213:     # iterate all file tags
        !           214:     #
        !           215:     my @filenodes = $root->findnodes('child::file');
        !           216:     my %okfiles;
        !           217:     foreach my $filenode (@filenodes) {
        !           218:    my $filename = sstrip($filenode->find('child::name'));
        !           219:    my $filepath = sstrip($filenode->find('child::path'));
        !           220:    my $filesize = sstrip($filenode->find('child::size'));
        !           221:    my $filedate = sstrip($filenode->find('child::date'));
        !           222:    # name must be valid
        !           223:    if (! valid_file_name($filename)) {
        !           224:        logger("ERROR", "file name ($filepath)$filename invalid!");
        !           225:        $errcnt++;
        !           226:    }
        !           227:    my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
        !           228:         #logger("file: \"$filename\", \"$filepath\"");
        !           229:    if ($$fsfiles{$fn}) {
        !           230:        #logger("  OK ($$fsfiles{$fn})");
        !           231:        $okfiles{$fn} = $filename;
        !           232:        # check file size and date
        !           233:        if ($filesize) {
        !           234:        if ($filesize != $$fsfiles{$fn}->[1]) {
        !           235:            logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
        !           236:            $warncnt++;
        !           237:        }
        !           238:        }
        !           239:        # file date
        !           240:        if ($filedate) {
        !           241:        if ($filedate ne stime($$fsfiles{$fn}->[2])) {
        !           242:            logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
        !           243:            $warncnt++;
        !           244:        }
        !           245:        }
        !           246:        # update file size and date
        !           247:        if ($fix_fs_meta) {
        !           248:        # delete size and date
        !           249:        foreach my $n ($filenode->findnodes('child::size')) {
        !           250:            $filenode->removeChild($n);
        !           251:        }
        !           252:        foreach my $n ($filenode->findnodes('child::date')) {
        !           253:            $filenode->removeChild($n);
        !           254:        }
        !           255:        # add new size and date
        !           256:        my $node = $filenode->addNewChild($namespace, "size");
        !           257:        $node->appendTextNode($$fsfiles{$fn}->[1]);
        !           258:        $node = $filenode->addNewChild($namespace, "date");
        !           259:        $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
        !           260:        $xml_changed++;
        !           261:        }
        !           262:    } else {
        !           263:        logger("ERROR", "file $filename (in $filepath/) missing on disk!");
        !           264:        $errcnt++;
        !           265:    }
        !           266:     }
        !           267:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
        !           268:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
        !           269:    # number of file tags and files don't match
        !           270:    # iterate through all files
        !           271:    foreach my $f (sort keys %$fsfiles) {
        !           272:        my ($name, $path) = split_file_path($f);
        !           273:        # was this file missing?
        !           274:        if (! $okfiles{$f}) {
        !           275:        # is an ignoreable file?
        !           276:        if ($index_files{$name}) {
        !           277:            next;
        !           278:        }
        !           279:        # name must be valid
        !           280:        if (! valid_dir_name($name)) {
        !           281:            $path = "." unless ($path);
        !           282:            logger("ERROR", "file name $name (in $path/) invalid!");
        !           283:            $errcnt++;
        !           284:            next;
        !           285:        }
        !           286:        if ($fix_files) {
        !           287:            # add missing file tag
        !           288:            my $file_node = $root->addNewChild($namespace, "file");
        !           289:            $xml_changed++;
        !           290:            # add name
        !           291:            my $name_node = $file_node->addNewChild($namespace, "name");
        !           292:            $name_node->appendTextNode($name);
        !           293:            # add path
        !           294:            if ($path) {
        !           295:            my $path_node = $file_node->addNewChild($namespace, "path");
        !           296:            $path_node->appendTextNode($path);
        !           297:            }
        !           298:            # add size
        !           299:            my $size_node = $file_node->addNewChild($namespace, "size");
        !           300:            $size_node->appendTextNode($$fsfiles{$f}->[1]);
        !           301:            # add date
        !           302:            my $date_node = $file_node->addNewChild($namespace, "date");
        !           303:            $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
        !           304:            logger("INFO", "file $f to be added to index file!");
        !           305:        } else {
        !           306:            logger("ERROR", "file $f missing in index file!");
        !           307:            $errcnt++;
        !           308:        }
        !           309:        }
        !           310:    }
        !           311:     }
        !           312: }
        !           313: 
        !           314: #
        !           315: # check_resource_meta($rootnode)
        !           316: #
        !           317: # checks general resource meta information
        !           318: #
        !           319: sub check_resource_meta {
        !           320:     my ($rootnode) = @_;
        !           321: 
        !           322:     #
        !           323:     # description
        !           324:     #
        !           325:     my $description = $rootnode->findvalue('child::description');
        !           326:     if (! $description) {
        !           327:    logger("ERROR", "resource description element missing!");
        !           328:    $errcnt++;
        !           329:     }
        !           330:     #
        !           331:     # name
        !           332:     #
        !           333:     my $name = sstrip($rootnode->findvalue('child::name'));
        !           334:     if ($name) {
        !           335:    my ($dirname, $dirpath) = split_file_path($docdir);
        !           336:    if ($dirname ne $name) {
        !           337:        logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
        !           338:        $errcnt++;
        !           339:    }
        !           340:     } else {
        !           341:    logger("ERROR", "resource name element missing!");
        !           342:    $errcnt++;
        !           343:     }
        !           344:     #
        !           345:     # archive path
        !           346:     #
        !           347:     my $realpath;
        !           348:     # get real path
        !           349:     if ($docdir =~ /^\//) {
        !           350:    # docdir is absolute
        !           351:    $realpath = $docdir;
        !           352:     } else {
        !           353:    # docdir is relative -- try with the shell
        !           354:    if (open PWDCMD, "cd $docdir ; pwd|") {
        !           355:        $realpath = <PWDCMD>;
        !           356:        chomp $realpath;
        !           357:    }
        !           358:     }
        !           359:     if (! $realpath) {
        !           360:    logger("ERROR", "unable to check real archive path!");
        !           361:    $errcnt++;
        !           362:    return;
        !           363:     }
        !           364:     # get archive-path
        !           365:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
        !           366:     if ($archnode) {
        !           367:    my $arch = sstrip($archnode->textContent);
        !           368:    if ($arch ne $realpath) {
        !           369:        logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
        !           370:        $warncnt++;
        !           371:        # correct archive-path
        !           372:        $archnode->removeChildNodes;
        !           373:        $archnode->appendTextNode($realpath);
        !           374:        $xml_changed++;
        !           375:    }
        !           376:     } else {
        !           377:    # add archive-path
        !           378:    $archnode = $rootnode->addNewChild($namespace, "archive-path");
        !           379:    $archnode->appendTextNode($realpath);
        !           380:    $xml_changed++;
        !           381:     }
        !           382: 
        !           383: }
        !           384: 
        !           385:     
        !           386: 
        !           387: #######################################################
        !           388: # main
        !           389: #
        !           390: 
        !           391: logger("INFO", "metacheck $version");
        !           392:     
        !           393: my ($document, $rootnode) = read_xml($metafile);
        !           394: 
        !           395: check_resource_meta($rootnode);
        !           396: 
        !           397: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
        !           398: logger("INFO", "$fnum files on FS");
        !           399: #foreach (keys %files) {logger("  file ($_): $files{$_}");}
        !           400: 
        !           401: check_files($rootnode, \%files);
        !           402: check_dirs($rootnode, \%dirs);
        !           403: 
        !           404: logger("INFO", "$warncnt warnings");
        !           405: logger("INFO", "$errcnt errors");
        !           406: if ($errcnt > 0) {
        !           407:     logger("ABORT", "there were errors!");
        !           408:     exit 1;
        !           409: } else {
        !           410:     if ($fix_xml) {
        !           411:    write_xml($document, $metafile);
        !           412:     }
        !           413:     logger("DONE", "index file checked successfully!");
        !           414: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>