Annotation of foxridge-archiver/metacheck.pl, revision 1.4

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: use XML::LibXML;
                      5: 
                      6: use lib '/usr/local/mpiwg/archive';
                      7: use MPIWGStor;
                      8: 
                      9: # make output unbuffered
                     10: $|=1;
                     11: 
                     12: #######################################################
                     13: # internal parameters
                     14: #
                     15: 
                     16: # program version
1.4     ! casties    17: my $version = "0.6.0 (20.9.2005)";
        !            18: my $help = 
        !            19: "use: metacheck [options] docdir
        !            20: options:
        !            21:   -debug  show debugging info
        !            22:   -dry-run  simulate, dont'do anything
        !            23:   -checkonly  leave existing index file untouched
        !            24:   -add-files  add file tags for missing files
        !            25:   -replace  rewrite index file to match current files
        !            26: ";
        !            27: logger("INFO", "metacheck $version");
1.1       casties    28: 
                     29: # read command line parameters
                     30: my $args = MPIWGStor::parseargs;
1.4     ! casties    31: if (! scalar(%$args)) {
        !            32:     print $help, "\n";
        !            33:     exit 1;
        !            34: }
1.1       casties    35: 
                     36: # debug level
                     37: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
                     38: 
1.4     ! casties    39: # simulate action only
        !            40: my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
        !            41: logger('DEBUG', "dry-run: $dry_run");
        !            42: 
1.1       casties    43: # check only or fix index file also
1.4     ! casties    44: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
        !            45: 
1.1       casties    46: # add file tags for missing files
                     47: my $fix_files = ! $check_only;
                     48: # add tags for file size and date
                     49: my $fix_fs_meta = 1;
                     50: # add dir tags for missing directories
                     51: my $fix_dirs = ! $check_only;
                     52: # rewrite XML file (necessary for fix_files and fix_dirs)
                     53: my $fix_xml = ! $check_only;
1.4     ! casties    54: # rewrite complete index file
        !            55: my $do_rewrite = 0;
        !            56: 
        !            57: # add file tags for missing files
        !            58: if (exists $$args{'add-files'}) {
        !            59:     $check_only = 0;
        !            60:     $fix_files = 1;
        !            61:     $fix_dirs = 1;
        !            62:     $fix_xml = 1;
        !            63:     $do_rewrite = 0;
        !            64:     logger('DEBUG', "add-files: true");
        !            65: }
        !            66: 
        !            67: # completely rewrite index file
        !            68: if (exists $$args{'replace'}) {
        !            69:     $check_only = 0;
        !            70:     $fix_files = 1;
        !            71:     $fix_dirs = 1;
        !            72:     $fix_xml = 1;
        !            73:     $do_rewrite = 1;
        !            74:     logger('DEBUG', "replace: true");
        !            75: }
        !            76: logger('DEBUG', "checkonly: $check_only");
        !            77: 
        !            78: 
1.1       casties    79: my $xml_changed = 0;
                     80: # XML namespace (not really implemented!)
                     81: my $namespace = "";
                     82: 
                     83: 
                     84: #######################################################
                     85: # check parameters that were passed to the program
                     86: #
                     87: my $docdir = $$args{'path'};
                     88: if (! $docdir) {
                     89:     logger("ABORT", "no document directory given!");
                     90:     exit 1;
                     91: }
1.2       casties    92: # strip double slashes
                     93: $docdir =~ s/\/\//\//;
1.1       casties    94: # strip trailing slashes
                     95: $docdir =~ s/\/$//;
                     96: if (! -d $docdir) {
                     97:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
                     98:     exit 1;
                     99: }
                    100: 
                    101: my $metafile = "$docdir/index.meta";
                    102: if (! -f $metafile) {
                    103:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
                    104:     exit 1;
                    105: }
                    106: 
                    107: #######################################################
                    108: # internal variables
                    109: #
                    110: 
                    111: # all files in the document directory tree
                    112: my %files;
                    113: # all directories in the document directory tree
                    114: my %dirs;
                    115: # number of errors
                    116: my $errcnt = 0;
                    117: # number of warnings
                    118: my $warncnt = 0;
                    119: 
                    120: #######################################################
                    121: # subroutines
                    122: #
                    123: 
                    124: #
                    125: # fs_read_files($realdir, $docdir, \%files, \%dirs)
                    126: #
                    127: # reads all files and directories below $realdir and puts the
                    128: # files in %files and directories in %dirs
                    129: # $docdir is only for recursion, it should be empty when called 
                    130: # from outside
                    131: #
                    132: sub fs_read_files {
                    133:     my ($directory, $docdir, $files, $dirs) = @_;    
                    134:     my $cnt = 0;
                    135: 
                    136:     if (! opendir DIR, $directory) {
                    137:    return 0;
                    138:     }
                    139:     my @dirfiles = readdir DIR;
                    140:     foreach my $fn (@dirfiles) {
                    141:    # ignore names starting with a dot
                    142:    next if ($fn =~ /^\./);
                    143:    # ignore other silly files
                    144:    next if ($junk_files{$fn});
                    145: 
                    146:    $cnt++;
                    147:    my $f = "$directory/$fn";
                    148:    my $docf = ($docdir) ? "$docdir/$fn" : $fn;
                    149:    #logger("fs_file: \"$f\"");
                    150:    if (-f $f) {
                    151:        #logger("  is file");
                    152:        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                    153:                       $atime,$mtime,$ctime,$blksize,$blocks)
                    154:                           = stat(_); 
                    155:        $$files{$docf} = [$fn, $size, $mtime];
                    156:    } elsif (-d _) {
                    157:        #logger("  is dir");
                    158:        $$dirs{$docf} = $fn;
                    159:        # recurse into directory
                    160:        $cnt += fs_read_files($f, $docf, $files, $dirs);
                    161:    }
                    162:     }
                    163:     return $cnt;
                    164: }
                    165: 
                    166: 
                    167: #
                    168: # check_dirs($rootnode, \%dirs)
                    169: #
                    170: # reads all dir elements under $rootnode and compares with the directory list
                    171: # in %dirs
                    172: #
                    173: sub check_dirs {
                    174:     my ($root, $fsdirs) = @_;
                    175: 
                    176:     #
                    177:     # iterate all dir tags
                    178:     #
                    179:     my @dirnodes = $root->findnodes('child::dir');
                    180:     my %okdirs;
                    181:     foreach my $dirnode (@dirnodes) {
                    182:    my $dirname = sstrip($dirnode->find('child::name'));
                    183:    my $dirpath = sstrip($dirnode->find('child::path'));
                    184:    my $description = sstrip($dirnode->find('child::description'));
                    185:    # name must be valid
                    186:    if (! valid_dir_name($dirname)) {
                    187:        logger("ERROR", "directory name ($dirpath) $dirname invalid!");
                    188:        $errcnt++;
                    189:    }
                    190:    # description can be present
                    191:    if (! $description) {
                    192:        logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
                    193:        $warncnt++;
                    194:    }
                    195:    # check with dirs on filesystem 
                    196:    my $fn;
1.4     ! casties   197:    if ($dirpath && ($dirpath ne '.')) {
1.1       casties   198:        $fn = "$dirpath/$dirname";
                    199:    } else {
                    200:        $fn = "$dirname";
                    201:    }
1.4     ! casties   202:         #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
1.1       casties   203:    if ($$fsdirs{$fn}) {
                    204:        #logger("  OK ($$fsdirs{$fn})");
                    205:        $okdirs{$fn} = $dirname;
                    206:    } else {
1.4     ! casties   207:        if ($do_rewrite) {
        !           208:        # remove dir tag
        !           209:        logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
        !           210:        $dirnode->unbindNode();
        !           211:        $warncnt++;
        !           212:        } else {
        !           213:        logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
        !           214:        $errcnt++;
        !           215:        }
1.1       casties   216:    }
                    217:     }
                    218:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
                    219:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
                    220:    # number of dir tags and dirs don't match
                    221:    # iterate through all dirs
                    222:    foreach my $f (sort keys %$fsdirs) {
                    223:        # was this dir missing?
                    224:        if (! $okdirs{$f}) {
1.4     ! casties   225:        my ($name, $path) = split_file_path($f, 1);
1.1       casties   226:        # name must be valid
                    227:        if (! valid_dir_name($name)) {
                    228:            $path = "." unless ($path);
                    229:            logger("ERROR", "directory name $name (in $path/) invalid!");
                    230:            $errcnt++;
                    231:            next;
                    232:        }
                    233:        if ($fix_dirs) {
                    234:            # add missing dir tag
                    235:            my $dir_node = $root->addNewChild($namespace, "dir");
                    236:            $xml_changed++;
                    237:            # add name
                    238:            my $name_node = $dir_node->addNewChild($namespace, "name");
                    239:            $name_node->appendTextNode($name);
                    240:            # add path
                    241:            if ($path) {
                    242:            my $path_node = $dir_node->addNewChild($namespace, "path");
                    243:            $path_node->appendTextNode($path);
                    244:            }
                    245:            logger("INFO", "directory $f to be added to index file!");
                    246:        } else {
                    247:            logger("ERROR", "directory $f missing in index file!");
                    248:            $errcnt++;
                    249:        }
                    250:        }
                    251:    }
                    252:     }
                    253: }
                    254: 
                    255: 
                    256: #
                    257: # check_files($rootnode, \%files)
                    258: #
                    259: # reads all file elements under $rootnode and compares with the file list
                    260: # in %files
                    261: #
                    262: sub check_files {
                    263:     my ($root, $fsfiles) = @_;
                    264: 
                    265:     #
                    266:     # iterate all file tags
                    267:     #
                    268:     my @filenodes = $root->findnodes('child::file');
                    269:     my %okfiles;
                    270:     foreach my $filenode (@filenodes) {
                    271:    my $filename = sstrip($filenode->find('child::name'));
                    272:    my $filepath = sstrip($filenode->find('child::path'));
                    273:    my $filesize = sstrip($filenode->find('child::size'));
                    274:    my $filedate = sstrip($filenode->find('child::date'));
                    275:    # name must be valid
                    276:    if (! valid_file_name($filename)) {
                    277:        logger("ERROR", "file name ($filepath)$filename invalid!");
                    278:        $errcnt++;
                    279:    }
                    280:    my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
                    281:         #logger("file: \"$filename\", \"$filepath\"");
                    282:    if ($$fsfiles{$fn}) {
                    283:        #logger("  OK ($$fsfiles{$fn})");
                    284:        $okfiles{$fn} = $filename;
                    285:        # check file size and date
                    286:        if ($filesize) {
                    287:        if ($filesize != $$fsfiles{$fn}->[1]) {
                    288:            logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
                    289:            $warncnt++;
                    290:        }
                    291:        }
                    292:        # file date
                    293:        if ($filedate) {
                    294:        if ($filedate ne stime($$fsfiles{$fn}->[2])) {
                    295:            logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
                    296:            $warncnt++;
                    297:        }
                    298:        }
                    299:        # update file size and date
                    300:        if ($fix_fs_meta) {
                    301:        # delete size and date
                    302:        foreach my $n ($filenode->findnodes('child::size')) {
                    303:            $filenode->removeChild($n);
                    304:        }
                    305:        foreach my $n ($filenode->findnodes('child::date')) {
                    306:            $filenode->removeChild($n);
                    307:        }
                    308:        # add new size and date
                    309:        my $node = $filenode->addNewChild($namespace, "size");
                    310:        $node->appendTextNode($$fsfiles{$fn}->[1]);
                    311:        $node = $filenode->addNewChild($namespace, "date");
                    312:        $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
                    313:        $xml_changed++;
                    314:        }
                    315:    } else {
1.4     ! casties   316:        if ($do_rewrite) {
        !           317:        # remove file tag
        !           318:        logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
        !           319:        $filenode->unbindNode();
        !           320:        $warncnt++;
        !           321:        } else {
        !           322:        logger("ERROR", "file $filename (in $filepath/) missing on disk!");
        !           323:        $errcnt++;
        !           324:        }
1.1       casties   325:    }
                    326:     }
                    327:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
                    328:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
                    329:    # number of file tags and files don't match
                    330:    # iterate through all files
                    331:    foreach my $f (sort keys %$fsfiles) {
1.4     ! casties   332:        my ($name, $path) = split_file_path($f, 1);
1.1       casties   333:        # was this file missing?
                    334:        if (! $okfiles{$f}) {
                    335:        # is an ignoreable file?
                    336:        if ($index_files{$name}) {
                    337:            next;
                    338:        }
                    339:        # name must be valid
                    340:        if (! valid_dir_name($name)) {
                    341:            $path = "." unless ($path);
                    342:            logger("ERROR", "file name $name (in $path/) invalid!");
                    343:            $errcnt++;
                    344:            next;
                    345:        }
                    346:        if ($fix_files) {
                    347:            # add missing file tag
                    348:            my $file_node = $root->addNewChild($namespace, "file");
                    349:            $xml_changed++;
                    350:            # add name
                    351:            my $name_node = $file_node->addNewChild($namespace, "name");
                    352:            $name_node->appendTextNode($name);
                    353:            # add path
                    354:            if ($path) {
                    355:            my $path_node = $file_node->addNewChild($namespace, "path");
                    356:            $path_node->appendTextNode($path);
                    357:            }
                    358:            # add size
                    359:            my $size_node = $file_node->addNewChild($namespace, "size");
                    360:            $size_node->appendTextNode($$fsfiles{$f}->[1]);
                    361:            # add date
                    362:            my $date_node = $file_node->addNewChild($namespace, "date");
                    363:            $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
                    364:            logger("INFO", "file $f to be added to index file!");
                    365:        } else {
                    366:            logger("ERROR", "file $f missing in index file!");
                    367:            $errcnt++;
                    368:        }
                    369:        }
                    370:    }
                    371:     }
                    372: }
                    373: 
                    374: #
                    375: # check_resource_meta($rootnode)
                    376: #
                    377: # checks general resource meta information
                    378: #
                    379: sub check_resource_meta {
                    380:     my ($rootnode) = @_;
                    381: 
                    382:     #
                    383:     # description
                    384:     #
                    385:     my $description = $rootnode->findvalue('child::description');
                    386:     if (! $description) {
                    387:    logger("ERROR", "resource description element missing!");
                    388:    $errcnt++;
                    389:     }
                    390:     #
                    391:     # name
                    392:     #
                    393:     my $name = sstrip($rootnode->findvalue('child::name'));
                    394:     if ($name) {
                    395:    my ($dirname, $dirpath) = split_file_path($docdir);
                    396:    if ($dirname ne $name) {
                    397:        logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
                    398:        $errcnt++;
                    399:    }
                    400:     } else {
                    401:    logger("ERROR", "resource name element missing!");
                    402:    $errcnt++;
                    403:     }
                    404:     #
                    405:     # archive path
                    406:     #
                    407:     my $realpath;
                    408:     # get real path
                    409:     if ($docdir =~ /^\//) {
                    410:    # docdir is absolute
                    411:    $realpath = $docdir;
                    412:     } else {
                    413:    # docdir is relative -- try with the shell
                    414:    if (open PWDCMD, "cd $docdir ; pwd|") {
                    415:        $realpath = <PWDCMD>;
                    416:        chomp $realpath;
                    417:    }
                    418:     }
                    419:     if (! $realpath) {
                    420:    logger("ERROR", "unable to check real archive path!");
                    421:    $errcnt++;
                    422:    return;
                    423:     }
                    424:     # get archive-path
                    425:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
                    426:     if ($archnode) {
                    427:    my $arch = sstrip($archnode->textContent);
                    428:    if ($arch ne $realpath) {
                    429:        logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
                    430:        $warncnt++;
                    431:        # correct archive-path
                    432:        $archnode->removeChildNodes;
                    433:        $archnode->appendTextNode($realpath);
                    434:        $xml_changed++;
                    435:    }
                    436:     } else {
                    437:    # add archive-path
                    438:    $archnode = $rootnode->addNewChild($namespace, "archive-path");
                    439:    $archnode->appendTextNode($realpath);
                    440:    $xml_changed++;
                    441:     }
                    442: 
                    443: }
                    444: 
                    445:     
                    446: 
                    447: #######################################################
                    448: # main
                    449: #
                    450: 
                    451: my ($document, $rootnode) = read_xml($metafile);
                    452: 
                    453: check_resource_meta($rootnode);
                    454: 
                    455: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
                    456: logger("INFO", "$fnum files on FS");
1.4     ! casties   457: #foreach (keys %dirs) {logger('DEBUG', "  dir ($_): $dirs{$_}");}
1.1       casties   458: 
                    459: check_files($rootnode, \%files);
                    460: check_dirs($rootnode, \%dirs);
                    461: 
                    462: logger("INFO", "$warncnt warnings");
                    463: logger("INFO", "$errcnt errors");
                    464: if ($errcnt > 0) {
1.4     ! casties   465:     logger("ABORT", "there were $errcnt errors!");
1.1       casties   466:     exit 1;
                    467: } else {
                    468:     if ($fix_xml) {
1.4     ! casties   469:    if ($dry_run) {
        !           470:        logger('INFO', "would write $metafile");
        !           471:        logger('DEBUG', $document->toString(1));
        !           472:    } else {
        !           473:        write_xml($document, $metafile);
        !           474:    }
1.1       casties   475:     }
                    476:     logger("DONE", "index file checked successfully!");
                    477: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>