Annotation of foxridge-archiver/metacheck.pl, revision 1.6

1.6     ! casties     1: #!/usr/bin/perl -w
1.1       casties     2: 
                      3: use strict;
                      4: use XML::LibXML;
                      5: 
                      6: use lib '/usr/local/mpiwg/archive';
                      7: use MPIWGStor;
                      8: 
                      9: # make output unbuffered
                     10: $|=1;
                     11: 
                     12: #######################################################
                     13: # internal parameters
                     14: #
                     15: 
                     16: # program version
1.4       casties    17: my $version = "0.6.0 (20.9.2005)";
                     18: my $help = 
                     19: "use: metacheck [options] docdir
                     20: options:
                     21:   -debug  show debugging info
                     22:   -dry-run  simulate, dont'do anything
                     23:   -checkonly  leave existing index file untouched
                     24:   -add-files  add file tags for missing files
                     25:   -replace  rewrite index file to match current files
                     26: ";
                     27: logger("INFO", "metacheck $version");
1.1       casties    28: 
                     29: # read command line parameters
                     30: my $args = MPIWGStor::parseargs;
1.4       casties    31: if (! scalar(%$args)) {
                     32:     print $help, "\n";
                     33:     exit 1;
                     34: }
1.1       casties    35: 
                     36: # debug level
                     37: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
                     38: 
1.4       casties    39: # simulate action only
                     40: my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
                     41: logger('DEBUG', "dry-run: $dry_run");
                     42: 
1.1       casties    43: # check only or fix index file also
1.4       casties    44: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
                     45: 
1.1       casties    46: # add file tags for missing files
                     47: my $fix_files = ! $check_only;
                     48: # add tags for file size and date
                     49: my $fix_fs_meta = 1;
                     50: # add dir tags for missing directories
                     51: my $fix_dirs = ! $check_only;
                     52: # rewrite XML file (necessary for fix_files and fix_dirs)
                     53: my $fix_xml = ! $check_only;
1.4       casties    54: # rewrite complete index file
                     55: my $do_rewrite = 0;
                     56: 
                     57: # add file tags for missing files
                     58: if (exists $$args{'add-files'}) {
                     59:     $check_only = 0;
                     60:     $fix_files = 1;
                     61:     $fix_dirs = 1;
                     62:     $fix_xml = 1;
                     63:     $do_rewrite = 0;
                     64:     logger('DEBUG', "add-files: true");
                     65: }
                     66: 
                     67: # completely rewrite index file
                     68: if (exists $$args{'replace'}) {
                     69:     $check_only = 0;
                     70:     $fix_files = 1;
                     71:     $fix_dirs = 1;
                     72:     $fix_xml = 1;
                     73:     $do_rewrite = 1;
                     74:     logger('DEBUG', "replace: true");
                     75: }
                     76: logger('DEBUG', "checkonly: $check_only");
                     77: 
                     78: 
1.1       casties    79: my $xml_changed = 0;
                     80: # XML namespace (not really implemented!)
                     81: my $namespace = "";
                     82: 
                     83: 
                     84: #######################################################
                     85: # check parameters that were passed to the program
                     86: #
                     87: my $docdir = $$args{'path'};
                     88: if (! $docdir) {
                     89:     logger("ABORT", "no document directory given!");
                     90:     exit 1;
                     91: }
1.2       casties    92: # strip double slashes
                     93: $docdir =~ s/\/\//\//;
1.1       casties    94: # strip trailing slashes
                     95: $docdir =~ s/\/$//;
                     96: if (! -d $docdir) {
                     97:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
                     98:     exit 1;
                     99: }
                    100: 
                    101: my $metafile = "$docdir/index.meta";
                    102: if (! -f $metafile) {
                    103:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
                    104:     exit 1;
                    105: }
                    106: 
                    107: #######################################################
                    108: # internal variables
                    109: #
                    110: 
                    111: # all files in the document directory tree
                    112: my %files;
                    113: # all directories in the document directory tree
                    114: my %dirs;
                    115: # number of errors
                    116: my $errcnt = 0;
                    117: # number of warnings
                    118: my $warncnt = 0;
                    119: 
                    120: #######################################################
                    121: # subroutines
                    122: #
                    123: 
                    124: #
                    125: # fs_read_files($realdir, $docdir, \%files, \%dirs)
                    126: #
                    127: # reads all files and directories below $realdir and puts the
                    128: # files in %files and directories in %dirs
                    129: # $docdir is only for recursion, it should be empty when called 
                    130: # from outside
                    131: #
                    132: sub fs_read_files {
                    133:     my ($directory, $docdir, $files, $dirs) = @_;    
                    134:     my $cnt = 0;
                    135: 
                    136:     if (! opendir DIR, $directory) {
                    137:    return 0;
                    138:     }
                    139:     my @dirfiles = readdir DIR;
                    140:     foreach my $fn (@dirfiles) {
                    141:    # ignore names starting with a dot
                    142:    next if ($fn =~ /^\./);
                    143:    # ignore other silly files
                    144:    next if ($junk_files{$fn});
                    145: 
                    146:    $cnt++;
                    147:    my $f = "$directory/$fn";
                    148:    my $docf = ($docdir) ? "$docdir/$fn" : $fn;
                    149:    #logger("fs_file: \"$f\"");
                    150:    if (-f $f) {
                    151:        #logger("  is file");
                    152:        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                    153:                       $atime,$mtime,$ctime,$blksize,$blocks)
                    154:                           = stat(_); 
                    155:        $$files{$docf} = [$fn, $size, $mtime];
                    156:    } elsif (-d _) {
                    157:        #logger("  is dir");
                    158:        $$dirs{$docf} = $fn;
                    159:        # recurse into directory
                    160:        $cnt += fs_read_files($f, $docf, $files, $dirs);
                    161:    }
                    162:     }
                    163:     return $cnt;
                    164: }
                    165: 
                    166: 
                    167: #
                    168: # check_dirs($rootnode, \%dirs)
                    169: #
                    170: # reads all dir elements under $rootnode and compares with the directory list
                    171: # in %dirs
                    172: #
                    173: sub check_dirs {
                    174:     my ($root, $fsdirs) = @_;
                    175: 
                    176:     #
                    177:     # iterate all dir tags
                    178:     #
                    179:     my @dirnodes = $root->findnodes('child::dir');
                    180:     my %okdirs;
                    181:     foreach my $dirnode (@dirnodes) {
                    182:    my $dirname = sstrip($dirnode->find('child::name'));
                    183:    my $dirpath = sstrip($dirnode->find('child::path'));
                    184:    my $description = sstrip($dirnode->find('child::description'));
                    185:    # name must be valid
                    186:    if (! valid_dir_name($dirname)) {
1.5       casties   187:        if ($do_rewrite) {
                    188:        logger("WARNING", "directory name ($dirpath) $dirname in index file invalid!");
                    189:        $warncnt++;
                    190:        } else {
                    191:        logger("ERROR", "directory name ($dirpath) $dirname invalid!");
                    192:        $errcnt++;
                    193:        }
1.1       casties   194:    }
                    195:    # description can be present
                    196:    if (! $description) {
                    197:        logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
                    198:        $warncnt++;
                    199:    }
                    200:    # check with dirs on filesystem 
                    201:    my $fn;
1.4       casties   202:    if ($dirpath && ($dirpath ne '.')) {
1.1       casties   203:        $fn = "$dirpath/$dirname";
                    204:    } else {
                    205:        $fn = "$dirname";
                    206:    }
1.4       casties   207:         #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
1.1       casties   208:    if ($$fsdirs{$fn}) {
                    209:        #logger("  OK ($$fsdirs{$fn})");
                    210:        $okdirs{$fn} = $dirname;
                    211:    } else {
1.4       casties   212:        if ($do_rewrite) {
                    213:        # remove dir tag
                    214:        logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
                    215:        $dirnode->unbindNode();
                    216:        $warncnt++;
                    217:        } else {
                    218:        logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
                    219:        $errcnt++;
                    220:        }
1.1       casties   221:    }
                    222:     }
                    223:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
                    224:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
                    225:    # number of dir tags and dirs don't match
                    226:    # iterate through all dirs
                    227:    foreach my $f (sort keys %$fsdirs) {
                    228:        # was this dir missing?
                    229:        if (! $okdirs{$f}) {
1.4       casties   230:        my ($name, $path) = split_file_path($f, 1);
1.1       casties   231:        # name must be valid
                    232:        if (! valid_dir_name($name)) {
                    233:            $path = "." unless ($path);
                    234:            logger("ERROR", "directory name $name (in $path/) invalid!");
                    235:            $errcnt++;
                    236:            next;
                    237:        }
                    238:        if ($fix_dirs) {
                    239:            # add missing dir tag
                    240:            my $dir_node = $root->addNewChild($namespace, "dir");
                    241:            $xml_changed++;
                    242:            # add name
                    243:            my $name_node = $dir_node->addNewChild($namespace, "name");
                    244:            $name_node->appendTextNode($name);
                    245:            # add path
                    246:            if ($path) {
                    247:            my $path_node = $dir_node->addNewChild($namespace, "path");
                    248:            $path_node->appendTextNode($path);
                    249:            }
                    250:            logger("INFO", "directory $f to be added to index file!");
                    251:        } else {
                    252:            logger("ERROR", "directory $f missing in index file!");
                    253:            $errcnt++;
                    254:        }
                    255:        }
                    256:    }
                    257:     }
                    258: }
                    259: 
                    260: 
                    261: #
                    262: # check_files($rootnode, \%files)
                    263: #
                    264: # reads all file elements under $rootnode and compares with the file list
                    265: # in %files
                    266: #
                    267: sub check_files {
                    268:     my ($root, $fsfiles) = @_;
                    269: 
                    270:     #
                    271:     # iterate all file tags
                    272:     #
                    273:     my @filenodes = $root->findnodes('child::file');
                    274:     my %okfiles;
                    275:     foreach my $filenode (@filenodes) {
                    276:    my $filename = sstrip($filenode->find('child::name'));
                    277:    my $filepath = sstrip($filenode->find('child::path'));
                    278:    my $filesize = sstrip($filenode->find('child::size'));
                    279:    my $filedate = sstrip($filenode->find('child::date'));
                    280:    # name must be valid
                    281:    if (! valid_file_name($filename)) {
1.5       casties   282:        if ($do_rewrite) {
                    283:        logger("WARNING", "file name ($filepath)$filename in index file invalid!");
                    284:        $warncnt++;
                    285:        } else {
                    286:        logger("ERROR", "file name ($filepath)$filename invalid!");
                    287:        $errcnt++;
                    288:        }
1.1       casties   289:    }
                    290:    my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
                    291:         #logger("file: \"$filename\", \"$filepath\"");
                    292:    if ($$fsfiles{$fn}) {
                    293:        #logger("  OK ($$fsfiles{$fn})");
                    294:        $okfiles{$fn} = $filename;
                    295:        # check file size and date
                    296:        if ($filesize) {
                    297:        if ($filesize != $$fsfiles{$fn}->[1]) {
                    298:            logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
                    299:            $warncnt++;
                    300:        }
                    301:        }
                    302:        # file date
                    303:        if ($filedate) {
                    304:        if ($filedate ne stime($$fsfiles{$fn}->[2])) {
                    305:            logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
                    306:            $warncnt++;
                    307:        }
                    308:        }
                    309:        # update file size and date
                    310:        if ($fix_fs_meta) {
                    311:        # delete size and date
                    312:        foreach my $n ($filenode->findnodes('child::size')) {
                    313:            $filenode->removeChild($n);
                    314:        }
                    315:        foreach my $n ($filenode->findnodes('child::date')) {
                    316:            $filenode->removeChild($n);
                    317:        }
                    318:        # add new size and date
                    319:        my $node = $filenode->addNewChild($namespace, "size");
                    320:        $node->appendTextNode($$fsfiles{$fn}->[1]);
                    321:        $node = $filenode->addNewChild($namespace, "date");
                    322:        $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
                    323:        $xml_changed++;
                    324:        }
                    325:    } else {
1.4       casties   326:        if ($do_rewrite) {
                    327:        # remove file tag
                    328:        logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
                    329:        $filenode->unbindNode();
                    330:        $warncnt++;
                    331:        } else {
                    332:        logger("ERROR", "file $filename (in $filepath/) missing on disk!");
                    333:        $errcnt++;
                    334:        }
1.1       casties   335:    }
                    336:     }
                    337:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
                    338:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
                    339:    # number of file tags and files don't match
                    340:    # iterate through all files
                    341:    foreach my $f (sort keys %$fsfiles) {
1.4       casties   342:        my ($name, $path) = split_file_path($f, 1);
1.1       casties   343:        # was this file missing?
                    344:        if (! $okfiles{$f}) {
                    345:        # is an ignoreable file?
                    346:        if ($index_files{$name}) {
                    347:            next;
                    348:        }
                    349:        # name must be valid
                    350:        if (! valid_dir_name($name)) {
                    351:            $path = "." unless ($path);
                    352:            logger("ERROR", "file name $name (in $path/) invalid!");
                    353:            $errcnt++;
                    354:            next;
                    355:        }
                    356:        if ($fix_files) {
                    357:            # add missing file tag
                    358:            my $file_node = $root->addNewChild($namespace, "file");
                    359:            $xml_changed++;
                    360:            # add name
                    361:            my $name_node = $file_node->addNewChild($namespace, "name");
                    362:            $name_node->appendTextNode($name);
                    363:            # add path
                    364:            if ($path) {
                    365:            my $path_node = $file_node->addNewChild($namespace, "path");
                    366:            $path_node->appendTextNode($path);
                    367:            }
                    368:            # add size
                    369:            my $size_node = $file_node->addNewChild($namespace, "size");
                    370:            $size_node->appendTextNode($$fsfiles{$f}->[1]);
                    371:            # add date
                    372:            my $date_node = $file_node->addNewChild($namespace, "date");
                    373:            $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
                    374:            logger("INFO", "file $f to be added to index file!");
                    375:        } else {
                    376:            logger("ERROR", "file $f missing in index file!");
                    377:            $errcnt++;
                    378:        }
                    379:        }
                    380:    }
                    381:     }
                    382: }
                    383: 
                    384: #
                    385: # check_resource_meta($rootnode)
                    386: #
                    387: # checks general resource meta information
                    388: #
                    389: sub check_resource_meta {
                    390:     my ($rootnode) = @_;
                    391: 
                    392:     #
                    393:     # description
                    394:     #
                    395:     my $description = $rootnode->findvalue('child::description');
                    396:     if (! $description) {
                    397:    logger("ERROR", "resource description element missing!");
                    398:    $errcnt++;
                    399:     }
                    400:     #
                    401:     # name
                    402:     #
                    403:     my $name = sstrip($rootnode->findvalue('child::name'));
                    404:     if ($name) {
                    405:    my ($dirname, $dirpath) = split_file_path($docdir);
                    406:    if ($dirname ne $name) {
                    407:        logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
                    408:        $errcnt++;
                    409:    }
                    410:     } else {
                    411:    logger("ERROR", "resource name element missing!");
                    412:    $errcnt++;
                    413:     }
                    414:     #
                    415:     # archive path
                    416:     #
                    417:     my $realpath;
                    418:     # get real path
                    419:     if ($docdir =~ /^\//) {
                    420:    # docdir is absolute
                    421:    $realpath = $docdir;
                    422:     } else {
                    423:    # docdir is relative -- try with the shell
                    424:    if (open PWDCMD, "cd $docdir ; pwd|") {
                    425:        $realpath = <PWDCMD>;
                    426:        chomp $realpath;
                    427:    }
                    428:     }
                    429:     if (! $realpath) {
                    430:    logger("ERROR", "unable to check real archive path!");
                    431:    $errcnt++;
                    432:    return;
                    433:     }
                    434:     # get archive-path
                    435:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
                    436:     if ($archnode) {
                    437:    my $arch = sstrip($archnode->textContent);
                    438:    if ($arch ne $realpath) {
                    439:        logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
                    440:        $warncnt++;
                    441:        # correct archive-path
                    442:        $archnode->removeChildNodes;
                    443:        $archnode->appendTextNode($realpath);
                    444:        $xml_changed++;
                    445:    }
                    446:     } else {
                    447:    # add archive-path
                    448:    $archnode = $rootnode->addNewChild($namespace, "archive-path");
                    449:    $archnode->appendTextNode($realpath);
                    450:    $xml_changed++;
                    451:     }
                    452: 
                    453: }
                    454: 
                    455:     
                    456: 
                    457: #######################################################
                    458: # main
                    459: #
                    460: 
                    461: my ($document, $rootnode) = read_xml($metafile);
                    462: 
                    463: check_resource_meta($rootnode);
                    464: 
                    465: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
                    466: logger("INFO", "$fnum files on FS");
1.4       casties   467: #foreach (keys %dirs) {logger('DEBUG', "  dir ($_): $dirs{$_}");}
1.1       casties   468: 
                    469: check_files($rootnode, \%files);
                    470: check_dirs($rootnode, \%dirs);
                    471: 
                    472: logger("INFO", "$warncnt warnings");
                    473: logger("INFO", "$errcnt errors");
                    474: if ($errcnt > 0) {
1.4       casties   475:     logger("ABORT", "there were $errcnt errors!");
1.1       casties   476:     exit 1;
                    477: } else {
                    478:     if ($fix_xml) {
1.4       casties   479:    if ($dry_run) {
                    480:        logger('INFO', "would write $metafile");
                    481:        logger('DEBUG', $document->toString(1));
                    482:    } else {
                    483:        write_xml($document, $metafile);
                    484:    }
1.1       casties   485:     }
                    486:     logger("DONE", "index file checked successfully!");
                    487: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>