Annotation of foxridge-archiver/metacheck.pl, revision 1.1.1.1

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: use XML::LibXML;
                      5: 
                      6: use lib '/usr/local/mpiwg/archive';
                      7: use MPIWGStor;
                      8: 
                      9: # make output unbuffered
                     10: $|=1;
                     11: 
                     12: #######################################################
                     13: # internal parameters
                     14: #
                     15: 
                     16: # program version
                     17: my $version = "0.5.2 (7.1.2004)";
                     18: 
                     19: # read command line parameters
                     20: my $args = MPIWGStor::parseargs;
                     21: 
                     22: # debug level
                     23: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
                     24: 
                     25: # check only or fix index file also
                     26: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
                     27: # add file tags for missing files
                     28: my $fix_files = ! $check_only;
                     29: # add tags for file size and date
                     30: my $fix_fs_meta = 1;
                     31: # add dir tags for missing directories
                     32: my $fix_dirs = ! $check_only;
                     33: # rewrite XML file (necessary for fix_files and fix_dirs)
                     34: my $fix_xml = ! $check_only;
                     35: my $xml_changed = 0;
                     36: # XML namespace (not really implemented!)
                     37: my $namespace = "";
                     38: 
                     39: 
                     40: #######################################################
                     41: # check parameters that were passed to the program
                     42: #
                     43: my $docdir = $$args{'path'};
                     44: if (! $docdir) {
                     45:     logger("ABORT", "no document directory given!");
                     46:     exit 1;
                     47: }
                     48: # strip trailing slashes
                     49: $docdir =~ s/\/$//;
                     50: if (! -d $docdir) {
                     51:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
                     52:     exit 1;
                     53: }
                     54: 
                     55: my $metafile = "$docdir/index.meta";
                     56: if (! -f $metafile) {
                     57:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
                     58:     exit 1;
                     59: }
                     60: 
                     61: #######################################################
                     62: # internal variables
                     63: #
                     64: 
                     65: # all files in the document directory tree
                     66: my %files;
                     67: # all directories in the document directory tree
                     68: my %dirs;
                     69: # number of errors
                     70: my $errcnt = 0;
                     71: # number of warnings
                     72: my $warncnt = 0;
                     73: 
                     74: #######################################################
                     75: # subroutines
                     76: #
                     77: 
                     78: #
                     79: # fs_read_files($realdir, $docdir, \%files, \%dirs)
                     80: #
                     81: # reads all files and directories below $realdir and puts the
                     82: # files in %files and directories in %dirs
                     83: # $docdir is only for recursion, it should be empty when called 
                     84: # from outside
                     85: #
                     86: sub fs_read_files {
                     87:     my ($directory, $docdir, $files, $dirs) = @_;    
                     88:     my $cnt = 0;
                     89: 
                     90:     if (! opendir DIR, $directory) {
                     91:    return 0;
                     92:     }
                     93:     my @dirfiles = readdir DIR;
                     94:     foreach my $fn (@dirfiles) {
                     95:    # ignore names starting with a dot
                     96:    next if ($fn =~ /^\./);
                     97:    # ignore other silly files
                     98:    next if ($junk_files{$fn});
                     99: 
                    100:    $cnt++;
                    101:    my $f = "$directory/$fn";
                    102:    my $docf = ($docdir) ? "$docdir/$fn" : $fn;
                    103:    #logger("fs_file: \"$f\"");
                    104:    if (-f $f) {
                    105:        #logger("  is file");
                    106:        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                    107:                       $atime,$mtime,$ctime,$blksize,$blocks)
                    108:                           = stat(_); 
                    109:        $$files{$docf} = [$fn, $size, $mtime];
                    110:    } elsif (-d _) {
                    111:        #logger("  is dir");
                    112:        $$dirs{$docf} = $fn;
                    113:        # recurse into directory
                    114:        $cnt += fs_read_files($f, $docf, $files, $dirs);
                    115:    }
                    116:     }
                    117:     return $cnt;
                    118: }
                    119: 
                    120: 
                    121: #
                    122: # check_dirs($rootnode, \%dirs)
                    123: #
                    124: # reads all dir elements under $rootnode and compares with the directory list
                    125: # in %dirs
                    126: #
                    127: sub check_dirs {
                    128:     my ($root, $fsdirs) = @_;
                    129: 
                    130:     #
                    131:     # iterate all dir tags
                    132:     #
                    133:     my @dirnodes = $root->findnodes('child::dir');
                    134:     my %okdirs;
                    135:     foreach my $dirnode (@dirnodes) {
                    136:    my $dirname = sstrip($dirnode->find('child::name'));
                    137:    my $dirpath = sstrip($dirnode->find('child::path'));
                    138:    my $description = sstrip($dirnode->find('child::description'));
                    139:    # name must be valid
                    140:    if (! valid_dir_name($dirname)) {
                    141:        logger("ERROR", "directory name ($dirpath) $dirname invalid!");
                    142:        $errcnt++;
                    143:    }
                    144:    # description can be present
                    145:    if (! $description) {
                    146:        logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
                    147:        $warncnt++;
                    148:    }
                    149:    # check with dirs on filesystem 
                    150:    my $fn;
                    151:    if ($dirpath) {
                    152:        $fn = "$dirpath/$dirname";
                    153:    } else {
                    154:        $fn = "$dirname";
                    155:    }
                    156:         #logger("dir: \"$dirname\", \"$dirpath\"");
                    157:    if ($$fsdirs{$fn}) {
                    158:        #logger("  OK ($$fsdirs{$fn})");
                    159:        $okdirs{$fn} = $dirname;
                    160:    } else {
                    161:        logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
                    162:        $errcnt++;
                    163:    }
                    164:     }
                    165:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
                    166:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
                    167:    # number of dir tags and dirs don't match
                    168:    # iterate through all dirs
                    169:    foreach my $f (sort keys %$fsdirs) {
                    170:        # was this dir missing?
                    171:        if (! $okdirs{$f}) {
                    172:        my ($name, $path) = split_file_path($f);
                    173:        # name must be valid
                    174:        if (! valid_dir_name($name)) {
                    175:            $path = "." unless ($path);
                    176:            logger("ERROR", "directory name $name (in $path/) invalid!");
                    177:            $errcnt++;
                    178:            next;
                    179:        }
                    180:        if ($fix_dirs) {
                    181:            # add missing dir tag
                    182:            my $dir_node = $root->addNewChild($namespace, "dir");
                    183:            $xml_changed++;
                    184:            # add name
                    185:            my $name_node = $dir_node->addNewChild($namespace, "name");
                    186:            $name_node->appendTextNode($name);
                    187:            # add path
                    188:            if ($path) {
                    189:            my $path_node = $dir_node->addNewChild($namespace, "path");
                    190:            $path_node->appendTextNode($path);
                    191:            }
                    192:            logger("INFO", "directory $f to be added to index file!");
                    193:        } else {
                    194:            logger("ERROR", "directory $f missing in index file!");
                    195:            $errcnt++;
                    196:        }
                    197:        }
                    198:    }
                    199:     }
                    200: }
                    201: 
                    202: 
                    203: #
                    204: # check_files($rootnode, \%files)
                    205: #
                    206: # reads all file elements under $rootnode and compares with the file list
                    207: # in %files
                    208: #
                    209: sub check_files {
                    210:     my ($root, $fsfiles) = @_;
                    211: 
                    212:     #
                    213:     # iterate all file tags
                    214:     #
                    215:     my @filenodes = $root->findnodes('child::file');
                    216:     my %okfiles;
                    217:     foreach my $filenode (@filenodes) {
                    218:    my $filename = sstrip($filenode->find('child::name'));
                    219:    my $filepath = sstrip($filenode->find('child::path'));
                    220:    my $filesize = sstrip($filenode->find('child::size'));
                    221:    my $filedate = sstrip($filenode->find('child::date'));
                    222:    # name must be valid
                    223:    if (! valid_file_name($filename)) {
                    224:        logger("ERROR", "file name ($filepath)$filename invalid!");
                    225:        $errcnt++;
                    226:    }
                    227:    my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
                    228:         #logger("file: \"$filename\", \"$filepath\"");
                    229:    if ($$fsfiles{$fn}) {
                    230:        #logger("  OK ($$fsfiles{$fn})");
                    231:        $okfiles{$fn} = $filename;
                    232:        # check file size and date
                    233:        if ($filesize) {
                    234:        if ($filesize != $$fsfiles{$fn}->[1]) {
                    235:            logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
                    236:            $warncnt++;
                    237:        }
                    238:        }
                    239:        # file date
                    240:        if ($filedate) {
                    241:        if ($filedate ne stime($$fsfiles{$fn}->[2])) {
                    242:            logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
                    243:            $warncnt++;
                    244:        }
                    245:        }
                    246:        # update file size and date
                    247:        if ($fix_fs_meta) {
                    248:        # delete size and date
                    249:        foreach my $n ($filenode->findnodes('child::size')) {
                    250:            $filenode->removeChild($n);
                    251:        }
                    252:        foreach my $n ($filenode->findnodes('child::date')) {
                    253:            $filenode->removeChild($n);
                    254:        }
                    255:        # add new size and date
                    256:        my $node = $filenode->addNewChild($namespace, "size");
                    257:        $node->appendTextNode($$fsfiles{$fn}->[1]);
                    258:        $node = $filenode->addNewChild($namespace, "date");
                    259:        $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
                    260:        $xml_changed++;
                    261:        }
                    262:    } else {
                    263:        logger("ERROR", "file $filename (in $filepath/) missing on disk!");
                    264:        $errcnt++;
                    265:    }
                    266:     }
                    267:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
                    268:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
                    269:    # number of file tags and files don't match
                    270:    # iterate through all files
                    271:    foreach my $f (sort keys %$fsfiles) {
                    272:        my ($name, $path) = split_file_path($f);
                    273:        # was this file missing?
                    274:        if (! $okfiles{$f}) {
                    275:        # is an ignoreable file?
                    276:        if ($index_files{$name}) {
                    277:            next;
                    278:        }
                    279:        # name must be valid
                    280:        if (! valid_dir_name($name)) {
                    281:            $path = "." unless ($path);
                    282:            logger("ERROR", "file name $name (in $path/) invalid!");
                    283:            $errcnt++;
                    284:            next;
                    285:        }
                    286:        if ($fix_files) {
                    287:            # add missing file tag
                    288:            my $file_node = $root->addNewChild($namespace, "file");
                    289:            $xml_changed++;
                    290:            # add name
                    291:            my $name_node = $file_node->addNewChild($namespace, "name");
                    292:            $name_node->appendTextNode($name);
                    293:            # add path
                    294:            if ($path) {
                    295:            my $path_node = $file_node->addNewChild($namespace, "path");
                    296:            $path_node->appendTextNode($path);
                    297:            }
                    298:            # add size
                    299:            my $size_node = $file_node->addNewChild($namespace, "size");
                    300:            $size_node->appendTextNode($$fsfiles{$f}->[1]);
                    301:            # add date
                    302:            my $date_node = $file_node->addNewChild($namespace, "date");
                    303:            $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
                    304:            logger("INFO", "file $f to be added to index file!");
                    305:        } else {
                    306:            logger("ERROR", "file $f missing in index file!");
                    307:            $errcnt++;
                    308:        }
                    309:        }
                    310:    }
                    311:     }
                    312: }
                    313: 
                    314: #
                    315: # check_resource_meta($rootnode)
                    316: #
                    317: # checks general resource meta information
                    318: #
                    319: sub check_resource_meta {
                    320:     my ($rootnode) = @_;
                    321: 
                    322:     #
                    323:     # description
                    324:     #
                    325:     my $description = $rootnode->findvalue('child::description');
                    326:     if (! $description) {
                    327:    logger("ERROR", "resource description element missing!");
                    328:    $errcnt++;
                    329:     }
                    330:     #
                    331:     # name
                    332:     #
                    333:     my $name = sstrip($rootnode->findvalue('child::name'));
                    334:     if ($name) {
                    335:    my ($dirname, $dirpath) = split_file_path($docdir);
                    336:    if ($dirname ne $name) {
                    337:        logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
                    338:        $errcnt++;
                    339:    }
                    340:     } else {
                    341:    logger("ERROR", "resource name element missing!");
                    342:    $errcnt++;
                    343:     }
                    344:     #
                    345:     # archive path
                    346:     #
                    347:     my $realpath;
                    348:     # get real path
                    349:     if ($docdir =~ /^\//) {
                    350:    # docdir is absolute
                    351:    $realpath = $docdir;
                    352:     } else {
                    353:    # docdir is relative -- try with the shell
                    354:    if (open PWDCMD, "cd $docdir ; pwd|") {
                    355:        $realpath = <PWDCMD>;
                    356:        chomp $realpath;
                    357:    }
                    358:     }
                    359:     if (! $realpath) {
                    360:    logger("ERROR", "unable to check real archive path!");
                    361:    $errcnt++;
                    362:    return;
                    363:     }
                    364:     # get archive-path
                    365:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
                    366:     if ($archnode) {
                    367:    my $arch = sstrip($archnode->textContent);
                    368:    if ($arch ne $realpath) {
                    369:        logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
                    370:        $warncnt++;
                    371:        # correct archive-path
                    372:        $archnode->removeChildNodes;
                    373:        $archnode->appendTextNode($realpath);
                    374:        $xml_changed++;
                    375:    }
                    376:     } else {
                    377:    # add archive-path
                    378:    $archnode = $rootnode->addNewChild($namespace, "archive-path");
                    379:    $archnode->appendTextNode($realpath);
                    380:    $xml_changed++;
                    381:     }
                    382: 
                    383: }
                    384: 
                    385:     
                    386: 
                    387: #######################################################
                    388: # main
                    389: #
                    390: 
                    391: logger("INFO", "metacheck $version");
                    392:     
                    393: my ($document, $rootnode) = read_xml($metafile);
                    394: 
                    395: check_resource_meta($rootnode);
                    396: 
                    397: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
                    398: logger("INFO", "$fnum files on FS");
                    399: #foreach (keys %files) {logger("  file ($_): $files{$_}");}
                    400: 
                    401: check_files($rootnode, \%files);
                    402: check_dirs($rootnode, \%dirs);
                    403: 
                    404: logger("INFO", "$warncnt warnings");
                    405: logger("INFO", "$errcnt errors");
                    406: if ($errcnt > 0) {
                    407:     logger("ABORT", "there were errors!");
                    408:     exit 1;
                    409: } else {
                    410:     if ($fix_xml) {
                    411:    write_xml($document, $metafile);
                    412:     }
                    413:     logger("DONE", "index file checked successfully!");
                    414: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>