Annotation of foxridge-archiver/metacheck.pl, revision 1.3

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: use XML::LibXML;
                      5: 
                      6: use lib '/usr/local/mpiwg/archive';
                      7: use MPIWGStor;
                      8: 
                      9: # make output unbuffered
                     10: $|=1;
                     11: 
                     12: #######################################################
                     13: # internal parameters
                     14: #
                     15: 
                     16: # program version
1.3     ! casties    17: my $version = "0.5.3 (8.7.2004)";
1.1       casties    18: 
                     19: # read command line parameters
                     20: my $args = MPIWGStor::parseargs;
                     21: 
                     22: # debug level
                     23: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
                     24: 
                     25: # check only or fix index file also
                     26: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
                     27: # add file tags for missing files
                     28: my $fix_files = ! $check_only;
                     29: # add tags for file size and date
                     30: my $fix_fs_meta = 1;
                     31: # add dir tags for missing directories
                     32: my $fix_dirs = ! $check_only;
                     33: # rewrite XML file (necessary for fix_files and fix_dirs)
                     34: my $fix_xml = ! $check_only;
                     35: my $xml_changed = 0;
                     36: # XML namespace (not really implemented!)
                     37: my $namespace = "";
                     38: 
                     39: 
                     40: #######################################################
                     41: # check parameters that were passed to the program
                     42: #
                     43: my $docdir = $$args{'path'};
                     44: if (! $docdir) {
                     45:     logger("ABORT", "no document directory given!");
                     46:     exit 1;
                     47: }
1.2       casties    48: # strip double slashes
                     49: $docdir =~ s/\/\//\//;
1.1       casties    50: # strip trailing slashes
                     51: $docdir =~ s/\/$//;
                     52: if (! -d $docdir) {
                     53:     logger("ABORT", "document directory \'$docdir\' doesn't exist!");
                     54:     exit 1;
                     55: }
                     56: 
                     57: my $metafile = "$docdir/index.meta";
                     58: if (! -f $metafile) {
                     59:     logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
                     60:     exit 1;
                     61: }
                     62: 
                     63: #######################################################
                     64: # internal variables
                     65: #
                     66: 
                     67: # all files in the document directory tree
                     68: my %files;
                     69: # all directories in the document directory tree
                     70: my %dirs;
                     71: # number of errors
                     72: my $errcnt = 0;
                     73: # number of warnings
                     74: my $warncnt = 0;
                     75: 
                     76: #######################################################
                     77: # subroutines
                     78: #
                     79: 
                     80: #
                     81: # fs_read_files($realdir, $docdir, \%files, \%dirs)
                     82: #
                     83: # reads all files and directories below $realdir and puts the
                     84: # files in %files and directories in %dirs
                     85: # $docdir is only for recursion, it should be empty when called 
                     86: # from outside
                     87: #
                     88: sub fs_read_files {
                     89:     my ($directory, $docdir, $files, $dirs) = @_;    
                     90:     my $cnt = 0;
                     91: 
                     92:     if (! opendir DIR, $directory) {
                     93:    return 0;
                     94:     }
                     95:     my @dirfiles = readdir DIR;
                     96:     foreach my $fn (@dirfiles) {
                     97:    # ignore names starting with a dot
                     98:    next if ($fn =~ /^\./);
                     99:    # ignore other silly files
                    100:    next if ($junk_files{$fn});
                    101: 
                    102:    $cnt++;
                    103:    my $f = "$directory/$fn";
                    104:    my $docf = ($docdir) ? "$docdir/$fn" : $fn;
                    105:    #logger("fs_file: \"$f\"");
                    106:    if (-f $f) {
                    107:        #logger("  is file");
                    108:        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                    109:                       $atime,$mtime,$ctime,$blksize,$blocks)
                    110:                           = stat(_); 
                    111:        $$files{$docf} = [$fn, $size, $mtime];
                    112:    } elsif (-d _) {
                    113:        #logger("  is dir");
                    114:        $$dirs{$docf} = $fn;
                    115:        # recurse into directory
                    116:        $cnt += fs_read_files($f, $docf, $files, $dirs);
                    117:    }
                    118:     }
                    119:     return $cnt;
                    120: }
                    121: 
                    122: 
                    123: #
                    124: # check_dirs($rootnode, \%dirs)
                    125: #
                    126: # reads all dir elements under $rootnode and compares with the directory list
                    127: # in %dirs
                    128: #
                    129: sub check_dirs {
                    130:     my ($root, $fsdirs) = @_;
                    131: 
                    132:     #
                    133:     # iterate all dir tags
                    134:     #
                    135:     my @dirnodes = $root->findnodes('child::dir');
                    136:     my %okdirs;
                    137:     foreach my $dirnode (@dirnodes) {
                    138:    my $dirname = sstrip($dirnode->find('child::name'));
                    139:    my $dirpath = sstrip($dirnode->find('child::path'));
                    140:    my $description = sstrip($dirnode->find('child::description'));
                    141:    # name must be valid
                    142:    if (! valid_dir_name($dirname)) {
                    143:        logger("ERROR", "directory name ($dirpath) $dirname invalid!");
                    144:        $errcnt++;
                    145:    }
                    146:    # description can be present
                    147:    if (! $description) {
                    148:        logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
                    149:        $warncnt++;
                    150:    }
                    151:    # check with dirs on filesystem 
                    152:    my $fn;
                    153:    if ($dirpath) {
                    154:        $fn = "$dirpath/$dirname";
                    155:    } else {
                    156:        $fn = "$dirname";
                    157:    }
                    158:         #logger("dir: \"$dirname\", \"$dirpath\"");
                    159:    if ($$fsdirs{$fn}) {
                    160:        #logger("  OK ($$fsdirs{$fn})");
                    161:        $okdirs{$fn} = $dirname;
                    162:    } else {
                    163:        logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
                    164:        $errcnt++;
                    165:    }
                    166:     }
                    167:     #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
                    168:     if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
                    169:    # number of dir tags and dirs don't match
                    170:    # iterate through all dirs
                    171:    foreach my $f (sort keys %$fsdirs) {
                    172:        # was this dir missing?
                    173:        if (! $okdirs{$f}) {
                    174:        my ($name, $path) = split_file_path($f);
                    175:        # name must be valid
                    176:        if (! valid_dir_name($name)) {
                    177:            $path = "." unless ($path);
                    178:            logger("ERROR", "directory name $name (in $path/) invalid!");
                    179:            $errcnt++;
                    180:            next;
                    181:        }
                    182:        if ($fix_dirs) {
                    183:            # add missing dir tag
                    184:            my $dir_node = $root->addNewChild($namespace, "dir");
                    185:            $xml_changed++;
                    186:            # add name
                    187:            my $name_node = $dir_node->addNewChild($namespace, "name");
                    188:            $name_node->appendTextNode($name);
                    189:            # add path
                    190:            if ($path) {
                    191:            my $path_node = $dir_node->addNewChild($namespace, "path");
                    192:            $path_node->appendTextNode($path);
                    193:            }
                    194:            logger("INFO", "directory $f to be added to index file!");
                    195:        } else {
                    196:            logger("ERROR", "directory $f missing in index file!");
                    197:            $errcnt++;
                    198:        }
                    199:        }
                    200:    }
                    201:     }
                    202: }
                    203: 
                    204: 
                    205: #
                    206: # check_files($rootnode, \%files)
                    207: #
                    208: # reads all file elements under $rootnode and compares with the file list
                    209: # in %files
                    210: #
                    211: sub check_files {
                    212:     my ($root, $fsfiles) = @_;
                    213: 
                    214:     #
                    215:     # iterate all file tags
                    216:     #
                    217:     my @filenodes = $root->findnodes('child::file');
                    218:     my %okfiles;
                    219:     foreach my $filenode (@filenodes) {
                    220:    my $filename = sstrip($filenode->find('child::name'));
                    221:    my $filepath = sstrip($filenode->find('child::path'));
                    222:    my $filesize = sstrip($filenode->find('child::size'));
                    223:    my $filedate = sstrip($filenode->find('child::date'));
                    224:    # name must be valid
                    225:    if (! valid_file_name($filename)) {
                    226:        logger("ERROR", "file name ($filepath)$filename invalid!");
                    227:        $errcnt++;
                    228:    }
                    229:    my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
                    230:         #logger("file: \"$filename\", \"$filepath\"");
                    231:    if ($$fsfiles{$fn}) {
                    232:        #logger("  OK ($$fsfiles{$fn})");
                    233:        $okfiles{$fn} = $filename;
                    234:        # check file size and date
                    235:        if ($filesize) {
                    236:        if ($filesize != $$fsfiles{$fn}->[1]) {
                    237:            logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
                    238:            $warncnt++;
                    239:        }
                    240:        }
                    241:        # file date
                    242:        if ($filedate) {
                    243:        if ($filedate ne stime($$fsfiles{$fn}->[2])) {
                    244:            logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
                    245:            $warncnt++;
                    246:        }
                    247:        }
                    248:        # update file size and date
                    249:        if ($fix_fs_meta) {
                    250:        # delete size and date
                    251:        foreach my $n ($filenode->findnodes('child::size')) {
                    252:            $filenode->removeChild($n);
                    253:        }
                    254:        foreach my $n ($filenode->findnodes('child::date')) {
                    255:            $filenode->removeChild($n);
                    256:        }
                    257:        # add new size and date
                    258:        my $node = $filenode->addNewChild($namespace, "size");
                    259:        $node->appendTextNode($$fsfiles{$fn}->[1]);
                    260:        $node = $filenode->addNewChild($namespace, "date");
                    261:        $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
                    262:        $xml_changed++;
                    263:        }
                    264:    } else {
                    265:        logger("ERROR", "file $filename (in $filepath/) missing on disk!");
                    266:        $errcnt++;
                    267:    }
                    268:     }
                    269:     #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
                    270:     if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
                    271:    # number of file tags and files don't match
                    272:    # iterate through all files
                    273:    foreach my $f (sort keys %$fsfiles) {
                    274:        my ($name, $path) = split_file_path($f);
                    275:        # was this file missing?
                    276:        if (! $okfiles{$f}) {
                    277:        # is an ignoreable file?
                    278:        if ($index_files{$name}) {
                    279:            next;
                    280:        }
                    281:        # name must be valid
                    282:        if (! valid_dir_name($name)) {
                    283:            $path = "." unless ($path);
                    284:            logger("ERROR", "file name $name (in $path/) invalid!");
                    285:            $errcnt++;
                    286:            next;
                    287:        }
                    288:        if ($fix_files) {
                    289:            # add missing file tag
                    290:            my $file_node = $root->addNewChild($namespace, "file");
                    291:            $xml_changed++;
                    292:            # add name
                    293:            my $name_node = $file_node->addNewChild($namespace, "name");
                    294:            $name_node->appendTextNode($name);
                    295:            # add path
                    296:            if ($path) {
                    297:            my $path_node = $file_node->addNewChild($namespace, "path");
                    298:            $path_node->appendTextNode($path);
                    299:            }
                    300:            # add size
                    301:            my $size_node = $file_node->addNewChild($namespace, "size");
                    302:            $size_node->appendTextNode($$fsfiles{$f}->[1]);
                    303:            # add date
                    304:            my $date_node = $file_node->addNewChild($namespace, "date");
                    305:            $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
                    306:            logger("INFO", "file $f to be added to index file!");
                    307:        } else {
                    308:            logger("ERROR", "file $f missing in index file!");
                    309:            $errcnt++;
                    310:        }
                    311:        }
                    312:    }
                    313:     }
                    314: }
                    315: 
                    316: #
                    317: # check_resource_meta($rootnode)
                    318: #
                    319: # checks general resource meta information
                    320: #
                    321: sub check_resource_meta {
                    322:     my ($rootnode) = @_;
                    323: 
                    324:     #
                    325:     # description
                    326:     #
                    327:     my $description = $rootnode->findvalue('child::description');
                    328:     if (! $description) {
                    329:    logger("ERROR", "resource description element missing!");
                    330:    $errcnt++;
                    331:     }
                    332:     #
                    333:     # name
                    334:     #
                    335:     my $name = sstrip($rootnode->findvalue('child::name'));
                    336:     if ($name) {
                    337:    my ($dirname, $dirpath) = split_file_path($docdir);
                    338:    if ($dirname ne $name) {
                    339:        logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
                    340:        $errcnt++;
                    341:    }
                    342:     } else {
                    343:    logger("ERROR", "resource name element missing!");
                    344:    $errcnt++;
                    345:     }
                    346:     #
                    347:     # archive path
                    348:     #
                    349:     my $realpath;
                    350:     # get real path
                    351:     if ($docdir =~ /^\//) {
                    352:    # docdir is absolute
                    353:    $realpath = $docdir;
                    354:     } else {
                    355:    # docdir is relative -- try with the shell
                    356:    if (open PWDCMD, "cd $docdir ; pwd|") {
                    357:        $realpath = <PWDCMD>;
                    358:        chomp $realpath;
                    359:    }
                    360:     }
                    361:     if (! $realpath) {
                    362:    logger("ERROR", "unable to check real archive path!");
                    363:    $errcnt++;
                    364:    return;
                    365:     }
                    366:     # get archive-path
                    367:     my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
                    368:     if ($archnode) {
                    369:    my $arch = sstrip($archnode->textContent);
                    370:    if ($arch ne $realpath) {
                    371:        logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
                    372:        $warncnt++;
                    373:        # correct archive-path
                    374:        $archnode->removeChildNodes;
                    375:        $archnode->appendTextNode($realpath);
                    376:        $xml_changed++;
                    377:    }
                    378:     } else {
                    379:    # add archive-path
                    380:    $archnode = $rootnode->addNewChild($namespace, "archive-path");
                    381:    $archnode->appendTextNode($realpath);
                    382:    $xml_changed++;
                    383:     }
                    384: 
                    385: }
                    386: 
                    387:     
                    388: 
                    389: #######################################################
                    390: # main
                    391: #
                    392: 
                    393: logger("INFO", "metacheck $version");
                    394:     
                    395: my ($document, $rootnode) = read_xml($metafile);
                    396: 
                    397: check_resource_meta($rootnode);
                    398: 
                    399: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
                    400: logger("INFO", "$fnum files on FS");
                    401: #foreach (keys %files) {logger("  file ($_): $files{$_}");}
                    402: 
                    403: check_files($rootnode, \%files);
                    404: check_dirs($rootnode, \%dirs);
                    405: 
                    406: logger("INFO", "$warncnt warnings");
                    407: logger("INFO", "$errcnt errors");
                    408: if ($errcnt > 0) {
                    409:     logger("ABORT", "there were errors!");
                    410:     exit 1;
                    411: } else {
                    412:     if ($fix_xml) {
                    413:    write_xml($document, $metafile);
                    414:     }
                    415:     logger("DONE", "index file checked successfully!");
                    416: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>