Annotation of foxridge-archiver/archivecheck.pl, revision 1.1.1.1

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # flush STDOUT after every write so that messages show up immediately
                     12: $| = 1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # version string, reported at start-up and in the log file
                     19: my $version = '0.3 (24.9.2003)';
                     20: 
                     21: # hash ref with the command line parameters
                     22: my $args = parseargs();
                     23: 
                     24: # debug level: 0 unless the parameters contain a 'debug' entry
                     25: # ($debug is not declared in this file -- presumably exported by MPIWGStor)
                     26: $debug = exists $args->{debug} ? $args->{debug} : 0;
                     27: 
                     28: # XML namespace (not really implemented!)
                     29: my $namespace = '';
                     30: 
                     31: # archive name (archive-path element, usually == $docdir)
                     32: # and archive storage date
                     33: my ($archname, $archdate);
                     34: 
                     35: 
                     36: #######################################################
                     37: # external programs
                     38: #
                     39: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";    # TSM client, run by run_query
                     40: if (! -x $archprog) {
                     41:     logger("ABORT", "TSM client program '$archprog' missing!!");
                     42:     exit 1;
                     43: }
                     44: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     45: # if (! -x $checkprog) {
                     46: #     logger("ABORT", "meta data checking program '$checkprog' missing!!");
                     47: #     exit 1;
                     48: # }
                     49: # log file for archiver, opened for appending (run_query prints to LOG)
                     50: my $log_file = "/var/tmp/archivecheck.log";
                     51: if (! open LOG, '>>', $log_file) {    # 3-arg open: mode kept apart from the name
                     52:     logger("ABORT", "unable to write log file '$log_file'!!");
                     53:     exit 1;
                     54: }
                     55: 
                     56: #######################################################
                     57: # check parameters that were passed to the program
                     58: #
                     59: my $docdir = $$args{'path'};
                     60: if (! $docdir) {
                     61:     print "ABORT: no document directory given!\n";
                     62:     exit 1;
                     63: }
                     64: # strip all trailing slashes, not just one: archived files are looked up as
                     65: # "$docdir/<relative path>", so a leftover slash would make every lookup fail
                     66: $docdir =~ s{/+$}{};
                     67: if (! -d $docdir) {
                     68:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
                     69:     exit 1;
                     70: }
                     71: # the directory has to contain the metadata index file
                     72: my $metafile = "$docdir/index.meta";
                     73: if (! -f $metafile) {
                     74:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
                     75:     exit 1;
                     76: }
                     77: 
                     78: #######################################################
                     79: # internal variables
                     80: #
                     81: # number of errors (counted by the check subroutines)
                     82: my $errcnt = 0;
                     83: # number of warnings
                     84: my $warncnt = 0;
                     85: 
                     86: #######################################################
                     87: # subroutines
                     88: #
                     89: 
                     90: 
                     91: #
                     92: # $files = read_resource_meta($rootnode)
                     93: #
                     94: # Checks the general resource meta information of a parsed index.meta and
                     95: # collects the entries that are supposed to be in the archive.
                     96: #
                     97: # $rootnode: root element of the document; its archive-path, file, dir
                     98: #            and archive-storage-date child elements are read.
                     99: #
                    100: # Returns a hash ref with one entry per file and dir element.  The key is
                    101: # "path/name" (just "name" when the path is empty), the value is [name].
                    102: #
                    103: # Side effects: sets the file-level $archname and $archdate; exits with
                    104: # status 1 when archive-path is missing; logs an ERROR and increments
                    105: # $errcnt when archive-storage-date is missing.
                    106: #
                    107: sub read_resource_meta {
                    108:     my ($rootnode) = @_;
                    109:     my %files;
                    110: 
                    111:     # archive-path is mandatory: run_query passes it to the archive client
                    112:     # as the description to search for
                    113:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
                    114:     if (! $archname) {
                    115:         logger("ABORT", "archive-path element missing!!");
                    116:         exit 1;
                    117:     }
                    118: 
                    119:     # file and dir elements are recorded in exactly the same way, only the
                    120:     # label in the debug output differs (FILE: or DIR:)
                    121:     foreach my $kind ('file', 'dir') {
                    122:         foreach my $fn ($rootnode->findnodes("child::$kind")) {
                    123:             my $name = sstrip($fn->findvalue('child::name'));
                    124:             my $path = sstrip($fn->findvalue('child::path'));
                    125:             logger("DEBUG", uc($kind) . ": ($path)$name");
                    126:             my $f = ($path) ? "$path/$name" : "$name";
                    127:             $files{$f} = [$name];
                    128:         }
                    129:     }
                    130: 
                    131:     # archive-storage-date goes into the file-level $archdate.  Declaring it
                    132:     # with "my" here would only set a local copy and leave the file-level
                    133:     # variable undefined.  The value is read with sstrip(findvalue(...)),
                    134:     # the same way as archive-path above.
                    135:     $archdate = sstrip($rootnode->findvalue('child::archive-storage-date'));
                    136:     if ($archdate) {
                    137:         logger("INFO", "archive storage date: $archdate");
                    138:     } else {
                    139:         logger("ERROR", "archive storage date missing!");
                    140:         $errcnt++;
                    141:     }
                    142: 
                    143:     return \%files;
                    144: }
                    145: 
                    146: 
                    147: #
                    148: # $cnt = fs_read_files($realdir, $docdir, \%files, \%dirs)
                    149: #
                    150: # Recursively reads all files and directories below $realdir.
                    151: #
                    152: # $realdir: directory on disk to read
                    153: # $docdir:  path of $realdir relative to the top directory; only for the
                    154: #           recursion, it should be empty when called from outside
                    155: # \%files:  receives [name, size, stime(mtime)] for every plain file
                    156: # \%dirs:   receives the name of every directory
                    157: # Both hashes are keyed by the path relative to the top directory.
                    158: #
                    159: # Returns the number of entries seen below $realdir (0 if it can't be
                    160: # opened).  Names starting with a dot or listed in %junk_files are skipped.
                    161: #
                    162: sub fs_read_files {
                    163:     my ($directory, $docdir, $files, $dirs) = @_;
                    164:     my $cnt = 0;
                    165: 
                    166:     # lexical handle, closed right after reading: the recursion below no
                    167:     # longer re-opens one shared global DIR handle that was never closed
                    168:     opendir(my $dh, $directory) or return 0;
                    169:     my @dirfiles = readdir $dh;
                    170:     closedir $dh;
                    171: 
                    172:     foreach my $fn (@dirfiles) {
                    173:         next if ($fn =~ /^\./);
                    174:         next if ($junk_files{$fn});
                    175:         $cnt++;
                    176:         my $f = "$directory/$fn";
                    177:         my $docf = ($docdir) ? "$docdir/$fn" : $fn;
                    178:         if (-f $f) {
                    179:             # stat(_) reuses the result of the -f test; 7 = size, 9 = mtime
                    180:             my ($size, $mtime) = (stat(_))[7, 9];
                    181:             $$files{$docf} = [$fn, $size, stime($mtime)];
                    182:         } elsif (-d _) {
                    183:             $$dirs{$docf} = $fn;
                    184:             $cnt += fs_read_files($f, $docf, $files, $dirs);
                    185:         }
                    186:     }
                    187:     return $cnt;
                    188: }
                    189: 
                    190: 
                    191: #
                    192: # \%files = run_query
                    193: #
                    194: # Runs the archive client's "query archive" on $docdir with $archname as
                    195: # description and returns a hash ref: file name => [size, "date time"].
                    196: # The command line and all of its output are copied to LOG.  A file that
                    197: # is listed more than once gets a WARNING; its last entry is kept.
                    198: # NOTE: the pattern takes the file name as one run of non-blanks, so names
                    199: # containing whitespace can't be matched reliably.
                    200: #
                    201: # Sample output:
                    202: #         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
                    203: #
                    204: sub run_query {
                    205:     my %files;
                    206:     my $archcnt = 0;
                    207:     print LOG "START checkarchive $version ", scalar localtime, "\n";
                    208: 
                    209:     # both values go inside single quotes, so a quote within a value is
                    210:     # written as '\'' -- otherwise it would end the shell word early
                    211:     (my $desc = $archname) =~ s/'/'\\''/g;
                    212:     (my $dir = $docdir) =~ s/'/'\\''/g;
                    213:     my $archcmd = "$archprog query archive -subdir=yes"
                    214:         . " -description='$desc' '$dir/'";
                    215: 
                    216:     print LOG "CMD: $archcmd\n";
                    217:     if (! open ARCH, "$archcmd 2>&1 |") {
                    218:         logger("ABORT", "unable to start archive command '$archcmd'!!");
                    219:         exit 1;
                    220:     }
                    221:     while (<ARCH>) {
                    222:         chomp;
                    223:         print LOG "ARCH: $_\n";
                    224:         # non-matching lines are skipped; unit, expiry and label are unused
                    225:         my ($size, undef, $date, $time, $file) = /
                    226:             \s*([\d,]+)    # size
                    227:             \s+(\w+)       # unit of size
                    228:             \s+([\d\/]+)   # date mm\/dd\/yy
                    229:             \s+([\d:]+)    # time
                    230:             \s+(\S+)       # file name
                    231:             \s+(\w+)       # expiry
                    232:             \s+(\S+)       # archive label
                    233:             /x or next;
                    234:         $size =~ s/,//g;
                    235:         $date = ymd_date($date);
                    236:         logger("DEBUG", "  QUERY: file '$file'");
                    237:         $archcnt++;
                    238:         if ($files{$file}) {
                    239:             logger("WARNING", "file $file seems to be archived multiple times: $time $date");
                    240:             $warncnt++;
                    241:         }
                    242:         $files{$file} = [$size, "$date $time"];
                    243:     }
                    244:     close ARCH;    # waits for the client to finish and frees the handle
                    245:     logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
                    246:     return \%files;
                    247: }
                    248: 
                    249: 
                    250: #
                    251: # check_files(\%files_to_archive, \%archived_files)
                    252: #
                    253: # Compares the entries expected from index.meta (keys relative to $docdir)
                    254: # with the archived files (keys are full paths).  Expected but not archived
                    255: # is an ERROR; archived but not expected is a WARNING unless the file name
                    256: # is in %index_files.  Differing totals give one more WARNING.
                    257: # Archived entries that were expected get "OK" stored in slot [2].
                    258: #
                    259: sub check_files {
                    260:     my ($to_archive, $archived) = @_;
                    261:     my $expected = keys %$to_archive;
                    262:     my $found = keys %$archived;
                    263: 
                    264:     for my $entry (sort keys %$to_archive) {
                    265:         my $full = "$docdir/$entry";
                    266:         unless ($archived->{$full}) {
                    267:             logger("ERROR", "file entry '$entry' missing from archive!");
                    268:             $errcnt++;
                    269:             next;
                    270:         }
                    271:         logger("DEBUG", "$entry archived OK");
                    272:         $archived->{$full}[2] = "OK";
                    273:     }
                    274: 
                    275:     # whatever was not marked "OK" above is surplus in the archive
                    276:     for my $arch (grep { ! $archived->{$_}[2] } sort keys %$archived) {
                    277:         my ($name, $dir) = split_file_path($arch);
                    278:         if ($index_files{$name}) {
                    279:             logger("DEBUG", "$arch ignored");
                    280:             $found--;
                    281:         } else {
                    282:             logger("WARNING", "$arch archived but not in list!");
                    283:             $warncnt++;
                    284:         }
                    285:     }
                    286: 
                    287:     if ($expected > $found) {
                    288:         logger("WARNING", "less files were archived ($found vs. $expected)!");
                    289:         $warncnt++;
                    290:     } elsif ($found > $expected) {
                    291:         logger("WARNING", "more files were archived ($found vs. $expected)!");
                    292:         $warncnt++;
                    293:     }
                    294: }
                    295: 
                    296: #
                    297: # compare_files(\%files_on_disk, \%archived_files)
                    298: #
                    299: # Compares the files still on disk with the archived files.
                    300: # \%files_on_disk:  path relative to $docdir => [name, size, stime(mtime)]
                    301: # \%archived_files: full path => [size, "date time"]
                    302: #
                    303: # Every file on disk that is not in %index_files is an ERROR (counted in
                    304: # $errcnt): missing from the archive, different size, newer than the
                    305: # archive, or simply still on disk although it is archived.
                    306: #
                    307: sub compare_files {
                    308:     my ($fs_files, $archived) = @_;
                    309: 
                    310:     foreach my $ft (sort keys %$fs_files) {
                    311:         next if ($index_files{$ft});
                    312:         my $entry = $$archived{"$docdir/$ft"};
                    313:         if (! $entry) {
                    314:             logger("ERROR", "file '$ft' on disk missing from archive!");
                    315:             $errcnt++;
                    316:             next;
                    317:         }
                    318:         my ($asize, $atime) = @$entry[0, 1];
                    319:         my ($fsize, $ftime) = @{$$fs_files{$ft}}[1, 2];
                    320:         # sizes are compared as numbers, the time stamps as strings ("lt")
                    321:         if ($asize != $fsize) {
                    322:             logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
                    323:         } elsif ($atime lt $ftime) {
                    324:             logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
                    325:         } else {
                    326:             logger("ERROR", "archived file $ft still on disk");
                    327:         }
                    328:         $errcnt++;
                    329:     }
                    330: }
                    331: 
                    332: 
                    333: 
                    334: #######################################################
                    335: # main
                    336: #
                    337: logger("INFO", "archivecheck $version");
                    338: 
                    339: # make sure the right user is running this program; getlogin may return
                    340: # nothing (e.g. when not started from a login session), so fall back to
                    341: # the name that belongs to the real user id
                    342: my $user = getlogin || getpwuid($<) || "";
                    343: if (($user ne "archive")&&($user ne "root")) {
                    344:     logger("ABORT", "you must be archive or root user to run this program!");
                    345:     exit 1;
                    346: }
                    347: 
                    348: # read index.meta file
                    349: my ($document, $rootnode) = read_xml($metafile);
                    350: 
                    351: # check the resource meta data and read the list of files to be archived
                    352: my $files_to_archive = read_resource_meta($rootnode);
                    353: 
                    354: # check for .archived file
                    355: if (-f "$docdir/.archived") {
                    356:     logger("INFO", ".archived file exists.");
                    357: } else {
                    358:     logger("WARNING", "no .archived file!");
                    359:     $warncnt++;
                    360: }
                    361: 
                    362: # query the archive; without any archived file there is nothing to check
                    363: my $archived_files = run_query;
                    364: 
                    365: my $num_arch_files = (scalar keys %$archived_files);
                    366: if ($num_arch_files == 0) {
                    367:     logger("ABORT", "no archive of this directory!!");
                    368:     exit 1;
                    369: }
                    370: logger("INFO", "$num_arch_files files archived");
                    371: 
                    372: # check list of archived files
                    373: check_files($files_to_archive, $archived_files);
                    374: 
                    375: # read files from filesystem
                    376: my (%fsfiles, %fsdirs);
                    377: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
                    378: 
                    379: logger("INFO", "$num_fs_files files still on disk!");
                    380: if ($num_fs_files > 0) {
                    381:     compare_files(\%fsfiles, $archived_files);
                    382: }
                    383: 
                    384: logger("INFO", "$warncnt warnings");
                    385: logger("INFO", "$errcnt errors");
                    386: if ($errcnt == 0) {
                    387:     logger("DONE", "" . (scalar keys %$archived_files) . " archived files checked");
                    388: } else {
                    389:     logger("ABORT", "there were $errcnt errors!!");
                    390:     exit 1;
                    391: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>