Annotation of foxridge-archiver/archivecheck.pl, revision 1.2

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # program version
1.2     ! casties    19: my $version = "0.4 (30.11.2004)";
1.1       casties    20: 
                     21: # read command line parameters
                     22: my $args = parseargs;
                     23: 
                     24: # debug level
                     25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
                     26: 
                     27: # XML namespace (not really implemented!)
                     28: my $namespace = "";
                     29: 
                     30: # archive name (archive-path element, usually == $docdir)
                     31: my $archname;
                     32: # archive storage date
                     33: my $archdate;
                     34: 
                     35: 
                     36: #######################################################
                     37: # external programs
                     38: #
                     39: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     40: if (! -x $archprog) {
                     41:     logger("ABORT", "TSM client program '$archprog' missing!!");
                     42:     exit 1;
                     43: }
                     44: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     45: # if (! -x $checkprog) {
                     46: #     logge("ABORT", "meta data checking program '$checkprog' missing!!");
                     47: #     exit 1;
                     48: # }
                     49: # log file for archiver
                     50: my $log_file = "/var/tmp/archivecheck.log";
                     51: if (! open LOG, ">>$log_file") {
                     52:     logger("ABORT", "unable to write log file '$log_file'!!");
                     53:     exit 1;
                     54: }
                     55: 
                     56: #######################################################
                     57: # check parameters that were passed to the program
                     58: #
                     59: my $docdir = $$args{'path'};
                     60: if (! $docdir) {
                     61:     print "ABORT: no document directory given!\n";
                     62:     exit 1;
                     63: }
                     64: # strip trailing slashes
                     65: $docdir =~ s/\/$//;
                     66: if (! -d $docdir) {
                     67:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
                     68:     exit 1;
                     69: }
                     70: 
                     71: my $metafile = "$docdir/index.meta";
                     72: if (! -f $metafile) {
                     73:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
                     74:     exit 1;
                     75: }
                     76: 
                     77: #######################################################
                     78: # internal variables
                     79: #
                     80: 
                     81: # number of errors
                     82: my $errcnt = 0;
                     83: # number of warnings
                     84: my $warncnt = 0;
                     85: 
                     86: #######################################################
                     87: # subroutines
                     88: #
                     89: 
                     90: 
                     91: #
                     92: # $files = read_resource_meta($rootnode)
                     93: #
                     94: # checks general resource meta information and reads the list of files
                     95: #
                     96: sub read_resource_meta {
                     97:     my ($rootnode) = @_;
                     98:     my %files;
                     99:     #
                    100:     # archive path
                    101:     #
                    102:     # get archive-path
                    103:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
                    104:     if (! $archname) {
                    105:    logger("ABORT", "archive-name element missing!!");
                    106:    exit 1;
                    107:     }
                    108: 
                    109:     #
                    110:     # files
                    111:     #
                    112:     my @filenodes = $rootnode->findnodes('child::file');
                    113:     foreach my $fn (@filenodes) {
                    114:    my $name = sstrip($fn->findvalue('child::name'));
                    115:    my $path = sstrip($fn->findvalue('child::path'));
                    116:    logger("DEBUG", "FILE: ($path)$name");
                    117:    my $f = ($path) ? "$path/$name" : "$name";
                    118:    $files{$f} = [$name];
                    119:     }
                    120: 
                    121:     #
                    122:     # dirs
                    123:     #
                    124:     my @dirnodes = $rootnode->findnodes('child::dir');
                    125:     foreach my $fn (@dirnodes) {
                    126:    my $name = sstrip($fn->findvalue('child::name'));
                    127:    my $path = sstrip($fn->findvalue('child::path'));
                    128:    logger("DEBUG", "DIR: ($path)$name");
                    129:    my $f = ($path) ? "$path/$name" : "$name";
                    130:    $files{$f} = [$name];
                    131:     }
                    132: 
                    133:     #
                    134:     # archive-storage-date
                    135:     #
                    136:     my $archdate = $rootnode->find('child::archive-storage-date');
                    137:     if ($archdate) {
                    138:    logger("INFO", "archive storage date: $archdate");
                    139:     } else {
                    140:    logger("ERROR", "archive storage date missing!");
                    141:    $errcnt++;
                    142:     }
                    143:     return \%files;
                    144: }
                    145: 
                    146: 
                    147: #
                    148: # fs_read_files($realdir, $docdir, \%files, \%dirs)
                    149: #
                    150: # reads all files and directories below $realdir and puts the
                    151: # files in %files and directories in %dirs
                    152: # $docdir is only for recursion, it should be empty when called 
                    153: # from outside
                    154: #
                    155: sub fs_read_files {
                    156:     my ($directory, $docdir, $files, $dirs) = @_;    
                    157:     my $cnt = 0;
                    158: 
                    159:     if (! opendir DIR, $directory) {
                    160:    return 0;
                    161:     }
                    162:     my @dirfiles = readdir DIR;
                    163:     foreach my $fn (@dirfiles) {
                    164:    # ignore names starting with a dot
                    165:    next if ($fn =~ /^\./);
                    166:    # ignore other silly files
                    167:    next if ($junk_files{$fn});
                    168: 
                    169:    $cnt++;
                    170:    my $f = "$directory/$fn";
                    171:    my $docf = ($docdir) ? "$docdir/$fn" : $fn;
                    172:    #print "fs_file: \"$f\"\n";
                    173:    if (-f $f) {
                    174:        #print "  is file\n";
                    175:        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                    176:                       $atime,$mtime,$ctime,$blksize,$blocks)
                    177:                           = stat(_); 
                    178:        $$files{$docf} = [$fn, $size, stime($mtime)];
                    179:        #logger("TEST", "fn $fn, size $size, mtime $mtime");
                    180:    } elsif (-d _) {
                    181:        #print "  is dir\n";
                    182:        $$dirs{$docf} = $fn;
                    183:        # recurse into directory
                    184:        $cnt += fs_read_files($f, $docf, $files, $dirs);
                    185:    }
                    186:     }
                    187:     return $cnt;
                    188: }
                    189: 
                    190: 
                    191: #
1.2     ! casties   192: # $archcnt = run_query($dirquery, \%files)
1.1       casties   193: #
1.2     ! casties   194: # runs the archiver program on $dirquery and adds to the hash of archived files
1.1       casties   195: #
                    196: # Sample output:
                    197: #         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
                    198: #
                    199: sub run_query {
1.2     ! casties   200:     my ($dirquery, $files) = @_;
1.1       casties   201:     print LOG "START checkarchive $version ", scalar localtime, "\n";
                    202:     my $archcmd = $archprog;
                    203:     $archcmd .= " query archive -subdir=yes";
                    204:     $archcmd .= " -description='$archname'";
1.2     ! casties   205:     $archcmd .= " '$dirquery'";
1.1       casties   206: 
                    207:     my $archcnt = 0;
                    208:     print LOG "CMD: $archcmd\n";
                    209:     if (open ARCH, "$archcmd 2>&1 |") {
                    210:    while (<ARCH>) {
                    211:        chomp;
                    212:        print LOG "ARCH: $_\n";
                    213:        if (/
                    214:        \s*([\d,]+)    # size
                    215:        \s+(\w+)       # unit of size
                    216:        \s+([\d\/]+)   # date mm\/dd\/yy
                    217:        \s+([\d:]+)    # time
                    218:        \s+(\S+)       # file name
                    219:        \s+(\w+)       # expiry
                    220:        \s+(\S+)       # archive label
                    221:        /x) {
                    222:        my $size = $1;
                    223:        my $sunit = $2;
                    224:        my $date = $3;
                    225:        my $time = $4;
                    226:        my $file = $5;
                    227:        my $exp = $6;
                    228:        my $label = $7;
                    229:        $size =~ s/,//g;
                    230:        $date = ymd_date($date);
                    231:        logger("DEBUG", "  QUERY: file '$file'");
                    232:        $archcnt++;
1.2     ! casties   233:        if ($$files{$file}) {
        !           234:            logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
        !           235:            #$warncnt++;
        !           236:        }
        !           237:        if (length $file <= length $docdir) {
        !           238:            logger("DEBUG", "not below document dir: $file");
        !           239:            next;
        !           240:        }
        !           241:        $$files{$file} = [$size, "$date $time"];
1.1       casties   242:        }
                    243:    }
                    244:     } else {
                    245:    logger("ABORT", "unable to start archive command '$archcmd'!!");
                    246:    exit 1;
                    247:     }
                    248: 
1.2     ! casties   249:     return $archcnt;
1.1       casties   250: }
                    251: 
                    252: 
                    253: #
                    254: # check_files(\%files_to_archive, \%archived_files)
                    255: #
                    256: # compares the list of archived and to be archived files
                    257: #
                    258: sub check_files {
                    259:     my ($to_archive, $archived) = @_;
                    260: 
                    261:     my $nt = scalar keys %$to_archive;
                    262:     my $na = scalar keys %$archived;
                    263: 
                    264:     foreach my $ft (sort keys %$to_archive) {
                    265:    my $fp = "$docdir/$ft";
                    266:    #logger("DEBUG", "  fp: $fp");
                    267:    if ($$archived{$fp}) {
                    268:        logger("DEBUG", "$ft archived OK");
                    269:        $$archived{$fp}->[2] = "OK";
                    270:    } else {
                    271:        logger("ERROR", "file entry '$ft' missing from archive!");
                    272:        $errcnt++;
                    273:    }
                    274:     }
                    275: 
                    276:     foreach my $fa (sort keys %$archived) {
                    277:    if (! $$archived{$fa}->[2]) {
                    278:        my ($fn, $fp) = split_file_path($fa);
                    279:        if ($index_files{$fn}) {
                    280:        logger("DEBUG", "$fa ignored");
                    281:        $na--;
                    282:        } else {
                    283:        logger("WARNING", "$fa archived but not in list!");
                    284:        $warncnt++;
                    285:        }
                    286:    }
                    287:     }
                    288: 
                    289:     if ($nt > $na) {
                    290:    logger("WARNING", "less files were archived ($na vs. $nt)!");
                    291:    $warncnt++;
                    292:     } elsif ($na > $nt) {
                    293:    logger("WARNING", "more files were archived ($na vs. $nt)!");
                    294:    $warncnt++;
                    295:     }
                    296: 
                    297: }
                    298: 
                    299: #
                    300: # compare_files(\%files_on_disk, \%archived_files)
                    301: #
                    302: # compares the list of archived files and files on disk
                    303: #
                    304: sub compare_files {
                    305:     my ($fs_files, $archived) = @_;
                    306: 
                    307:     foreach my $ft (sort keys %$fs_files) {
                    308:    next if ($index_files{$ft});
                    309:    my $fp = "$docdir/$ft";
                    310:    #logger("DEBUG", "  fp: $fp");
                    311:    if ($$archived{$fp}) {
                    312:        next if ($index_files{$ft});
                    313:        
                    314:        my $asize = $$archived{$fp}[0];
                    315:        my $atime = $$archived{$fp}[1];
                    316:        my $fsize = $$fs_files{$ft}[1];
                    317:        my $ftime = $$fs_files{$ft}[2];
                    318:        if ($asize != $fsize) {
                    319:        logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
                    320:        $errcnt++;
                    321:        } elsif ($atime lt $ftime) {
                    322:        logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
                    323:        $errcnt++;
                    324:        } else {
                    325:        logger("ERROR", "archived file $ft still on disk");
                    326:        $errcnt++;
                    327:        }
                    328:    } else {
1.2     ! casties   329:        logger("ERROR", "file on disk '$ft' is not in archive!");
1.1       casties   330:        $errcnt++;
                    331:    }
                    332:     }
                    333: }
                    334: 
                    335: 
                    336: 
                    337: #######################################################
                    338: # main
                    339: #
                    340: 
                    341: logger("INFO", "archivecheck $version");
                    342: 
                    343: # make shure the right user is running this program
                    344: my $user = getlogin;
                    345: if (($user ne "archive")&&($user ne "root")) {
                    346:     logger("ABORT", "you must be archive or root user to run this program!");
                    347:     exit 1;
                    348: }
                    349: 
                    350: # read index.meta file
                    351: my ($document, $rootnode) = read_xml($metafile);
                    352: 
                    353: # check file and add archive date
                    354: my $files_to_archive = read_resource_meta($rootnode);
                    355: 
                    356: # check for .archived file
                    357: if (-f "$docdir/.archived") {
                    358:     logger("INFO", ".archived file exists.");
                    359: } else {
                    360:     logger("WARNING", "no .archived file!");
                    361:     $warncnt++;
                    362: }
                    363: 
                    364: # check archive
1.2     ! casties   365: my %archived_files = ();
        !           366: my $archcnt = 0;
        !           367: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
        !           368:     # TSM needs two different paths because of historical mount points :-(
        !           369:     my $docdir1 = "/mpiwg/archive/data/";
        !           370:     $archcnt += run_query($docdir1, \%archived_files);
        !           371:     my $docdir2 = "/mpiwg/archive/";
        !           372:     $archcnt += run_query($docdir2, \%archived_files);
        !           373: } else {
        !           374:     $archcnt += run_query("$docdir/", \%archived_files);
        !           375: }
        !           376: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
1.1       casties   377: 
1.2     ! casties   378: my $num_arch_files = (scalar keys %archived_files);
1.1       casties   379: if ($num_arch_files == 0) {
                    380:     logger("ABORT", "no archive of this directory!!");
                    381:     exit 1;
                    382: }
                    383: logger("INFO", "$num_arch_files files archived");
                    384: 
                    385: # check list of archived files
1.2     ! casties   386: check_files($files_to_archive, \%archived_files);
1.1       casties   387: 
                    388: # read files from filesystem
                    389: my %fsfiles;
                    390: my %fsdirs;
                    391: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
                    392: 
                    393: logger("INFO", "$num_fs_files files still on disk!");
                    394: if ($num_fs_files > 0) {
1.2     ! casties   395:     compare_files(\%fsfiles, \%archived_files);
1.1       casties   396: }
                    397: 
                    398: logger("INFO", "$warncnt warnings");
                    399: logger("INFO", "$errcnt errors");
                    400: if ($errcnt == 0) {
1.2     ! casties   401:     logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
1.1       casties   402: } else {
                    403:     logger("ABORT", "there were $errcnt errors!!");
                    404:     exit 1;
                    405: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>