Annotation of foxridge-archiver/archivecheck.pl, revision 1.7

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # program version
1.7     ! casties    19: my $version = "0.4.4 (10.9.2008 ROC)";
1.1       casties    20: 
                     21: # read command line parameters
                     22: my $args = parseargs;
                     23: 
                     24: # debug level
                     25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
                     26: 
                     27: # XML namespace (not really implemented!)
                     28: my $namespace = "";
                     29: 
                     30: # archive name (archive-path element, usually == $docdir)
                     31: my $archname;
                     32: 
                     33: 
                     34: #######################################################
                     35: # external programs
                     36: #
                     37: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     38: if (! -x $archprog) {
                     39:     logger("ABORT", "TSM client program '$archprog' missing!!");
                     40:     exit 1;
                     41: }
                     42: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     43: # if (! -x $checkprog) {
                     44: #     logge("ABORT", "meta data checking program '$checkprog' missing!!");
                     45: #     exit 1;
                     46: # }
                     47: # log file for archiver
                     48: my $log_file = "/var/tmp/archivecheck.log";
                     49: if (! open LOG, ">>$log_file") {
                     50:     logger("ABORT", "unable to write log file '$log_file'!!");
                     51:     exit 1;
                     52: }
                     53: 
                     54: #######################################################
                     55: # check parameters that were passed to the program
                     56: #
                     57: my $docdir = $$args{'path'};
                     58: if (! $docdir) {
                     59:     print "ABORT: no document directory given!\n";
                     60:     exit 1;
                     61: }
                     62: # strip trailing slashes
                     63: $docdir =~ s/\/$//;
                     64: if (! -d $docdir) {
                     65:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
                     66:     exit 1;
                     67: }
                     68: 
                     69: my $metafile = "$docdir/index.meta";
                     70: if (! -f $metafile) {
                     71:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
                     72:     exit 1;
                     73: }
                     74: 
                     75: #######################################################
                     76: # internal variables
                     77: #
                     78: 
                     79: # number of errors
                     80: my $errcnt = 0;
                     81: # number of warnings
                     82: my $warncnt = 0;
                     83: 
                     84: #######################################################
                     85: # subroutines
                     86: #
                     87: 
                     88: 
                     89: #
                     90: # $files = read_resource_meta($rootnode)
                     91: #
                     92: # checks general resource meta information and reads the list of files
                     93: #
                     94: sub read_resource_meta {
                     95:     my ($rootnode) = @_;
                     96:     my %files;
                     97:     #
                     98:     # archive path
                     99:     #
                    100:     # get archive-path
                    101:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
                    102:     if (! $archname) {
                    103:    logger("ABORT", "archive-name element missing!!");
                    104:    exit 1;
                    105:     }
                    106: 
                    107:     #
                    108:     # files
                    109:     #
                    110:     my @filenodes = $rootnode->findnodes('child::file');
                    111:     foreach my $fn (@filenodes) {
                    112:    my $name = sstrip($fn->findvalue('child::name'));
                    113:    my $path = sstrip($fn->findvalue('child::path'));
                    114:    logger("DEBUG", "FILE: ($path)$name");
                    115:    my $f = ($path) ? "$path/$name" : "$name";
                    116:    $files{$f} = [$name];
                    117:     }
                    118: 
                    119:     #
                    120:     # dirs
                    121:     #
                    122:     my @dirnodes = $rootnode->findnodes('child::dir');
                    123:     foreach my $fn (@dirnodes) {
                    124:    my $name = sstrip($fn->findvalue('child::name'));
                    125:    my $path = sstrip($fn->findvalue('child::path'));
                    126:    logger("DEBUG", "DIR: ($path)$name");
1.5       casties   127:    my $f = "$name";
                    128:    if (($path)&&($path ne '.')) {
                    129:        $f = "$path/$name";
                    130:    }
1.1       casties   131:    $files{$f} = [$name];
                    132:     }
                    133: 
                    134:     #
                    135:     # archive-storage-date
                    136:     #
                    137:     my $archdate = $rootnode->find('child::archive-storage-date');
                    138:     if ($archdate) {
                    139:    logger("INFO", "archive storage date: $archdate");
                    140:     } else {
                    141:    logger("ERROR", "archive storage date missing!");
                    142:    $errcnt++;
                    143:     }
                    144:     return \%files;
                    145: }
                    146: 
                    147: 
                    148: #
                    149: # fs_read_files($realdir, $docdir, \%files, \%dirs)
                    150: #
                    151: # reads all files and directories below $realdir and puts the
                    152: # files in %files and directories in %dirs
                    153: # $docdir is only for recursion, it should be empty when called 
                    154: # from outside
                    155: #
                    156: sub fs_read_files {
                    157:     my ($directory, $docdir, $files, $dirs) = @_;    
                    158:     my $cnt = 0;
                    159: 
                    160:     if (! opendir DIR, $directory) {
                    161:    return 0;
                    162:     }
                    163:     my @dirfiles = readdir DIR;
                    164:     foreach my $fn (@dirfiles) {
                    165:    # ignore names starting with a dot
                    166:    next if ($fn =~ /^\./);
                    167:    # ignore other silly files
                    168:    next if ($junk_files{$fn});
                    169: 
                    170:    $cnt++;
                    171:    my $f = "$directory/$fn";
                    172:    my $docf = ($docdir) ? "$docdir/$fn" : $fn;
                    173:    #print "fs_file: \"$f\"\n";
                    174:    if (-f $f) {
                    175:        #print "  is file\n";
                    176:        my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
                    177:                       $atime,$mtime,$ctime,$blksize,$blocks)
                    178:                           = stat(_); 
                    179:        $$files{$docf} = [$fn, $size, stime($mtime)];
                    180:        #logger("TEST", "fn $fn, size $size, mtime $mtime");
                    181:    } elsif (-d _) {
                    182:        #print "  is dir\n";
                    183:        $$dirs{$docf} = $fn;
                    184:        # recurse into directory
                    185:        $cnt += fs_read_files($f, $docf, $files, $dirs);
                    186:    }
                    187:     }
                    188:     return $cnt;
                    189: }
                    190: 
                    191: 
                    192: #
1.2       casties   193: # $archcnt = run_query($dirquery, \%files)
1.1       casties   194: #
1.2       casties   195: # runs the archiver program on $dirquery and adds to the hash of archived files
1.1       casties   196: #
                    197: # Sample output:
                    198: #         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
                    199: #
                    200: sub run_query {
1.2       casties   201:     my ($dirquery, $files) = @_;
1.1       casties   202:     print LOG "START checkarchive $version ", scalar localtime, "\n";
                    203:     my $archcmd = $archprog;
                    204:     $archcmd .= " query archive -subdir=yes";
                    205:     $archcmd .= " -description='$archname'";
1.2       casties   206:     $archcmd .= " '$dirquery'";
1.1       casties   207: 
1.6       casties   208:     logger('INFO', "querying TSM server for $dirquery, please wait...");
1.1       casties   209:     my $archcnt = 0;
                    210:     print LOG "CMD: $archcmd\n";
                    211:     if (open ARCH, "$archcmd 2>&1 |") {
                    212:    while (<ARCH>) {
                    213:        chomp;
                    214:        print LOG "ARCH: $_\n";
                    215:        if (/
                    216:        \s*([\d,]+)    # size
                    217:        \s+(\w+)       # unit of size
                    218:        \s+([\d\/]+)   # date mm\/dd\/yy
                    219:        \s+([\d:]+)    # time
                    220:        \s+(\S+)       # file name
                    221:        \s+(\w+)       # expiry
                    222:        \s+(\S+)       # archive label
                    223:        /x) {
                    224:        my $size = $1;
                    225:        my $sunit = $2;
                    226:        my $date = $3;
                    227:        my $time = $4;
                    228:        my $file = $5;
                    229:        my $exp = $6;
                    230:        my $label = $7;
                    231:        $size =~ s/,//g;
                    232:        $date = ymd_date($date);
                    233:        logger("DEBUG", "  QUERY: file '$file'");
                    234:        $archcnt++;
1.2       casties   235:        if ($$files{$file}) {
                    236:            logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
                    237:            #$warncnt++;
                    238:        }
                    239:        if (length $file <= length $docdir) {
                    240:            logger("DEBUG", "not below document dir: $file");
                    241:            next;
                    242:        }
                    243:        $$files{$file} = [$size, "$date $time"];
1.1       casties   244:        }
                    245:    }
                    246:     } else {
                    247:    logger("ABORT", "unable to start archive command '$archcmd'!!");
                    248:    exit 1;
                    249:     }
                    250: 
1.2       casties   251:     return $archcnt;
1.1       casties   252: }
                    253: 
                    254: 
                    255: #
                    256: # check_files(\%files_to_archive, \%archived_files)
                    257: #
                    258: # compares the list of archived and to be archived files
                    259: #
                    260: sub check_files {
                    261:     my ($to_archive, $archived) = @_;
                    262: 
                    263:     my $nt = scalar keys %$to_archive;
                    264:     my $na = scalar keys %$archived;
                    265: 
                    266:     foreach my $ft (sort keys %$to_archive) {
                    267:    my $fp = "$docdir/$ft";
                    268:    #logger("DEBUG", "  fp: $fp");
                    269:    if ($$archived{$fp}) {
                    270:        logger("DEBUG", "$ft archived OK");
                    271:        $$archived{$fp}->[2] = "OK";
                    272:    } else {
                    273:        logger("ERROR", "file entry '$ft' missing from archive!");
                    274:        $errcnt++;
                    275:    }
                    276:     }
                    277: 
                    278:     foreach my $fa (sort keys %$archived) {
                    279:    if (! $$archived{$fa}->[2]) {
                    280:        my ($fn, $fp) = split_file_path($fa);
                    281:        if ($index_files{$fn}) {
                    282:        logger("DEBUG", "$fa ignored");
                    283:        $na--;
                    284:        } else {
                    285:        logger("WARNING", "$fa archived but not in list!");
                    286:        $warncnt++;
                    287:        }
                    288:    }
                    289:     }
                    290: 
                    291:     if ($nt > $na) {
                    292:    logger("WARNING", "less files were archived ($na vs. $nt)!");
                    293:    $warncnt++;
                    294:     } elsif ($na > $nt) {
                    295:    logger("WARNING", "more files were archived ($na vs. $nt)!");
                    296:    $warncnt++;
                    297:     }
                    298: 
                    299: }
                    300: 
                    301: #
                    302: # compare_files(\%files_on_disk, \%archived_files)
                    303: #
                    304: # compares the list of archived files and files on disk
                    305: #
                    306: sub compare_files {
                    307:     my ($fs_files, $archived) = @_;
                    308: 
                    309:     foreach my $ft (sort keys %$fs_files) {
                    310:    next if ($index_files{$ft});
                    311:    my $fp = "$docdir/$ft";
                    312:    #logger("DEBUG", "  fp: $fp");
                    313:    if ($$archived{$fp}) {
                    314:        next if ($index_files{$ft});
                    315:        
                    316:        my $asize = $$archived{$fp}[0];
                    317:        my $atime = $$archived{$fp}[1];
                    318:        my $fsize = $$fs_files{$ft}[1];
                    319:        my $ftime = $$fs_files{$ft}[2];
                    320:        if ($asize != $fsize) {
                    321:        logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
                    322:        $errcnt++;
                    323:        } elsif ($atime lt $ftime) {
                    324:        logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
                    325:        $errcnt++;
                    326:        } else {
1.3       casties   327:        logger("WARNING", "archived file $ft still on disk");
                    328:        $warncnt++;
1.1       casties   329:        }
                    330:    } else {
1.2       casties   331:        logger("ERROR", "file on disk '$ft' is not in archive!");
1.1       casties   332:        $errcnt++;
                    333:    }
                    334:     }
                    335: }
                    336: 
                    337: 
                    338: 
                    339: #######################################################
                    340: # main
                    341: #
                    342: 
                    343: logger("INFO", "archivecheck $version");
                    344: 
                    345: # make shure the right user is running this program
1.7     ! casties   346: my $user = getlogin || getpwuid($<) ;
1.4       casties   347: if (not (($user eq "archive")||($user eq "root"))) {
1.1       casties   348:     logger("ABORT", "you must be archive or root user to run this program!");
                    349:     exit 1;
                    350: }
                    351: 
                    352: # read index.meta file
                    353: my ($document, $rootnode) = read_xml($metafile);
                    354: 
                    355: # check file and add archive date
                    356: my $files_to_archive = read_resource_meta($rootnode);
                    357: 
                    358: # check for .archived file
                    359: if (-f "$docdir/.archived") {
                    360:     logger("INFO", ".archived file exists.");
                    361: } else {
                    362:     logger("WARNING", "no .archived file!");
                    363:     $warncnt++;
                    364: }
                    365: 
                    366: # check archive
1.2       casties   367: my %archived_files = ();
                    368: my $archcnt = 0;
                    369: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
                    370:     # TSM needs two different paths because of historical mount points :-(
                    371:     my $docdir1 = "/mpiwg/archive/data/";
                    372:     $archcnt += run_query($docdir1, \%archived_files);
                    373:     my $docdir2 = "/mpiwg/archive/";
                    374:     $archcnt += run_query($docdir2, \%archived_files);
                    375: } else {
                    376:     $archcnt += run_query("$docdir/", \%archived_files);
                    377: }
                    378: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
1.1       casties   379: 
1.2       casties   380: my $num_arch_files = (scalar keys %archived_files);
1.1       casties   381: if ($num_arch_files == 0) {
                    382:     logger("ABORT", "no archive of this directory!!");
                    383:     exit 1;
                    384: }
                    385: logger("INFO", "$num_arch_files files archived");
                    386: 
                    387: # check list of archived files
1.2       casties   388: check_files($files_to_archive, \%archived_files);
1.1       casties   389: 
                    390: # read files from filesystem
                    391: my %fsfiles;
                    392: my %fsdirs;
                    393: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
                    394: 
                    395: logger("INFO", "$num_fs_files files still on disk!");
                    396: if ($num_fs_files > 0) {
1.2       casties   397:     compare_files(\%fsfiles, \%archived_files);
1.1       casties   398: }
                    399: 
                    400: logger("INFO", "$warncnt warnings");
                    401: logger("INFO", "$errcnt errors");
                    402: if ($errcnt == 0) {
1.2       casties   403:     logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
1.3       casties   404:     exit 0;
1.1       casties   405: } else {
                    406:     logger("ABORT", "there were $errcnt errors!!");
                    407:     exit 1;
                    408: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>