Annotation of foxridge-archiver/archivecheck.pl, revision 1.1

1.1     ! casties     1: #!/usr/local/bin/perl -w
        !             2: 
        !             3: use strict;
        !             4: 
        !             5: use XML::LibXML;
        !             6: 
        !             7: # MPIWG libraries
        !             8: use lib '/usr/local/mpiwg/archive';
        !             9: use MPIWGStor;
        !            10: 
        # Turn off output buffering so messages show up as soon as they are printed.
        $| = 1;

        #######################################################
        # internal parameters
        #

        # program version (printed at startup and written to the log file)
        my $version = "0.3 (24.9.2003)";

        # command line parameters; parseargs is not defined in this file
        # (presumably provided by MPIWGStor -- verify)
        my $args = parseargs;

        # debug level: the value of the 'debug' parameter when one was given,
        # otherwise 0
        if (exists $args->{'debug'}) {
            $debug = $args->{'debug'};
        }
        else {
            $debug = 0;
        }

        # XML namespace (not really implemented!)
        my $namespace = "";

        # archive name (archive-path element, usually == $docdir);
        # filled in by read_resource_meta
        my $archname;
        # archive storage date
        my $archdate;
        !            34: 
        !            35: 
        #######################################################
        # external programs
        #

        # TSM command line client that is used to query the archive.
        # Abort right away when it is not installed or not executable.
        my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
        if (! -x $archprog) {
            logger("ABORT", "TSM client program '$archprog' missing!!");
            exit 1;
        }
        # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
        # if (! -x $checkprog) {
        #     logger("ABORT", "meta data checking program '$checkprog' missing!!");
        #     exit 1;
        # }

        # log file for archiver, opened for appending.
        # The three-argument form of open keeps the mode separate from the
        # file name.  The bareword handle LOG is kept on purpose: run_query
        # prints to it.
        my $log_file = "/var/tmp/archivecheck.log";
        if (! open(LOG, '>>', $log_file)) {
            logger("ABORT", "unable to write log file '$log_file'!!");
            exit 1;
        }
        !            55: 
        #######################################################
        # check parameters that were passed to the program
        #

        # document directory to check (the 'path' parameter)
        my $docdir = $$args{'path'};
        if (! $docdir) {
            print "ABORT: no document directory given!\n";
            exit 1;
        }
        # Strip ALL trailing slashes, not just one: $docdir is later joined
        # with "/" to build the paths that are looked up in the archive
        # listing, so a leftover slash would produce "dir//file" keys that
        # never match.  A path consisting only of slashes is left alone.
        $docdir =~ s{(?<=[^/])/+\z}{};
        if (! -d $docdir) {
            print "ABORT: document directory \'$docdir\' doesn't exist!\n";
            exit 1;
        }

        # the metadata index file must exist inside the document directory
        my $metafile = "$docdir/index.meta";
        if (! -f $metafile) {
            print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
            exit 1;
        }

        #######################################################
        # internal variables
        #

        # number of errors
        my $errcnt = 0;
        # number of warnings
        my $warncnt = 0;
        !            85: 
        !            86: #######################################################
        !            87: # subroutines
        !            88: #
        !            89: 
        !            90: 
        #
        # $files = read_resource_meta($rootnode)
        #
        # Checks the general resource meta information below $rootnode and
        # collects the file and directory entries listed there.
        #
        # Parameters:
        #   $rootnode - node that is queried with findvalue/findnodes/find for
        #               its archive-path, file, dir and archive-storage-date
        #               children
        #
        # Returns:
        #   reference to a hash keyed by "path/name" (just "name" when the
        #   entry has no path); every value is an array ref containing the
        #   name.  <file> and <dir> entries share this one hash.
        #
        # Side effects:
        #   - sets the file-level $archname; logs ABORT and exits with status 1
        #     when the archive-path element is empty or missing
        #   - sets the file-level $archdate; a missing archive-storage-date is
        #     logged as ERROR and counted in $errcnt
        #
        sub read_resource_meta {
            my ($rootnode) = @_;
            my %files;

            #
            # archive path
            #
            $archname = sstrip($rootnode->findvalue('child::archive-path'));
            if (! $archname) {
                # the element that was looked up is archive-path, so name it
                # correctly in the message
                logger("ABORT", "archive-path element missing!!");
                exit 1;
            }

            #
            # files and dirs
            #
            # Both kinds of entries are recorded the same way; only the label
            # in the debug message differs.  Files are processed before dirs.
            foreach my $kind (['child::file', 'FILE'], ['child::dir', 'DIR']) {
                my ($xpath, $label) = @$kind;
                foreach my $node ($rootnode->findnodes($xpath)) {
                    my $name = sstrip($node->findvalue('child::name'));
                    my $path = sstrip($node->findvalue('child::path'));
                    logger("DEBUG", "$label: ($path)$name");
                    # test for emptiness, not truth, so that a path of "0"
                    # is still used as a prefix
                    my $has_path = (defined $path && $path ne '');
                    my $f = $has_path ? "$path/$name" : "$name";
                    $files{$f} = [$name];
                }
            }

            #
            # archive-storage-date
            #
            # No "my" here: this has to set the file-level $archdate instead
            # of a new variable that disappears when the sub returns.
            # The result of find() is used both as a truth value and
            # interpolated into the log message.
            $archdate = $rootnode->find('child::archive-storage-date');
            if ($archdate) {
                logger("INFO", "archive storage date: $archdate");
            } else {
                logger("ERROR", "archive storage date missing!");
                $errcnt++;
            }
            return \%files;
        }
        !           145: 
        !           146: 
        #
        # $count = fs_read_files($realdir, $docdir, \%files, \%dirs)
        #
        # Reads all files and directories below $realdir recursively.
        #
        # Parameters:
        #   $realdir - directory on disk to scan
        #   $docdir  - path prefix used for the hash keys; only for recursion,
        #              it should be empty when called from outside
        #   \%files  - filled with  key => [name, size, stime(mtime)]
        #   \%dirs   - filled with  key => name
        #
        # Names starting with a dot and names listed in %junk_files are
        # skipped.  Entries that are neither plain files nor directories are
        # counted but not recorded.
        #
        # Returns the number of entries found (files AND directories, including
        # those in subdirectories); 0 when $realdir cannot be opened.
        #
        sub fs_read_files {
            my ($directory, $docdir, $files, $dirs) = @_;
            my $cnt = 0;

            # A lexical handle is used because this sub calls itself: a shared
            # bareword handle would be reopened by every nested call and was
            # never closed.
            opendir(my $dh, $directory) or return 0;
            my @dirfiles = readdir $dh;
            closedir $dh;

            # test for emptiness, not truth, so that a directory named "0"
            # still becomes part of the keys of its children
            my $prefix = (defined $docdir && $docdir ne '') ? "$docdir/" : "";

            foreach my $fn (@dirfiles) {
                # ignore names starting with a dot
                next if ($fn =~ /^\./);
                # ignore other silly files
                next if ($junk_files{$fn});

                $cnt++;
                my $f = "$directory/$fn";
                my $docf = $prefix . $fn;
                if (-f $f) {
                    # "_" reuses the stat data fetched by the -f test;
                    # fields 7 and 9 of stat are size and mtime
                    my ($size, $mtime) = (stat(_))[7, 9];
                    $$files{$docf} = [$fn, $size, stime($mtime)];
                } elsif (-d _) {
                    $$dirs{$docf} = $fn;
                    # recurse into directory
                    $cnt += fs_read_files($f, $docf, $files, $dirs);
                }
            }
            return $cnt;
        }
        !           189: 
        !           190: 
        #
        # \%files = run_query()
        #
        # Runs the archiver program ($archprog) with "query archive" on $docdir
        # and returns the archived files it reports.
        #
        # Returns:
        #   reference to a hash  file name => [size, "date time"]  where size
        #   has its thousands separators removed and date went through
        #   ymd_date().
        #
        # Side effects:
        #   - writes the command and every output line to the LOG handle
        #   - a file that is listed more than once is logged as WARNING and
        #     counted in $warncnt; the last listing wins
        #   - logs ABORT and exits with status 1 when the command can not be
        #     started
        #
        # Sample output:
        #         20,345  B  08/06/03   17:17:02    /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
        #
        # NOTE(review): the unit-of-size column is matched but not evaluated,
        # and file names containing whitespace do not match the pattern --
        # confirm that the client never prints either.
        #
        sub run_query {
            my %files;
            print LOG "START checkarchive $version ", scalar localtime, "\n";

            # The command is run through the shell (needed for "2>&1"), so the
            # archive name and the directory are quoted with embedded single
            # quotes escaped.
            my $archcmd = $archprog;
            $archcmd .= " query archive -subdir=yes";
            $archcmd .= " -description=" . _shell_quote($archname);
            $archcmd .= " " . _shell_quote("$docdir/");

            # one line of the listing
            my $entry_re = qr{
                \s*([\d,]+)    # size
                \s+(\w+)       # unit of size
                \s+([\d/]+)    # date mm/dd/yy
                \s+([\d:]+)    # time
                \s+(\S+)       # file name
                \s+(\w+)       # expiry
                \s+(\S+)       # archive label
            }x;

            my $archcnt = 0;
            print LOG "CMD: $archcmd\n";
            my $arch;
            if (! open($arch, '-|', "$archcmd 2>&1")) {
                logger("ABORT", "unable to start archive command '$archcmd'!!");
                exit 1;
            }
            while (my $line = <$arch>) {
                chomp $line;
                print LOG "ARCH: $line\n";
                next unless ($line =~ $entry_re);
                # unit ($2), expiry ($6) and label ($7) are not used
                my ($size, $date, $time, $file) = ($1, $3, $4, $5);
                $size =~ s/,//g;
                $date = ymd_date($date);
                logger("DEBUG", "  QUERY: file '$file'");
                $archcnt++;
                if ($files{$file}) {
                    logger("WARNING", "file $file seems to be archived multiple times: $time $date");
                    $warncnt++;
                }
                $files{$file} = [$size, "$date $time"];
            }
            # Close the pipe so the child process is reaped.  A non-zero exit
            # status is only recorded, not treated as fatal: the caller decides
            # based on the number of files that were found.
            close $arch;
            print LOG "ARCH exit status: ", ($? >> 8), "\n";
            logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");

            return \%files;
        }

        #
        # $quoted = _shell_quote($text)
        #
        # Wraps $text in single quotes for use in a shell command line; every
        # embedded single quote is replaced by '\'' so it can not end the
        # quoted string.
        #
        sub _shell_quote {
            my ($text) = @_;
            $text =~ s/'/'\\''/g;
            return "'$text'";
        }
        !           248: 
        !           249: 
        #
        # check_files(\%files_to_archive, \%archived_files)
        #
        # Compares the list of files that should be archived with the list of
        # files that are in the archive.
        #
        #   - an entry of %files_to_archive without a "$docdir/<entry>" key in
        #     %archived_files is logged as ERROR and counted in $errcnt
        #   - every archived file that was matched gets "OK" stored in slot 2
        #     of its array
        #   - an archived file that was not matched is ignored when its file
        #     name is in %index_files, otherwise it is logged as WARNING and
        #     counted in $warncnt
        #   - a difference between the number of entries and the number of
        #     archived files (without the ignored ones) adds one more WARNING
        #
        sub check_files {
            my ($to_archive, $archived) = @_;

            my $nt = scalar keys %$to_archive;
            my $na = scalar keys %$archived;

            # pass 1: everything on the list has to be in the archive
            for my $entry (sort keys %$to_archive) {
                my $full = "$docdir/$entry";
                unless ($archived->{$full}) {
                    logger("ERROR", "file entry '$entry' missing from archive!");
                    $errcnt++;
                    next;
                }
                logger("DEBUG", "$entry archived OK");
                $archived->{$full}->[2] = "OK";
            }

            # pass 2: look at archived files that were not matched in pass 1
            for my $arch_path (sort keys %$archived) {
                next if ($archived->{$arch_path}->[2]);
                my ($base) = split_file_path($arch_path);
                unless ($index_files{$base}) {
                    logger("WARNING", "$arch_path archived but not in list!");
                    $warncnt++;
                    next;
                }
                logger("DEBUG", "$arch_path ignored");
                $na--;
            }

            # finally compare the totals
            if ($nt > $na) {
                logger("WARNING", "less files were archived ($na vs. $nt)!");
                $warncnt++;
            } elsif ($na > $nt) {
                logger("WARNING", "more files were archived ($na vs. $nt)!");
                $warncnt++;
            }

        }
        !           295: 
        #
        # compare_files(\%files_on_disk, \%archived_files)
        #
        # Compares the files that are still on disk with the archived files.
        #
        # Parameters:
        #   \%files_on_disk  - key => [name, size, mtime string] as filled in
        #                      by fs_read_files
        #   \%archived_files - "$docdir/<key>" => [size, "date time"] as
        #                      returned by run_query
        #
        # Every file on disk whose key is not in %index_files is logged as
        # ERROR and counted in $errcnt; the message tells whether it is
        # missing from the archive, differs in size, is newer than the
        # archived copy, or is simply still on disk.
        #
        # NOTE(review): the keys tested against %index_files here are paths
        # relative to $docdir, while check_files tests the bare file name --
        # confirm whether index files in subdirectories should be skipped too.
        # NOTE(review): the time stamps are compared as strings; this assumes
        # ymd_date() and stime() produce the same sortable format -- verify.
        #
        sub compare_files {
            my ($fs_files, $archived) = @_;

            foreach my $ft (sort keys %$fs_files) {
                # index files are expected to stay on disk
                next if ($index_files{$ft});
                my $fp = "$docdir/$ft";
                if ($$archived{$fp}) {
                    my $asize = $$archived{$fp}[0];
                    my $atime = $$archived{$fp}[1];
                    my $fsize = $$fs_files{$ft}[1];
                    my $ftime = $$fs_files{$ft}[2];
                    if ($asize != $fsize) {
                        logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
                        $errcnt++;
                    } elsif ($atime lt $ftime) {
                        logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
                        $errcnt++;
                    } else {
                        # archived copy is fine, but the file should be gone
                        logger("ERROR", "archived file $ft still on disk");
                        $errcnt++;
                    }
                } else {
                    logger("ERROR", "file '$ft' on disk missing from archive!");
                    $errcnt++;
                }
            }
        }
        !           331: 
        !           332: 
        !           333: 
        #######################################################
        # main
        #
        # Reads index.meta, queries the archive, compares the archive with
        # the file list and with the files still on disk, and exits with
        # status 1 when any error was counted.
        #

        logger("INFO", "archivecheck $version");

        # make sure the right user is running this program
        # getlogin returns nothing when there is no controlling terminal
        # (e.g. when started from cron); fall back to the name of the real
        # user id in that case instead of comparing an undefined value.
        my $user = getlogin || getpwuid($<) || "";
        if (($user ne "archive")&&($user ne "root")) {
            logger("ABORT", "you must be archive or root user to run this program!");
            exit 1;
        }

        # read index.meta file
        my ($document, $rootnode) = read_xml($metafile);

        # check the meta data and get the list of files that should be archived
        my $files_to_archive = read_resource_meta($rootnode);

        # check for .archived file
        if (-f "$docdir/.archived") {
            logger("INFO", ".archived file exists.");
        } else {
            logger("WARNING", "no .archived file!");
            $warncnt++;
        }

        # check archive
        my $archived_files = run_query();

        # nothing in the archive at all: no point in comparing anything
        my $num_arch_files = (scalar keys %$archived_files);
        if ($num_arch_files == 0) {
            logger("ABORT", "no archive of this directory!!");
            exit 1;
        }
        logger("INFO", "$num_arch_files files archived");

        # check list of archived files
        check_files($files_to_archive, $archived_files);

        # read files from filesystem
        my %fsfiles;
        my %fsdirs;
        my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);

        logger("INFO", "$num_fs_files files still on disk!");
        if ($num_fs_files > 0) {
            compare_files(\%fsfiles, $archived_files);
        }

        # summary; the exit status reflects whether errors were counted
        logger("INFO", "$warncnt warnings");
        logger("INFO", "$errcnt errors");
        if ($errcnt == 0) {
            logger("DONE", "" . (scalar keys %$archived_files) . " archived files checked");
        } else {
            logger("ABORT", "there were $errcnt errors!!");
            exit 1;
        }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>