Annotation of foxridge-archiver/archiver.pl, revision 1.5

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # program version
1.5     ! casties    19: my $version = "0.7 (ROC 20.9.2005)";
1.4       casties    20: 
                     21: # short help
1.5     ! casties    22: my $help = "MPIWG archiver $version
        !            23: use:  archiver [options] docpath
        !            24: options:
        !            25:   -debug  show debugging info
        !            26:   -premigrate  don't delete archived files
        !            27:   -force  archive even if already archived
        !            28: ";
1.4       casties    29: 
                     30: # read command line parameters
                     31: my $args = MPIWGStor::parseargs;
1.5     ! casties    32: if (! scalar(%$args)) {
        !            33:     print $help, "\n";
        !            34:     exit 1;
        !            35: }
1.4       casties    36: 
                     37: # debug level
1.5     ! casties    38: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
        !            39: 
        !            40: # force archiving
        !            41: my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
1.1       casties    42: 
                     43: # rewrite XML file (necessary for archive date!)
                     44: my $fix_xml = 1;
                     45: my $xml_changed = 0;
1.5     ! casties    46: 
1.1       casties    47: # XML namespace (not really implemented!)
                     48: my $namespace = "";
                     49: 
                     50: # archive name (archive-path element, usually == $docdir)
                     51: my $archname;
1.5     ! casties    52: 
1.1       casties    53: # archive storage date (now)
                     54: my $archdate = stime(time);
                     55: 
                     56: # delete "junk" files before archiving
                     57: my $delete_junk_files = 1;
                     58: 
                     59: # delete data files after archiving
                     60: my $delete_data_files = 1;
                     61: 
1.5     ! casties    62: # don't delete archived files with "-premigrate"
        !            63: if (exists $$args{'premigrate'}) {
        !            64:     $delete_data_files = not $$args{'premigrate'};
        !            65: }
        !            66: if ($delete_data_files) {
        !            67:     logger('INFO', "going to remove successfully archived files from disk");
        !            68: }
        !            69: 
1.1       casties    70: 
                     71: #######################################################
                     72: # external programs
                     73: #
                     74: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     75: if (! -x $archprog) {
1.5     ! casties    76:     logger('ABORT', "TSM client program '$archprog' missing!");
1.1       casties    77:     exit 1;
                     78: }
                     79: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     80: if (! -x $checkprog) {
1.5     ! casties    81:     logger('ABORT', "meta data checking program '$checkprog' missing!");
1.1       casties    82:     exit 1;
                     83: }
                     84: # log file for archiver
                     85: my $log_file = "/var/log/mpiwg-archiver.log";
                     86: if (! open LOG, ">>$log_file") {
1.5     ! casties    87:     logger('ABORT', "unable to write log file '$log_file'!");
1.1       casties    88:     exit 1;
                     89: }
                     90: 
                     91: #######################################################
                     92: # check parameters that were passed to the program
                     93: #
1.5     ! casties    94: 
1.4       casties    95: my $docdir = $$args{'path'};
1.2       casties    96: # strip double slashes
                     97: $docdir =~ s/\/\//\//;
1.1       casties    98: # strip trailing slashes
1.2       casties    99: $docdir =~ s/\/+$//;
1.1       casties   100: if (! -d $docdir) {
1.5     ! casties   101:     logger('ABORT', "document directory \'$docdir\' doesn't exist!");
1.1       casties   102:     exit 1;
                    103: }
                    104: 
                    105: my $metafile = "$docdir/index.meta";
                    106: if (! -f $metafile) {
1.5     ! casties   107:     logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
1.1       casties   108:     exit 1;
                    109: }
                    110: 
                    111: #######################################################
                    112: # internal variables
                    113: #
                    114: 
                    115: # number of errors
                    116: my $errcnt = 0;
                    117: # number of warnings
                    118: my $warncnt = 0;
                    119: 
                    120: #######################################################
                    121: # subroutines
                    122: #
                    123: 
                    124: #
                    125: # $files = read_resource_meta($rootnode)
                    126: #
                    127: # checks general resource meta information and reads the list of files
                    128: #
                    129: sub read_resource_meta {
                    130:     my ($rootnode) = @_;
                    131:     my %files;
                    132:     #
                    133:     # archive path
                    134:     #
                    135:     # get archive-path
                    136:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
                    137:     if (! $archname) {
1.5     ! casties   138:    logger('ABORT', "archive-name element missing!");
1.1       casties   139:    exit 1;
                    140:     }
                    141: 
                    142:     #
                    143:     # files
                    144:     #
                    145:     my @filenodes = $rootnode->findnodes('child::file');
                    146:     foreach my $fn (@filenodes) {
                    147:    my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
                    148:    my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
1.5     ! casties   149:    logger('DEBUG', "FILE ($path)$name");
1.1       casties   150:    my $f = ($path) ? "$path/$name" : "$name";
                    151:    $files{$f} = $name;
                    152:     }
                    153: 
                    154:     #
                    155:     # archive-storage-date
                    156:     #
                    157:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
                    158:     if ($stordatenode) {
1.5     ! casties   159:    logger('WARNING', "archive storage date exists! Resource already archived?");
1.1       casties   160:    $warncnt++;
                    161:    # delete old date
                    162:    $stordatenode->removeChildNodes;
                    163:     } else {
                    164:    # create new storage date node
                    165:    $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
                    166:    # move after archive-path
                    167:    $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
                    168:     }
                    169:     $stordatenode->appendTextNode($archdate);
                    170:     $xml_changed++;
                    171:     return \%files;
                    172: }
                    173: 
                    174: 
                    175: #
                    176: # $%files = run_archive
                    177: #
                    178: # runs the archiver program on $docdir and returns a list of archived files
                    179: #
                    180: sub run_archive {
                    181:     my %files;
                    182:     print LOG "START archiver $version $archdate\n";
                    183:     my $archcmd = $archprog;
                    184:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
                    185:     $archcmd .= " -description='$archname'";
                    186:     $archcmd .= " '$docdir/'";
                    187: 
                    188:     print LOG "CMD: $archcmd\n";
                    189:     if (open ARCH, "$archcmd 2>&1 |") {
                    190:    while (<ARCH>) {
                    191:        chomp;
                    192:        print LOG "ARCH: $_\n";
                    193:        if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
                    194:        print "  ARCH: file '$1'\n";
                    195:        $files{$1} = "ok";
                    196:        }
                    197:        if (/^Archive processing of .* finished without failure./) {
                    198:        print "  ARCH: OK\n";
                    199:        }
                    200:    }
                    201:     } else {
1.5     ! casties   202:    logger('ABORT', "unable to start archive command '$archcmd'!!");
1.1       casties   203:    exit 1;
                    204:     }
                    205: 
                    206:     return \%files;
                    207: }
                    208: 
                    209: 
                    210: #
                    211: # check_files(\%files_to_archive, \%archived_files)
                    212: #
                    213: # compares the list of archived and to be archived files
                    214: #
                    215: sub check_files {
                    216:     my ($to_archive, $archived) = @_;
                    217: 
                    218:     my $nt = scalar keys %$to_archive;
                    219:     my $na = scalar keys %$archived;
                    220: 
                    221:     foreach my $ft (sort keys %$to_archive) {
                    222:    my $fp = "$docdir/$ft";
                    223:    #print "  fp: $fp\n";
                    224:    if ($$archived{$fp}) {
1.5     ! casties   225:        logger('DEBUG', "$ft archived OK");
1.1       casties   226:        $$archived{$fp} = "OK";
                    227:    } else {
1.5     ! casties   228:        logger('ERROR', "file '$ft' missing from archive!");
1.1       casties   229:        $errcnt++;
                    230:    }
                    231:     }
                    232: 
                    233:     foreach my $fa (sort keys %$archived) {
                    234:    if ($$archived{$fa} ne "OK") {
                    235:        my ($fn, $fp) = MPIWGStor::split_file_path($fa);
                    236:        if ($MPIWGStor::index_files{$fn}) {
1.5     ! casties   237:        logger('DEBUG', "$fa ignored");
1.1       casties   238:        $na--;
                    239:        } else {
1.5     ! casties   240:        logger('WARNING', "$fa archived but not in list!");
1.1       casties   241:        $warncnt++;
                    242:        }
                    243:    }
                    244:     }
                    245: 
                    246:     if ($nt > $na) {
1.5     ! casties   247:    logger('WARNING', "less files were archived ($na vs. $nt)!");
1.1       casties   248:     } elsif ($na > $nt) {
1.5     ! casties   249:    logger('WARNING', "more files were archived ($na vs. $nt)!");
1.1       casties   250:     }
                    251: 
                    252: }
                    253: 
                    254: 
                    255: #
                    256: # delete_files(\%files)
                    257: #
                    258: # deletes the files from the list (of absolute files) and their directories
                    259: # if they are empty
                    260: #
                    261: sub delete_files {
                    262:     my ($files) = @_;
                    263:     my %dirs;
                    264: 
                    265:     foreach my $f (sort keys %$files) {
                    266:    my ($fn, $fp) = MPIWGStor::split_file_path($f);
                    267:    # collect all unique directories
                    268:         if ($fp && (! $dirs{$fp})) {
                    269:        $dirs{$fp} = $fp;
                    270:    }
                    271:    # don't delete index files
                    272:    next if ($MPIWGStor::index_files{$fn});
                    273:    # no file no delete
                    274:    next unless (-f $f);
                    275:    # delete files
                    276:    if (unlink $f) {
1.5     ! casties   277:        logger('INFO', "remove $f ($fn)");
1.1       casties   278:    } else {
1.5     ! casties   279:        logger('ERROR', "unable to delete $f!");
1.1       casties   280:        $errcnt++;
                    281:    }
                    282:     }
                    283:     # try to delete all empty directories
                    284:     my @dirkeys = sort keys %dirs;
                    285:     # starting at the end to get to the subdirectories first
                    286:     for (my $i = $#dirkeys; $i >= 0; $i--) {
                    287:    my $d = $dirkeys[$i];
                    288:    # dont't remove document dir (shouldn't be empty anyway)
                    289:    next if ($d eq $docdir);
                    290:    if (-d $d) {
1.5     ! casties   291:        logger('INFO', "remove dir $d");
1.1       casties   292:        rmdir $d;
                    293:    }
                    294:     }
                    295: }
                    296: 
                    297: 
                    298: #
                    299: # delete_all_files(\%files, $dir)
                    300: #
                    301: # deletes all files with names from the list %files
                    302: # in the directory $dir and its subdirectories 
                    303: #
                    304: sub delete_all_files {
                    305:     my ($files, $dir) = @_;
                    306: 
                    307:     if (! opendir DIR, $dir) {
1.5     ! casties   308:    logger('ERROR', "unable to read directory $dir!");
1.1       casties   309:    $errcnt++;
                    310:    return;
                    311:     }
                    312:     my @fl = readdir DIR;
                    313:     closedir DIR;
                    314: 
                    315:     foreach my $f (@fl) {
                    316:    next if ($f =~ /^\.{1,2}$/);
                    317:    if ($$files{$f}) {
                    318:        # $f is in the file list
                    319:        if (-f "$dir/$f") {
                    320:        # $f is a file
                    321:        if (unlink "$dir/$f") {
1.5     ! casties   322:            logger('INFO', "removed $f");
1.1       casties   323:        } else {
1.5     ! casties   324:            logger('ERROR', "unable to delete $f!");
1.1       casties   325:            $errcnt++;
                    326:        }
                    327:        } elsif (-d _) {
                    328:        # $f is a directory (unlink won't work)
                    329:        if ((system 'rm', '-r', "$dir/$f") == 0) {
1.5     ! casties   330:            logger('INFO', "removed directory $f");
1.1       casties   331:        } else {
1.5     ! casties   332:            logger('ERROR', "unable to delete directory $f!");
1.1       casties   333:            $errcnt++;
                    334:        }
                    335:        } else {
1.5     ! casties   336:        logger('ERROR', "funny object $dir/$f!");
1.1       casties   337:        $errcnt++;
                    338:        }
                    339:    } else {
                    340:        # $f is not in the list
                    341:        if (-d "$dir/$f") {
                    342:        # recurse into directories
1.5     ! casties   343:        logger('DEBUG', "enter $dir/$f");
1.1       casties   344:        delete_all_files($files, "$dir/$f");
                    345:        }
                    346:    }
                    347:     }
                    348: }
                    349: 
                    350: 
                    351: #######################################################
                    352: # main
                    353: #
                    354: 
1.5     ! casties   355: logger('START', "archiver $version at $archdate");
1.1       casties   356: 
                    357: # make shure the right user is running this program
                    358: my $user = getlogin;
1.5     ! casties   359: if (($user ne "archive")&&($user ne "root")) {
        !           360:     logger("ABORT", "you ($user) must be archive or root user to run this program!");
        !           361:     exit 1;
        !           362: }
        !           363: 
        !           364: # check for .archived file
        !           365: if (-f "$docdir/.archived") {
        !           366:     if (not $force_archive) {
        !           367:    logger('ABORT', "already archived! (.archived file exists)");
        !           368:    exit 1;
        !           369:     } else {
        !           370:    logger('WARNING', "resource already archived? (.archived file exists)");
        !           371:    $warncnt++;
        !           372:     }
        !           373: }
1.1       casties   374: 
                    375: # use metacheck first
1.5     ! casties   376: if (open CHECK, "$checkprog -add-files $docdir |") {
        !           377:     my @errors;
        !           378:     my $msg;
        !           379:     while (<CHECK>) {
        !           380:    chomp;
        !           381:    if (/^ERROR/) {
        !           382:        push @errors, $_;
        !           383:    }
        !           384:    $msg = $_;
        !           385:     }
        !           386:     if ($msg =~ /^DONE/) {
        !           387:    logger('DEBUG', "checking index file: $msg");
        !           388:    logger('INFO', "resource '$docdir' check OK");
        !           389:     } else {
        !           390:    logger('DEBUG', "errors checking index file:\n    " . join("\n    ", @errors) . "\n    $msg");
        !           391:    logger('ABORT', "resource '$docdir' check failed!");
        !           392:    exit 1;
        !           393:     }
1.1       casties   394: } else {
1.5     ! casties   395:     logger('ABORT', "unable to run $checkprog");
1.1       casties   396:     exit 1;
                    397: }
1.5     ! casties   398: # if (system("$checkprog $docdir >/dev/null") == 0) {
        !           399: #     logger('INFO', "resource '$docdir' check OK");
        !           400: # } else {
        !           401: #     logger('ABORT', "resource '$docdir' check failed!!");
        !           402: #     exit 1;
        !           403: # }
1.1       casties   404: 
                    405: # read index.meta file
                    406: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
                    407: 
                    408: # check file and add archive date
                    409: my $files_to_archive = read_resource_meta($rootnode);
                    410: 
1.5     ! casties   411: logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
1.1       casties   412: 
1.5     ! casties   413: # remove .archived file
1.1       casties   414: if (-f "$docdir/.archived") {
                    415:     if (unlink "$docdir/.archived") {
1.5     ! casties   416:    logger('WARNING', "existing .archived file has been removed!");
1.1       casties   417:    $warncnt++;
                    418:     } else {
1.5     ! casties   419:    logger('ERROR', "unable to remove existing .archived file!");
1.1       casties   420:    $errcnt++;
                    421:     }
                    422: }
                    423: 
                    424: # remove junk files
                    425: if ($delete_junk_files) {
                    426:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    427: }
                    428: 
                    429: # write new index.meta
                    430: if ($errcnt > 0) {
1.5     ! casties   431:     logger('ABORT', "there were errors!");
1.1       casties   432:     exit 1;
                    433: } else {
                    434:     if ($fix_xml) {
                    435:    MPIWGStor::write_xml($document, $metafile);
                    436:     }
                    437: }
                    438: 
                    439: # start archiving
1.3       casties   440: my $archived_files = run_archive();
                    441: my $num_archfiles = scalar keys %$archived_files;
1.1       casties   442: 
1.5     ! casties   443: logger('INFO', "$num_archfiles files archived");
1.1       casties   444: 
                    445: # check list of archived files
                    446: check_files($files_to_archive, $archived_files);
                    447: 
                    448: # delete files if all went OK
                    449: if ($errcnt == 0) {
                    450:     system("touch", "$docdir/.archived");
                    451:     # remove junk files (again)
                    452:     if ($delete_junk_files) {
                    453:    delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    454:     }
                    455:     # remove archived files
                    456:     if ($delete_data_files) {
                    457:    delete_files($archived_files);
                    458:     }
                    459: }
                    460: 
1.5     ! casties   461: logger('INFO', "$warncnt warnings");
        !           462: logger('INFO', "$errcnt errors");
1.1       casties   463: if ($errcnt > 0) {
1.4       casties   464:     logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
1.1       casties   465:     exit 1;
                    466: } else {
1.4       casties   467:     logger('DONE', "$num_archfiles files archived at " . stime(time));
1.1       casties   468: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>