Annotation of foxridge-archiver/archiver.pl, revision 1.4

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # program version
1.4     ! casties    19: my $version = "0.6 (ROC 23.3.2005)";
        !            20: 
        !            21: # short help
        !            22: my $shorthelp = "MPIWG archiver $version\nuse:\n  archiver [options] docpath\noptions:\n  -premigrate  don't delete archived files\n";
        !            23: 
        !            24: # read command line parameters
        !            25: my $args = MPIWGStor::parseargs;
        !            26: 
        !            27: # debug level
        !            28: my $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
1.1       casties    29: 
                     30: # rewrite XML file (necessary for archive date!)
                     31: my $fix_xml = 1;
                     32: my $xml_changed = 0;
                     33: # XML namespace (not really implemented!)
                     34: my $namespace = "";
                     35: 
                     36: # archive name (archive-path element, usually == $docdir)
                     37: my $archname;
                     38: # archive storage date (now)
                     39: my $archdate = stime(time);
                     40: 
                     41: # delete "junk" files before archiving
                     42: my $delete_junk_files = 1;
                     43: 
                     44: # delete data files after archiving
                     45: my $delete_data_files = 1;
                     46: 
                     47: 
                     48: #######################################################
                     49: # external programs
                     50: #
                     51: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     52: if (! -x $archprog) {
1.4     ! casties    53:     logger('ABORT', "TSM client program '$archprog' missing!!\n");
1.1       casties    54:     exit 1;
                     55: }
                     56: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     57: if (! -x $checkprog) {
1.4     ! casties    58:     logger('ABORT', "meta data checking program '$checkprog' missing!!\n");
1.1       casties    59:     exit 1;
                     60: }
                     61: # log file for archiver
                     62: my $log_file = "/var/log/mpiwg-archiver.log";
                     63: if (! open LOG, ">>$log_file") {
1.4     ! casties    64:     logger('ABORT', "unable to write log file '$log_file'!!\n");
1.1       casties    65:     exit 1;
                     66: }
                     67: 
                     68: #######################################################
                     69: # check parameters that were passed to the program
                     70: #
                     71: if ($#ARGV < 0) {
1.4     ! casties    72:     print $shorthelp;
1.1       casties    73:     exit 1;
                     74: }
1.4     ! casties    75: my $docdir = $$args{'path'};
1.2       casties    76: # strip double slashes
                     77: $docdir =~ s/\/\//\//;
1.1       casties    78: # strip trailing slashes
1.2       casties    79: $docdir =~ s/\/+$//;
1.1       casties    80: if (! -d $docdir) {
1.4     ! casties    81:     logger('ABORT', "document directory \'$docdir\' doesn't exist!\n");
1.1       casties    82:     exit 1;
                     83: }
1.4     ! casties    84: # don't delete archived files with "-premigrate"
        !            85: if (exists $$args{'premigrate'}) {
        !            86:     $delete_data_files = not $$args{'premigrate'};
        !            87: }
        !            88: if ($delete_data_files) {
        !            89:     logger('INFO', "going to remove successfully archived files from disk");
1.1       casties    90: }
                     91: 
                     92: my $metafile = "$docdir/index.meta";
                     93: if (! -f $metafile) {
1.4     ! casties    94:     logger('ABORT', "metadata index file \'$metafile\' doesn't exist!\n");
1.1       casties    95:     exit 1;
                     96: }
                     97: 
                     98: #######################################################
                     99: # internal variables
                    100: #
                    101: 
                    102: # number of errors
                    103: my $errcnt = 0;
                    104: # number of warnings
                    105: my $warncnt = 0;
                    106: 
                    107: #######################################################
                    108: # subroutines
                    109: #
                    110: 
                    111: #
                    112: # $files = read_resource_meta($rootnode)
                    113: #
                    114: # checks general resource meta information and reads the list of files
                    115: #
                    116: sub read_resource_meta {
                    117:     my ($rootnode) = @_;
                    118:     my %files;
                    119:     #
                    120:     # archive path
                    121:     #
                    122:     # get archive-path
                    123:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
                    124:     if (! $archname) {
1.4     ! casties   125:    logger('ABORT', "archive-name element missing!!\n");
1.1       casties   126:    exit 1;
                    127:     }
                    128: 
                    129:     #
                    130:     # files
                    131:     #
                    132:     my @filenodes = $rootnode->findnodes('child::file');
                    133:     foreach my $fn (@filenodes) {
                    134:    my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
                    135:    my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
1.4     ! casties   136:    logger('DEBUG', "FILE ($path)$name\n");
1.1       casties   137:    my $f = ($path) ? "$path/$name" : "$name";
                    138:    $files{$f} = $name;
                    139:     }
                    140: 
                    141:     #
                    142:     # archive-storage-date
                    143:     #
                    144:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
                    145:     if ($stordatenode) {
1.4     ! casties   146:    logger('WARNING', "archive storage date exists! Resource already archived?\n");
1.1       casties   147:    $warncnt++;
                    148:    # delete old date
                    149:    $stordatenode->removeChildNodes;
                    150:     } else {
                    151:    # create new storage date node
                    152:    $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
                    153:    # move after archive-path
                    154:    $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
                    155:     }
                    156:     $stordatenode->appendTextNode($archdate);
                    157:     $xml_changed++;
                    158:     return \%files;
                    159: }
                    160: 
                    161: 
                    162: #
                    163: # $%files = run_archive
                    164: #
                    165: # runs the archiver program on $docdir and returns a list of archived files
                    166: #
                    167: sub run_archive {
                    168:     my %files;
                    169:     print LOG "START archiver $version $archdate\n";
                    170:     my $archcmd = $archprog;
                    171:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
                    172:     $archcmd .= " -description='$archname'";
                    173:     $archcmd .= " '$docdir/'";
                    174: 
                    175:     print LOG "CMD: $archcmd\n";
                    176:     if (open ARCH, "$archcmd 2>&1 |") {
                    177:    while (<ARCH>) {
                    178:        chomp;
                    179:        print LOG "ARCH: $_\n";
                    180:        if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
                    181:        print "  ARCH: file '$1'\n";
                    182:        $files{$1} = "ok";
                    183:        }
                    184:        if (/^Archive processing of .* finished without failure./) {
                    185:        print "  ARCH: OK\n";
                    186:        }
                    187:    }
                    188:     } else {
1.4     ! casties   189:    logger('ABORT', "unable to start archive command '$archcmd'!!\n");
1.1       casties   190:    exit 1;
                    191:     }
                    192: 
                    193:     return \%files;
                    194: }
                    195: 
                    196: 
                    197: #
                    198: # check_files(\%files_to_archive, \%archived_files)
                    199: #
                    200: # compares the list of archived and to be archived files
                    201: #
                    202: sub check_files {
                    203:     my ($to_archive, $archived) = @_;
                    204: 
                    205:     my $nt = scalar keys %$to_archive;
                    206:     my $na = scalar keys %$archived;
                    207: 
                    208:     foreach my $ft (sort keys %$to_archive) {
                    209:    my $fp = "$docdir/$ft";
                    210:    #print "  fp: $fp\n";
                    211:    if ($$archived{$fp}) {
1.4     ! casties   212:        logger('DEBUG', "$ft archived OK\n");
1.1       casties   213:        $$archived{$fp} = "OK";
                    214:    } else {
1.4     ! casties   215:        logger('ERROR', "file '$ft' missing from archive!\n");
1.1       casties   216:        $errcnt++;
                    217:    }
                    218:     }
                    219: 
                    220:     foreach my $fa (sort keys %$archived) {
                    221:    if ($$archived{$fa} ne "OK") {
                    222:        my ($fn, $fp) = MPIWGStor::split_file_path($fa);
                    223:        if ($MPIWGStor::index_files{$fn}) {
1.4     ! casties   224:        logger('DEBUG', "$fa ignored\n");
1.1       casties   225:        $na--;
                    226:        } else {
1.4     ! casties   227:        logger('WARNING', "$fa archived but not in list!\n");
1.1       casties   228:        $warncnt++;
                    229:        }
                    230:    }
                    231:     }
                    232: 
                    233:     if ($nt > $na) {
1.4     ! casties   234:    logger('WARNING', "less files were archived ($na vs. $nt)!\n");
1.1       casties   235:     } elsif ($na > $nt) {
1.4     ! casties   236:    logger('WARNING', "more files were archived ($na vs. $nt)!\n");
1.1       casties   237:     }
                    238: 
                    239: }
                    240: 
                    241: 
                    242: #
                    243: # delete_files(\%files)
                    244: #
                    245: # deletes the files from the list (of absolute files) and their directories
                    246: # if they are empty
                    247: #
                    248: sub delete_files {
                    249:     my ($files) = @_;
                    250:     my %dirs;
                    251: 
                    252:     foreach my $f (sort keys %$files) {
                    253:    my ($fn, $fp) = MPIWGStor::split_file_path($f);
                    254:    # collect all unique directories
                    255:         if ($fp && (! $dirs{$fp})) {
                    256:        $dirs{$fp} = $fp;
                    257:    }
                    258:    # don't delete index files
                    259:    next if ($MPIWGStor::index_files{$fn});
                    260:    # no file no delete
                    261:    next unless (-f $f);
                    262:    # delete files
                    263:    if (unlink $f) {
1.4     ! casties   264:        logger('INFO', "remove $f ($fn)\n");
1.1       casties   265:    } else {
1.4     ! casties   266:        logger('ERROR', "unable to delete $f!\n");
1.1       casties   267:        $errcnt++;
                    268:    }
                    269:     }
                    270:     # try to delete all empty directories
                    271:     my @dirkeys = sort keys %dirs;
                    272:     # starting at the end to get to the subdirectories first
                    273:     for (my $i = $#dirkeys; $i >= 0; $i--) {
                    274:    my $d = $dirkeys[$i];
                    275:    # dont't remove document dir (shouldn't be empty anyway)
                    276:    next if ($d eq $docdir);
                    277:    if (-d $d) {
1.4     ! casties   278:        logger('INFO', "remove dir $d\n");
1.1       casties   279:        rmdir $d;
                    280:    }
                    281:     }
                    282: }
                    283: 
                    284: 
                    285: #
                    286: # delete_all_files(\%files, $dir)
                    287: #
                    288: # deletes all files with names from the list %files
                    289: # in the directory $dir and its subdirectories 
                    290: #
                    291: sub delete_all_files {
                    292:     my ($files, $dir) = @_;
                    293: 
                    294:     if (! opendir DIR, $dir) {
1.4     ! casties   295:    logger('ERROR', "unable to read directory $dir!\n");
1.1       casties   296:    $errcnt++;
                    297:    return;
                    298:     }
                    299:     my @fl = readdir DIR;
                    300:     closedir DIR;
                    301: 
                    302:     foreach my $f (@fl) {
                    303:    next if ($f =~ /^\.{1,2}$/);
                    304:    if ($$files{$f}) {
                    305:        # $f is in the file list
                    306:        if (-f "$dir/$f") {
                    307:        # $f is a file
                    308:        if (unlink "$dir/$f") {
1.4     ! casties   309:            logger('INFO', "removed $f\n");
1.1       casties   310:        } else {
1.4     ! casties   311:            logger('ERROR', "unable to delete $f!\n");
1.1       casties   312:            $errcnt++;
                    313:        }
                    314:        } elsif (-d _) {
                    315:        # $f is a directory (unlink won't work)
                    316:        if ((system 'rm', '-r', "$dir/$f") == 0) {
1.4     ! casties   317:            logger('INFO', "removed directory $f\n");
1.1       casties   318:        } else {
1.4     ! casties   319:            logger('ERROR', "unable to delete directory $f!\n");
1.1       casties   320:            $errcnt++;
                    321:        }
                    322:        } else {
1.4     ! casties   323:        logger('ERROR', "funny object $dir/$f!\n");
1.1       casties   324:        $errcnt++;
                    325:        }
                    326:    } else {
                    327:        # $f is not in the list
                    328:        if (-d "$dir/$f") {
                    329:        # recurse into directories
1.4     ! casties   330:        logger('DEBUG', "enter $dir/$f\n");
1.1       casties   331:        delete_all_files($files, "$dir/$f");
                    332:        }
                    333:    }
                    334:     }
                    335: }
                    336: 
                    337: 
                    338: #######################################################
                    339: # main
                    340: #
                    341: 
1.4     ! casties   342: logger('START', "archiver $version at $archdate\n");
1.1       casties   343: 
                    344: # make shure the right user is running this program
                    345: my $user = getlogin;
                    346: #if (($user ne "archive")||($user ne "root")) {
                    347: #    logger("ABORT", "you ($user) must be archive or root user to run this program!");
                    348: #    exit 1;
                    349: #}
                    350: 
                    351: # use metacheck first
                    352: if (system("$checkprog $docdir >/dev/null") == 0) {
1.4     ! casties   353:     logger('INFO', "resource '$docdir' check OK\n");
1.1       casties   354: } else {
1.4     ! casties   355:     logger('ABORT', "resource '$docdir' check failed!!\n");
1.1       casties   356:     exit 1;
                    357: }
                    358: 
                    359: # read index.meta file
                    360: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
                    361: 
                    362: # check file and add archive date
                    363: my $files_to_archive = read_resource_meta($rootnode);
                    364: 
                    365: print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
                    366: 
                    367: # check for .archived file
                    368: if (-f "$docdir/.archived") {
                    369:     if (unlink "$docdir/.archived") {
1.4     ! casties   370:    logger('WARNING', "existing .archived file has been removed! Resource already archived?\n");
1.1       casties   371:    $warncnt++;
                    372:     } else {
1.4     ! casties   373:    logger('ERROR', "unable to remove existing .archived file!\n");
1.1       casties   374:    $errcnt++;
                    375:     }
                    376: }
                    377: 
                    378: # remove junk files
                    379: if ($delete_junk_files) {
                    380:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    381: }
                    382: 
                    383: # write new index.meta
                    384: if ($errcnt > 0) {
1.4     ! casties   385:     logger('ABORT', "there were errors!\n");
1.1       casties   386:     exit 1;
                    387: } else {
                    388:     if ($fix_xml) {
                    389:    MPIWGStor::write_xml($document, $metafile);
                    390:     }
                    391: }
                    392: 
                    393: # start archiving
1.3       casties   394: my $archived_files = run_archive();
                    395: my $num_archfiles = scalar keys %$archived_files;
1.1       casties   396: 
1.4     ! casties   397: logger('INFO', "$num_archfiles files archived\n");
1.1       casties   398: 
                    399: # check list of archived files
                    400: check_files($files_to_archive, $archived_files);
                    401: 
                    402: # delete files if all went OK
                    403: if ($errcnt == 0) {
                    404:     system("touch", "$docdir/.archived");
                    405:     # remove junk files (again)
                    406:     if ($delete_junk_files) {
                    407:    delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    408:     }
                    409:     # remove archived files
                    410:     if ($delete_data_files) {
                    411:    delete_files($archived_files);
                    412:     }
                    413: }
                    414: 
1.4     ! casties   415: logger('INFO', "$warncnt warnings\n");
        !           416: logger('INFO', "$errcnt errors\n");
1.1       casties   417: if ($errcnt > 0) {
1.4     ! casties   418:     logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
1.1       casties   419:     exit 1;
                    420: } else {
1.4     ! casties   421:     logger('DONE', "$num_archfiles files archived at " . stime(time));
1.1       casties   422: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>