Annotation of foxridge-archiver/archiver.pl, revision 1.8

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # program version
1.8     ! casties    19: my $version = "0.7.3 (ROC 10.9.2008)";
1.4       casties    20: 
                     21: # short help
1.5       casties    22: my $help = "MPIWG archiver $version
                     23: use:  archiver [options] docpath
                     24: options:
                     25:   -debug  show debugging info
                     26:   -premigrate  don't delete archived files
                     27:   -force  archive even if already archived
1.7       casties    28:   -replace  rewrite index file
1.5       casties    29: ";
1.4       casties    30: 
                     31: # read command line parameters
                     32: my $args = MPIWGStor::parseargs;
1.5       casties    33: if (! scalar(%$args)) {
                     34:     print $help, "\n";
                     35:     exit 1;
                     36: }
1.4       casties    37: 
                     38: # debug level
1.5       casties    39: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
                     40: 
                     41: # force archiving
                     42: my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
1.1       casties    43: 
1.7       casties    44: # rewrite index file
                     45: my $rewrite_index = (exists $$args{'replace'}) ? $$args{'replace'} : 0;
                     46: 
1.1       casties    47: # rewrite XML file (necessary for archive date!)
                     48: my $fix_xml = 1;
                     49: my $xml_changed = 0;
1.5       casties    50: 
1.1       casties    51: # XML namespace (not really implemented!)
                     52: my $namespace = "";
                     53: 
                     54: # archive name (archive-path element, usually == $docdir)
                     55: my $archname;
1.5       casties    56: 
1.1       casties    57: # archive storage date (now)
                     58: my $archdate = stime(time);
                     59: 
                     60: # delete "junk" files before archiving
                     61: my $delete_junk_files = 1;
                     62: 
                     63: # delete data files after archiving
                     64: my $delete_data_files = 1;
                     65: 
1.5       casties    66: # don't delete archived files with "-premigrate"
                     67: if (exists $$args{'premigrate'}) {
                     68:     $delete_data_files = not $$args{'premigrate'};
                     69: }
                     70: if ($delete_data_files) {
                     71:     logger('INFO', "going to remove successfully archived files from disk");
                     72: }
                     73: 
1.1       casties    74: 
                     75: #######################################################
                     76: # external programs
                     77: #
                     78: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     79: if (! -x $archprog) {
1.5       casties    80:     logger('ABORT', "TSM client program '$archprog' missing!");
1.1       casties    81:     exit 1;
                     82: }
                     83: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     84: if (! -x $checkprog) {
1.5       casties    85:     logger('ABORT', "meta data checking program '$checkprog' missing!");
1.1       casties    86:     exit 1;
                     87: }
                     88: # log file for archiver
                     89: my $log_file = "/var/log/mpiwg-archiver.log";
                     90: if (! open LOG, ">>$log_file") {
1.5       casties    91:     logger('ABORT', "unable to write log file '$log_file'!");
1.1       casties    92:     exit 1;
                     93: }
                     94: 
                     95: #######################################################
                     96: # check parameters that were passed to the program
                     97: #
1.5       casties    98: 
1.4       casties    99: my $docdir = $$args{'path'};
1.2       casties   100: # strip double slashes
                    101: $docdir =~ s/\/\//\//;
1.1       casties   102: # strip trailing slashes
1.2       casties   103: $docdir =~ s/\/+$//;
1.1       casties   104: if (! -d $docdir) {
1.5       casties   105:     logger('ABORT', "document directory \'$docdir\' doesn't exist!");
1.1       casties   106:     exit 1;
                    107: }
                    108: 
                    109: my $metafile = "$docdir/index.meta";
                    110: if (! -f $metafile) {
1.5       casties   111:     logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
1.1       casties   112:     exit 1;
                    113: }
                    114: 
                    115: #######################################################
                    116: # internal variables
                    117: #
                    118: 
                    119: # number of errors
                    120: my $errcnt = 0;
                    121: # number of warnings
                    122: my $warncnt = 0;
                    123: 
                    124: #######################################################
                    125: # subroutines
                    126: #
                    127: 
                    128: #
                    129: # $files = read_resource_meta($rootnode)
                    130: #
                    131: # checks general resource meta information and reads the list of files
                    132: #
                    133: sub read_resource_meta {
                    134:     my ($rootnode) = @_;
                    135:     my %files;
                    136:     #
                    137:     # archive path
                    138:     #
                    139:     # get archive-path
                    140:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
                    141:     if (! $archname) {
1.5       casties   142:    logger('ABORT', "archive-name element missing!");
1.1       casties   143:    exit 1;
                    144:     }
                    145: 
                    146:     #
                    147:     # files
                    148:     #
                    149:     my @filenodes = $rootnode->findnodes('child::file');
                    150:     foreach my $fn (@filenodes) {
                    151:    my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
                    152:    my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
1.5       casties   153:    logger('DEBUG', "FILE ($path)$name");
1.1       casties   154:    my $f = ($path) ? "$path/$name" : "$name";
                    155:    $files{$f} = $name;
                    156:     }
                    157: 
                    158:     #
                    159:     # archive-storage-date
                    160:     #
                    161:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
                    162:     if ($stordatenode) {
1.5       casties   163:    logger('WARNING', "archive storage date exists! Resource already archived?");
1.1       casties   164:    $warncnt++;
                    165:    # delete old date
                    166:    $stordatenode->removeChildNodes;
                    167:     } else {
                    168:    # create new storage date node
                    169:    $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
                    170:    # move after archive-path
                    171:    $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
                    172:     }
                    173:     $stordatenode->appendTextNode($archdate);
                    174:     $xml_changed++;
                    175:     return \%files;
                    176: }
                    177: 
                    178: 
                    179: #
                    180: # $%files = run_archive
                    181: #
                    182: # runs the archiver program on $docdir and returns a list of archived files
                    183: #
                    184: sub run_archive {
                    185:     my %files;
                    186:     print LOG "START archiver $version $archdate\n";
                    187:     my $archcmd = $archprog;
                    188:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
                    189:     $archcmd .= " -description='$archname'";
                    190:     $archcmd .= " '$docdir/'";
                    191: 
                    192:     print LOG "CMD: $archcmd\n";
                    193:     if (open ARCH, "$archcmd 2>&1 |") {
                    194:    while (<ARCH>) {
                    195:        chomp;
                    196:        print LOG "ARCH: $_\n";
                    197:        if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
                    198:        print "  ARCH: file '$1'\n";
                    199:        $files{$1} = "ok";
                    200:        }
                    201:        if (/^Archive processing of .* finished without failure./) {
                    202:        print "  ARCH: OK\n";
                    203:        }
                    204:    }
                    205:     } else {
1.5       casties   206:    logger('ABORT', "unable to start archive command '$archcmd'!!");
1.1       casties   207:    exit 1;
                    208:     }
                    209: 
                    210:     return \%files;
                    211: }
                    212: 
                    213: 
                    214: #
                    215: # check_files(\%files_to_archive, \%archived_files)
                    216: #
                    217: # compares the list of archived and to be archived files
                    218: #
                    219: sub check_files {
                    220:     my ($to_archive, $archived) = @_;
                    221: 
                    222:     my $nt = scalar keys %$to_archive;
                    223:     my $na = scalar keys %$archived;
                    224: 
                    225:     foreach my $ft (sort keys %$to_archive) {
                    226:    my $fp = "$docdir/$ft";
                    227:    #print "  fp: $fp\n";
                    228:    if ($$archived{$fp}) {
1.5       casties   229:        logger('DEBUG', "$ft archived OK");
1.1       casties   230:        $$archived{$fp} = "OK";
                    231:    } else {
1.5       casties   232:        logger('ERROR', "file '$ft' missing from archive!");
1.1       casties   233:        $errcnt++;
                    234:    }
                    235:     }
                    236: 
                    237:     foreach my $fa (sort keys %$archived) {
                    238:    if ($$archived{$fa} ne "OK") {
                    239:        my ($fn, $fp) = MPIWGStor::split_file_path($fa);
                    240:        if ($MPIWGStor::index_files{$fn}) {
1.5       casties   241:        logger('DEBUG', "$fa ignored");
1.1       casties   242:        $na--;
                    243:        } else {
1.5       casties   244:        logger('WARNING', "$fa archived but not in list!");
1.1       casties   245:        $warncnt++;
                    246:        }
                    247:    }
                    248:     }
                    249: 
                    250:     if ($nt > $na) {
1.5       casties   251:    logger('WARNING', "less files were archived ($na vs. $nt)!");
1.1       casties   252:     } elsif ($na > $nt) {
1.5       casties   253:    logger('WARNING', "more files were archived ($na vs. $nt)!");
1.1       casties   254:     }
                    255: 
                    256: }
                    257: 
                    258: 
                    259: #
                    260: # delete_files(\%files)
                    261: #
                    262: # deletes the files from the list (of absolute files) and their directories
                    263: # if they are empty
                    264: #
                    265: sub delete_files {
                    266:     my ($files) = @_;
                    267:     my %dirs;
                    268: 
                    269:     foreach my $f (sort keys %$files) {
                    270:    my ($fn, $fp) = MPIWGStor::split_file_path($f);
                    271:    # collect all unique directories
                    272:         if ($fp && (! $dirs{$fp})) {
                    273:        $dirs{$fp} = $fp;
                    274:    }
                    275:    # don't delete index files
                    276:    next if ($MPIWGStor::index_files{$fn});
                    277:    # no file no delete
                    278:    next unless (-f $f);
                    279:    # delete files
                    280:    if (unlink $f) {
1.6       casties   281:        logger('DEBUG', "remove $f ($fn)");
1.1       casties   282:    } else {
1.5       casties   283:        logger('ERROR', "unable to delete $f!");
1.1       casties   284:        $errcnt++;
                    285:    }
                    286:     }
                    287:     # try to delete all empty directories
                    288:     my @dirkeys = sort keys %dirs;
                    289:     # starting at the end to get to the subdirectories first
                    290:     for (my $i = $#dirkeys; $i >= 0; $i--) {
                    291:    my $d = $dirkeys[$i];
                    292:    # dont't remove document dir (shouldn't be empty anyway)
                    293:    next if ($d eq $docdir);
                    294:    if (-d $d) {
1.6       casties   295:        logger('DEBUG', "remove dir $d");
1.1       casties   296:        rmdir $d;
                    297:    }
                    298:     }
                    299: }
                    300: 
                    301: 
                    302: #
                    303: # delete_all_files(\%files, $dir)
                    304: #
                    305: # deletes all files with names from the list %files
                    306: # in the directory $dir and its subdirectories 
                    307: #
                    308: sub delete_all_files {
                    309:     my ($files, $dir) = @_;
                    310: 
                    311:     if (! opendir DIR, $dir) {
1.5       casties   312:    logger('ERROR', "unable to read directory $dir!");
1.1       casties   313:    $errcnt++;
                    314:    return;
                    315:     }
                    316:     my @fl = readdir DIR;
                    317:     closedir DIR;
                    318: 
                    319:     foreach my $f (@fl) {
                    320:    next if ($f =~ /^\.{1,2}$/);
                    321:    if ($$files{$f}) {
                    322:        # $f is in the file list
                    323:        if (-f "$dir/$f") {
                    324:        # $f is a file
                    325:        if (unlink "$dir/$f") {
1.6       casties   326:            logger('DEBUG', "removed $f");
1.1       casties   327:        } else {
1.5       casties   328:            logger('ERROR', "unable to delete $f!");
1.1       casties   329:            $errcnt++;
                    330:        }
                    331:        } elsif (-d _) {
                    332:        # $f is a directory (unlink won't work)
                    333:        if ((system 'rm', '-r', "$dir/$f") == 0) {
1.6       casties   334:            logger('DEBUG', "removed directory $f");
1.1       casties   335:        } else {
1.5       casties   336:            logger('ERROR', "unable to delete directory $f!");
1.1       casties   337:            $errcnt++;
                    338:        }
                    339:        } else {
1.5       casties   340:        logger('ERROR', "funny object $dir/$f!");
1.1       casties   341:        $errcnt++;
                    342:        }
                    343:    } else {
                    344:        # $f is not in the list
                    345:        if (-d "$dir/$f") {
                    346:        # recurse into directories
1.5       casties   347:        logger('DEBUG', "enter $dir/$f");
1.1       casties   348:        delete_all_files($files, "$dir/$f");
                    349:        }
                    350:    }
                    351:     }
                    352: }
                    353: 
                    354: 
                    355: #######################################################
                    356: # main
                    357: #
                    358: 
1.5       casties   359: logger('START', "archiver $version at $archdate");
1.1       casties   360: 
                    361: # make shure the right user is running this program
1.8     ! casties   362: my $user = getlogin || getpwuid($<);
1.5       casties   363: if (($user ne "archive")&&($user ne "root")) {
1.6       casties   364:     logger("WARNING", "you ($user) should be archive or root user to run this program!");
1.5       casties   365: }
                    366: 
                    367: # check for .archived file
                    368: if (-f "$docdir/.archived") {
                    369:     if (not $force_archive) {
                    370:    logger('ABORT', "already archived! (.archived file exists)");
                    371:    exit 1;
                    372:     } else {
                    373:    logger('WARNING', "resource already archived? (.archived file exists)");
                    374:    $warncnt++;
                    375:     }
                    376: }
1.1       casties   377: 
                    378: # use metacheck first
1.7       casties   379: my $check_opts="-add-files";
                    380: if ($rewrite_index) {
                    381:     $check_opts = "-replace";
                    382: }
                    383: if (open CHECK, "$checkprog $check_opts $docdir |") {
1.5       casties   384:     my @errors;
                    385:     my $msg;
                    386:     while (<CHECK>) {
                    387:    chomp;
                    388:    if (/^ERROR/) {
                    389:        push @errors, $_;
                    390:    }
                    391:    $msg = $_;
                    392:     }
                    393:     if ($msg =~ /^DONE/) {
                    394:    logger('DEBUG', "checking index file: $msg");
                    395:    logger('INFO', "resource '$docdir' check OK");
                    396:     } else {
                    397:    logger('DEBUG', "errors checking index file:\n    " . join("\n    ", @errors) . "\n    $msg");
                    398:    logger('ABORT', "resource '$docdir' check failed!");
                    399:    exit 1;
                    400:     }
1.1       casties   401: } else {
1.5       casties   402:     logger('ABORT', "unable to run $checkprog");
1.1       casties   403:     exit 1;
                    404: }
1.5       casties   405: # if (system("$checkprog $docdir >/dev/null") == 0) {
                    406: #     logger('INFO', "resource '$docdir' check OK");
                    407: # } else {
                    408: #     logger('ABORT', "resource '$docdir' check failed!!");
                    409: #     exit 1;
                    410: # }
1.1       casties   411: 
                    412: # read index.meta file
                    413: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
                    414: 
                    415: # check file and add archive date
                    416: my $files_to_archive = read_resource_meta($rootnode);
                    417: 
1.5       casties   418: logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
1.1       casties   419: 
1.5       casties   420: # remove .archived file
1.1       casties   421: if (-f "$docdir/.archived") {
                    422:     if (unlink "$docdir/.archived") {
1.5       casties   423:    logger('WARNING', "existing .archived file has been removed!");
1.1       casties   424:    $warncnt++;
                    425:     } else {
1.5       casties   426:    logger('ERROR', "unable to remove existing .archived file!");
1.1       casties   427:    $errcnt++;
                    428:     }
                    429: }
                    430: 
                    431: # remove junk files
                    432: if ($delete_junk_files) {
                    433:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    434: }
                    435: 
                    436: # write new index.meta
                    437: if ($errcnt > 0) {
1.5       casties   438:     logger('ABORT', "there were errors!");
1.1       casties   439:     exit 1;
                    440: } else {
                    441:     if ($fix_xml) {
                    442:    MPIWGStor::write_xml($document, $metafile);
                    443:     }
                    444: }
                    445: 
                    446: # start archiving
1.3       casties   447: my $archived_files = run_archive();
                    448: my $num_archfiles = scalar keys %$archived_files;
1.1       casties   449: 
1.5       casties   450: logger('INFO', "$num_archfiles files archived");
1.1       casties   451: 
                    452: # check list of archived files
                    453: check_files($files_to_archive, $archived_files);
                    454: 
                    455: # delete files if all went OK
                    456: if ($errcnt == 0) {
                    457:     system("touch", "$docdir/.archived");
                    458:     # remove junk files (again)
                    459:     if ($delete_junk_files) {
                    460:    delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    461:     }
                    462:     # remove archived files
                    463:     if ($delete_data_files) {
                    464:    delete_files($archived_files);
                    465:     }
                    466: }
                    467: 
1.5       casties   468: logger('INFO', "$warncnt warnings");
                    469: logger('INFO', "$errcnt errors");
1.1       casties   470: if ($errcnt > 0) {
1.4       casties   471:     logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
1.1       casties   472:     exit 1;
                    473: } else {
1.4       casties   474:     logger('DONE', "$num_archfiles files archived at " . stime(time));
1.1       casties   475: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>