Annotation of foxridge-archiver/archiver.pl, revision 1.3

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # program version
1.3     ! casties    19: my $version = "0.5.2 (12.7.2004)";
1.1       casties    20: 
                     21: # rewrite XML file (necessary for archive date!)
                     22: my $fix_xml = 1;
                     23: my $xml_changed = 0;
                     24: # XML namespace (not really implemented!)
                     25: my $namespace = "";
                     26: 
                     27: # archive name (archive-path element, usually == $docdir)
                     28: my $archname;
                     29: # archive storage date (now)
                     30: my $archdate = stime(time);
                     31: 
                     32: # delete "junk" files before archiving
                     33: my $delete_junk_files = 1;
                     34: 
                     35: # delete data files after archiving
                     36: my $delete_data_files = 1;
                     37: 
                     38: 
                     39: #######################################################
                     40: # external programs
                     41: #
                     42: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     43: if (! -x $archprog) {
                     44:     print "ABORT: TSM client program '$archprog' missing!!\n";
                     45:     exit 1;
                     46: }
                     47: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     48: if (! -x $checkprog) {
                     49:     print "ABORT: meta data checking program '$checkprog' missing!!\n";
                     50:     exit 1;
                     51: }
                     52: # log file for archiver
                     53: my $log_file = "/var/log/mpiwg-archiver.log";
                     54: if (! open LOG, ">>$log_file") {
                     55:     print "ABORT: unable to write log file '$log_file'!!\n";
                     56:     exit 1;
                     57: }
                     58: 
                     59: #######################################################
                     60: # check parameters that were passed to the program
                     61: #
                     62: if ($#ARGV < 0) {
                     63:     print "ABORT: no document directory given!\n";
                     64:     exit 1;
                     65: }
                     66: my $docdir = $ARGV[0];
1.2       casties    67: # strip double slashes
                     68: $docdir =~ s/\/\//\//;
1.1       casties    69: # strip trailing slashes
1.2       casties    70: $docdir =~ s/\/+$//;
1.1       casties    71: if (! -d $docdir) {
                     72:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
                     73:     exit 1;
                     74: }
                     75: if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) {
                     76:     $delete_data_files = 0;
                     77: }
                     78: 
                     79: my $metafile = "$docdir/index.meta";
                     80: if (! -f $metafile) {
                     81:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
                     82:     exit 1;
                     83: }
                     84: 
                     85: #######################################################
                     86: # internal variables
                     87: #
                     88: 
                     89: # number of errors
                     90: my $errcnt = 0;
                     91: # number of warnings
                     92: my $warncnt = 0;
                     93: 
                     94: #######################################################
                     95: # subroutines
                     96: #
                     97: 
                     98: #
                     99: # $files = read_resource_meta($rootnode)
                    100: #
                    101: # checks general resource meta information and reads the list of files
                    102: #
                    103: sub read_resource_meta {
                    104:     my ($rootnode) = @_;
                    105:     my %files;
                    106:     #
                    107:     # archive path
                    108:     #
                    109:     # get archive-path
                    110:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
                    111:     if (! $archname) {
                    112:    print "ABORT: archive-name element missing!!\n";
                    113:    exit 1;
                    114:     }
                    115: 
                    116:     #
                    117:     # files
                    118:     #
                    119:     my @filenodes = $rootnode->findnodes('child::file');
                    120:     foreach my $fn (@filenodes) {
                    121:    my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
                    122:    my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
                    123:    print "FILE: ($path)$name\n";
                    124:    my $f = ($path) ? "$path/$name" : "$name";
                    125:    $files{$f} = $name;
                    126:     }
                    127: 
                    128:     #
                    129:     # archive-storage-date
                    130:     #
                    131:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
                    132:     if ($stordatenode) {
                    133:    print "WARNING: archive storage date exists! Resource already archived?\n";
                    134:    $warncnt++;
                    135:    # delete old date
                    136:    $stordatenode->removeChildNodes;
                    137:     } else {
                    138:    # create new storage date node
                    139:    $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
                    140:    # move after archive-path
                    141:    $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
                    142:     }
                    143:     $stordatenode->appendTextNode($archdate);
                    144:     $xml_changed++;
                    145:     return \%files;
                    146: }
                    147: 
                    148: 
                    149: #
                    150: # $%files = run_archive
                    151: #
                    152: # runs the archiver program on $docdir and returns a list of archived files
                    153: #
                    154: sub run_archive {
                    155:     my %files;
                    156:     print LOG "START archiver $version $archdate\n";
                    157:     my $archcmd = $archprog;
                    158:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
                    159:     $archcmd .= " -description='$archname'";
                    160:     $archcmd .= " '$docdir/'";
                    161: 
                    162:     print LOG "CMD: $archcmd\n";
                    163:     if (open ARCH, "$archcmd 2>&1 |") {
                    164:    while (<ARCH>) {
                    165:        chomp;
                    166:        print LOG "ARCH: $_\n";
                    167:        if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
                    168:        print "  ARCH: file '$1'\n";
                    169:        $files{$1} = "ok";
                    170:        }
                    171:        if (/^Archive processing of .* finished without failure./) {
                    172:        print "  ARCH: OK\n";
                    173:        }
                    174:    }
                    175:     } else {
                    176:    print "ABORT: unable to start archive command '$archcmd'!!\n";
                    177:    exit 1;
                    178:     }
                    179: 
                    180:     return \%files;
                    181: }
                    182: 
                    183: 
                    184: #
                    185: # check_files(\%files_to_archive, \%archived_files)
                    186: #
                    187: # compares the list of archived and to be archived files
                    188: #
                    189: sub check_files {
                    190:     my ($to_archive, $archived) = @_;
                    191: 
                    192:     my $nt = scalar keys %$to_archive;
                    193:     my $na = scalar keys %$archived;
                    194: 
                    195:     foreach my $ft (sort keys %$to_archive) {
                    196:    my $fp = "$docdir/$ft";
                    197:    #print "  fp: $fp\n";
                    198:    if ($$archived{$fp}) {
                    199:        print "DEBUG: $ft archived OK\n";
                    200:        $$archived{$fp} = "OK";
                    201:    } else {
                    202:        print "ERROR: file '$ft' missing from archive!\n";
                    203:        $errcnt++;
                    204:    }
                    205:     }
                    206: 
                    207:     foreach my $fa (sort keys %$archived) {
                    208:    if ($$archived{$fa} ne "OK") {
                    209:        my ($fn, $fp) = MPIWGStor::split_file_path($fa);
                    210:        if ($MPIWGStor::index_files{$fn}) {
                    211:        print "DEBUG: $fa ignored\n";
                    212:        $na--;
                    213:        } else {
                    214:        print "WARNING: $fa archived but not in list!\n";
                    215:        $warncnt++;
                    216:        }
                    217:    }
                    218:     }
                    219: 
                    220:     if ($nt > $na) {
                    221:    print "WARNING: less files were archived ($na vs. $nt)!\n";
                    222:     } elsif ($na > $nt) {
                    223:    print "WARNING: more files were archived ($na vs. $nt)!\n";
                    224:     }
                    225: 
                    226: }
                    227: 
                    228: 
                    229: #
                    230: # delete_files(\%files)
                    231: #
                    232: # deletes the files from the list (of absolute files) and their directories
                    233: # if they are empty
                    234: #
                    235: sub delete_files {
                    236:     my ($files) = @_;
                    237:     my %dirs;
                    238: 
                    239:     foreach my $f (sort keys %$files) {
                    240:    my ($fn, $fp) = MPIWGStor::split_file_path($f);
                    241:    # collect all unique directories
                    242:         if ($fp && (! $dirs{$fp})) {
                    243:        $dirs{$fp} = $fp;
                    244:    }
                    245:    # don't delete index files
                    246:    next if ($MPIWGStor::index_files{$fn});
                    247:    # no file no delete
                    248:    next unless (-f $f);
                    249:    # delete files
                    250:    if (unlink $f) {
                    251:        print "INFO: remove $f ($fn)\n";
                    252:    } else {
                    253:        print "ERROR: unable to delete $f!\n";
                    254:        $errcnt++;
                    255:    }
                    256:     }
                    257:     # try to delete all empty directories
                    258:     my @dirkeys = sort keys %dirs;
                    259:     # starting at the end to get to the subdirectories first
                    260:     for (my $i = $#dirkeys; $i >= 0; $i--) {
                    261:    my $d = $dirkeys[$i];
                    262:    # dont't remove document dir (shouldn't be empty anyway)
                    263:    next if ($d eq $docdir);
                    264:    if (-d $d) {
                    265:        print "INFO: remove dir $d\n";
                    266:        rmdir $d;
                    267:    }
                    268:     }
                    269: }
                    270: 
                    271: 
                    272: #
                    273: # delete_all_files(\%files, $dir)
                    274: #
                    275: # deletes all files with names from the list %files
                    276: # in the directory $dir and its subdirectories 
                    277: #
                    278: sub delete_all_files {
                    279:     my ($files, $dir) = @_;
                    280: 
                    281:     if (! opendir DIR, $dir) {
                    282:    print "ERROR: unable to read directory $dir!\n";
                    283:    $errcnt++;
                    284:    return;
                    285:     }
                    286:     my @fl = readdir DIR;
                    287:     closedir DIR;
                    288: 
                    289:     foreach my $f (@fl) {
                    290:    next if ($f =~ /^\.{1,2}$/);
                    291:    if ($$files{$f}) {
                    292:        # $f is in the file list
                    293:        if (-f "$dir/$f") {
                    294:        # $f is a file
                    295:        if (unlink "$dir/$f") {
                    296:            print "INFO: removed $f\n";
                    297:        } else {
                    298:            print "ERROR: unable to delete $f!\n";
                    299:            $errcnt++;
                    300:        }
                    301:        } elsif (-d _) {
                    302:        # $f is a directory (unlink won't work)
                    303:        if ((system 'rm', '-r', "$dir/$f") == 0) {
                    304:            print "INFO: removed directory $f\n";
                    305:        } else {
                    306:            print "ERROR: unable to delete directory $f!\n";
                    307:            $errcnt++;
                    308:        }
                    309:        } else {
                    310:        print "ERROR: funny object $dir/$f!\n";
                    311:        $errcnt++;
                    312:        }
                    313:    } else {
                    314:        # $f is not in the list
                    315:        if (-d "$dir/$f") {
                    316:        # recurse into directories
                    317:        print "DEBUG: enter $dir/$f\n";
                    318:        delete_all_files($files, "$dir/$f");
                    319:        }
                    320:    }
                    321:     }
                    322: }
                    323: 
                    324: 
                    325: #######################################################
                    326: # main
                    327: #
                    328: 
                    329: print "START: archiver $version at $archdate\n";
                    330: 
                    331: # make shure the right user is running this program
                    332: my $user = getlogin;
                    333: #if (($user ne "archive")||($user ne "root")) {
                    334: #    logger("ABORT", "you ($user) must be archive or root user to run this program!");
                    335: #    exit 1;
                    336: #}
                    337: 
                    338: # use metacheck first
                    339: if (system("$checkprog $docdir >/dev/null") == 0) {
                    340:     print "INFO: resource '$docdir' check OK\n";
                    341: } else {
                    342:     print "ABORT: resource '$docdir' check failed!!\n";
                    343:     exit 1;
                    344: }
                    345: 
                    346: # read index.meta file
                    347: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
                    348: 
                    349: # check file and add archive date
                    350: my $files_to_archive = read_resource_meta($rootnode);
                    351: 
                    352: print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
                    353: 
                    354: # check for .archived file
                    355: if (-f "$docdir/.archived") {
                    356:     if (unlink "$docdir/.archived") {
                    357:    print "WARNING: existing .archived file has been removed! Resource already archived?\n";
                    358:    $warncnt++;
                    359:     } else {
                    360:    print "ERROR: unable to remove existing .archived file!\n";
                    361:    $errcnt++;
                    362:     }
                    363: }
                    364: 
                    365: # remove junk files
                    366: if ($delete_junk_files) {
                    367:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    368: }
                    369: 
                    370: # write new index.meta
                    371: if ($errcnt > 0) {
                    372:     print "ABORT: there were errors!\n";
                    373:     exit 1;
                    374: } else {
                    375:     if ($fix_xml) {
                    376:    MPIWGStor::write_xml($document, $metafile);
                    377:     }
                    378: }
                    379: 
                    380: # start archiving
1.3     ! casties   381: my $archived_files = run_archive();
        !           382: my $num_archfiles = scalar keys %$archived_files;
1.1       casties   383: 
1.3     ! casties   384: print "INFO: $num_archfiles files archived\n";
1.1       casties   385: 
                    386: # check list of archived files
                    387: check_files($files_to_archive, $archived_files);
                    388: 
                    389: # delete files if all went OK
                    390: if ($errcnt == 0) {
                    391:     system("touch", "$docdir/.archived");
                    392:     # remove junk files (again)
                    393:     if ($delete_junk_files) {
                    394:    delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    395:     }
                    396:     # remove archived files
                    397:     if ($delete_data_files) {
                    398:    delete_files($archived_files);
                    399:     }
                    400: }
                    401: 
                    402: print "INFO: $warncnt warnings\n";
                    403: print "INFO: $errcnt errors\n";
                    404: if ($errcnt > 0) {
                    405:     print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n";
                    406:     exit 1;
                    407: } else {
                    408:     print "DONE: $num_archfiles files archived at ", stime(time), "\n";
                    409: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>