Annotation of foxridge-archiver/archiver.pl, revision 1.1.1.1

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: #######################################################
                     15: # internal parameters
                     16: #
                     17: 
                     18: # program version
                     19: my $version = "0.5 (15.1.2004)";
                     20: 
                     21: # rewrite XML file (necessary for archive date!)
                     22: my $fix_xml = 1;
                     23: my $xml_changed = 0;
                     24: # XML namespace (not really implemented!)
                     25: my $namespace = "";
                     26: 
                     27: # archive name (archive-path element, usually == $docdir)
                     28: my $archname;
                     29: # archive storage date (now)
                     30: my $archdate = stime(time);
                     31: 
                     32: # delete "junk" files before archiving
                     33: my $delete_junk_files = 1;
                     34: 
                     35: # delete data files after archiving
                     36: my $delete_data_files = 1;
                     37: 
                     38: 
                     39: #######################################################
                     40: # external programs
                     41: #
                     42: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     43: if (! -x $archprog) {
                     44:     print "ABORT: TSM client program '$archprog' missing!!\n";
                     45:     exit 1;
                     46: }
                     47: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
                     48: if (! -x $checkprog) {
                     49:     print "ABORT: meta data checking program '$checkprog' missing!!\n";
                     50:     exit 1;
                     51: }
                     52: # log file for archiver
                     53: my $log_file = "/var/log/mpiwg-archiver.log";
                     54: if (! open LOG, ">>$log_file") {
                     55:     print "ABORT: unable to write log file '$log_file'!!\n";
                     56:     exit 1;
                     57: }
                     58: 
                     59: #######################################################
                     60: # check parameters that were passed to the program
                     61: #
                     62: if ($#ARGV < 0) {
                     63:     print "ABORT: no document directory given!\n";
                     64:     exit 1;
                     65: }
                     66: my $docdir = $ARGV[0];
                     67: # strip trailing slashes
                     68: $docdir =~ s/\/$//;
                     69: if (! -d $docdir) {
                     70:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
                     71:     exit 1;
                     72: }
                     73: if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) {
                     74:     $delete_data_files = 0;
                     75: }
                     76: 
                     77: my $metafile = "$docdir/index.meta";
                     78: if (! -f $metafile) {
                     79:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
                     80:     exit 1;
                     81: }
                     82: 
                     83: #######################################################
                     84: # internal variables
                     85: #
                     86: 
                     87: # number of errors
                     88: my $errcnt = 0;
                     89: # number of warnings
                     90: my $warncnt = 0;
                     91: 
                     92: #######################################################
                     93: # subroutines
                     94: #
                     95: 
                     96: #
                     97: # $files = read_resource_meta($rootnode)
                     98: #
                     99: # checks general resource meta information and reads the list of files
                    100: #
                    101: sub read_resource_meta {
                    102:     my ($rootnode) = @_;
                    103:     my %files;
                    104:     #
                    105:     # archive path
                    106:     #
                    107:     # get archive-path
                    108:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
                    109:     if (! $archname) {
                    110:    print "ABORT: archive-name element missing!!\n";
                    111:    exit 1;
                    112:     }
                    113: 
                    114:     #
                    115:     # files
                    116:     #
                    117:     my @filenodes = $rootnode->findnodes('child::file');
                    118:     foreach my $fn (@filenodes) {
                    119:    my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
                    120:    my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
                    121:    print "FILE: ($path)$name\n";
                    122:    my $f = ($path) ? "$path/$name" : "$name";
                    123:    $files{$f} = $name;
                    124:     }
                    125: 
                    126:     #
                    127:     # archive-storage-date
                    128:     #
                    129:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
                    130:     if ($stordatenode) {
                    131:    print "WARNING: archive storage date exists! Resource already archived?\n";
                    132:    $warncnt++;
                    133:    # delete old date
                    134:    $stordatenode->removeChildNodes;
                    135:     } else {
                    136:    # create new storage date node
                    137:    $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
                    138:    # move after archive-path
                    139:    $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
                    140:     }
                    141:     $stordatenode->appendTextNode($archdate);
                    142:     $xml_changed++;
                    143:     return \%files;
                    144: }
                    145: 
                    146: 
                    147: #
                    148: # $%files = run_archive
                    149: #
                    150: # runs the archiver program on $docdir and returns a list of archived files
                    151: #
                    152: sub run_archive {
                    153:     my %files;
                    154:     print LOG "START archiver $version $archdate\n";
                    155:     my $archcmd = $archprog;
                    156:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
                    157:     $archcmd .= " -description='$archname'";
                    158:     $archcmd .= " '$docdir/'";
                    159: 
                    160:     print LOG "CMD: $archcmd\n";
                    161:     if (open ARCH, "$archcmd 2>&1 |") {
                    162:    while (<ARCH>) {
                    163:        chomp;
                    164:        print LOG "ARCH: $_\n";
                    165:        if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
                    166:        print "  ARCH: file '$1'\n";
                    167:        $files{$1} = "ok";
                    168:        }
                    169:        if (/^Archive processing of .* finished without failure./) {
                    170:        print "  ARCH: OK\n";
                    171:        }
                    172:    }
                    173:     } else {
                    174:    print "ABORT: unable to start archive command '$archcmd'!!\n";
                    175:    exit 1;
                    176:     }
                    177: 
                    178:     return \%files;
                    179: }
                    180: 
                    181: 
                    182: #
                    183: # check_files(\%files_to_archive, \%archived_files)
                    184: #
                    185: # compares the list of archived and to be archived files
                    186: #
                    187: sub check_files {
                    188:     my ($to_archive, $archived) = @_;
                    189: 
                    190:     my $nt = scalar keys %$to_archive;
                    191:     my $na = scalar keys %$archived;
                    192: 
                    193:     foreach my $ft (sort keys %$to_archive) {
                    194:    my $fp = "$docdir/$ft";
                    195:    #print "  fp: $fp\n";
                    196:    if ($$archived{$fp}) {
                    197:        print "DEBUG: $ft archived OK\n";
                    198:        $$archived{$fp} = "OK";
                    199:    } else {
                    200:        print "ERROR: file '$ft' missing from archive!\n";
                    201:        $errcnt++;
                    202:    }
                    203:     }
                    204: 
                    205:     foreach my $fa (sort keys %$archived) {
                    206:    if ($$archived{$fa} ne "OK") {
                    207:        my ($fn, $fp) = MPIWGStor::split_file_path($fa);
                    208:        if ($MPIWGStor::index_files{$fn}) {
                    209:        print "DEBUG: $fa ignored\n";
                    210:        $na--;
                    211:        } else {
                    212:        print "WARNING: $fa archived but not in list!\n";
                    213:        $warncnt++;
                    214:        }
                    215:    }
                    216:     }
                    217: 
                    218:     if ($nt > $na) {
                    219:    print "WARNING: less files were archived ($na vs. $nt)!\n";
                    220:     } elsif ($na > $nt) {
                    221:    print "WARNING: more files were archived ($na vs. $nt)!\n";
                    222:     }
                    223: 
                    224: }
                    225: 
                    226: 
                    227: #
                    228: # delete_files(\%files)
                    229: #
                    230: # deletes the files from the list (of absolute files) and their directories
                    231: # if they are empty
                    232: #
                    233: sub delete_files {
                    234:     my ($files) = @_;
                    235:     my %dirs;
                    236: 
                    237:     foreach my $f (sort keys %$files) {
                    238:    my ($fn, $fp) = MPIWGStor::split_file_path($f);
                    239:    # collect all unique directories
                    240:         if ($fp && (! $dirs{$fp})) {
                    241:        $dirs{$fp} = $fp;
                    242:    }
                    243:    # don't delete index files
                    244:    next if ($MPIWGStor::index_files{$fn});
                    245:    # no file no delete
                    246:    next unless (-f $f);
                    247:    # delete files
                    248:    if (unlink $f) {
                    249:        print "INFO: remove $f ($fn)\n";
                    250:    } else {
                    251:        print "ERROR: unable to delete $f!\n";
                    252:        $errcnt++;
                    253:    }
                    254:     }
                    255:     # try to delete all empty directories
                    256:     my @dirkeys = sort keys %dirs;
                    257:     # starting at the end to get to the subdirectories first
                    258:     for (my $i = $#dirkeys; $i >= 0; $i--) {
                    259:    my $d = $dirkeys[$i];
                    260:    # dont't remove document dir (shouldn't be empty anyway)
                    261:    next if ($d eq $docdir);
                    262:    if (-d $d) {
                    263:        print "INFO: remove dir $d\n";
                    264:        rmdir $d;
                    265:    }
                    266:     }
                    267: }
                    268: 
                    269: 
                    270: #
                    271: # delete_all_files(\%files, $dir)
                    272: #
                    273: # deletes all files with names from the list %files
                    274: # in the directory $dir and its subdirectories 
                    275: #
                    276: sub delete_all_files {
                    277:     my ($files, $dir) = @_;
                    278: 
                    279:     if (! opendir DIR, $dir) {
                    280:    print "ERROR: unable to read directory $dir!\n";
                    281:    $errcnt++;
                    282:    return;
                    283:     }
                    284:     my @fl = readdir DIR;
                    285:     closedir DIR;
                    286: 
                    287:     foreach my $f (@fl) {
                    288:    next if ($f =~ /^\.{1,2}$/);
                    289:    if ($$files{$f}) {
                    290:        # $f is in the file list
                    291:        if (-f "$dir/$f") {
                    292:        # $f is a file
                    293:        if (unlink "$dir/$f") {
                    294:            print "INFO: removed $f\n";
                    295:        } else {
                    296:            print "ERROR: unable to delete $f!\n";
                    297:            $errcnt++;
                    298:        }
                    299:        } elsif (-d _) {
                    300:        # $f is a directory (unlink won't work)
                    301:        if ((system 'rm', '-r', "$dir/$f") == 0) {
                    302:            print "INFO: removed directory $f\n";
                    303:        } else {
                    304:            print "ERROR: unable to delete directory $f!\n";
                    305:            $errcnt++;
                    306:        }
                    307:        } else {
                    308:        print "ERROR: funny object $dir/$f!\n";
                    309:        $errcnt++;
                    310:        }
                    311:    } else {
                    312:        # $f is not in the list
                    313:        if (-d "$dir/$f") {
                    314:        # recurse into directories
                    315:        print "DEBUG: enter $dir/$f\n";
                    316:        delete_all_files($files, "$dir/$f");
                    317:        }
                    318:    }
                    319:     }
                    320: }
                    321: 
                    322: 
                    323: #######################################################
                    324: # main
                    325: #
                    326: 
                    327: print "START: archiver $version at $archdate\n";
                    328: 
                    329: # make shure the right user is running this program
                    330: my $user = getlogin;
                    331: #if (($user ne "archive")||($user ne "root")) {
                    332: #    logger("ABORT", "you ($user) must be archive or root user to run this program!");
                    333: #    exit 1;
                    334: #}
                    335: 
                    336: # use metacheck first
                    337: if (system("$checkprog $docdir >/dev/null") == 0) {
                    338:     print "INFO: resource '$docdir' check OK\n";
                    339: } else {
                    340:     print "ABORT: resource '$docdir' check failed!!\n";
                    341:     exit 1;
                    342: }
                    343: 
                    344: # read index.meta file
                    345: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
                    346: 
                    347: # check file and add archive date
                    348: my $files_to_archive = read_resource_meta($rootnode);
                    349: 
                    350: print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
                    351: 
                    352: # check for .archived file
                    353: if (-f "$docdir/.archived") {
                    354:     if (unlink "$docdir/.archived") {
                    355:    print "WARNING: existing .archived file has been removed! Resource already archived?\n";
                    356:    $warncnt++;
                    357:     } else {
                    358:    print "ERROR: unable to remove existing .archived file!\n";
                    359:    $errcnt++;
                    360:     }
                    361: }
                    362: 
                    363: # remove junk files
                    364: if ($delete_junk_files) {
                    365:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    366: }
                    367: 
                    368: # write new index.meta
                    369: if ($errcnt > 0) {
                    370:     print "ABORT: there were errors!\n";
                    371:     exit 1;
                    372: } else {
                    373:     if ($fix_xml) {
                    374:    MPIWGStor::write_xml($document, $metafile);
                    375:     }
                    376: }
                    377: 
                    378: # start archiving
                    379: my $archived_files = run_archive;
                    380: 
                    381: print "INFO: ", scalar keys %$archived_files, " files archived\n";
                    382: 
                    383: # check list of archived files
                    384: check_files($files_to_archive, $archived_files);
                    385: 
                    386: # delete files if all went OK
                    387: if ($errcnt == 0) {
                    388:     system("touch", "$docdir/.archived");
                    389:     # remove junk files (again)
                    390:     if ($delete_junk_files) {
                    391:    delete_all_files(\%MPIWGStor::junk_files, $docdir);
                    392:     }
                    393:     # remove archived files
                    394:     if ($delete_data_files) {
                    395:    delete_files($archived_files);
                    396:     }
                    397: }
                    398: 
                    399: print "INFO: $warncnt warnings\n";
                    400: print "INFO: $errcnt errors\n";
                    401: my $num_archfiles = %$archived_files + 1;
                    402: if ($errcnt > 0) {
                    403:     print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n";
                    404:     exit 1;
                    405: } else {
                    406:     print "DONE: $num_archfiles files archived at ", stime(time), "\n";
                    407: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>