Annotation of foxridge-archiver/archiver.pl, revision 1.1

1.1     ! casties     1: #!/usr/local/bin/perl -w
        !             2: 
        !             3: use strict;
        !             4: 
        !             5: use XML::LibXML;
        !             6: 
        !             7: # MPIWG libraries
        !             8: use lib '/usr/local/mpiwg/archive';
        !             9: use MPIWGStor;
        !            10: 
        !            11: # make output unbuffered
        !            12: $|=1;
        !            13: 
        !            14: #######################################################
        !            15: # internal parameters
        !            16: #
        !            17: 
        !            18: # program version
        !            19: my $version = "0.5 (15.1.2004)";
        !            20: 
        !            21: # rewrite XML file (necessary for archive date!)
        !            22: my $fix_xml = 1;
        !            23: my $xml_changed = 0;
        !            24: # XML namespace (not really implemented!)
        !            25: my $namespace = "";
        !            26: 
        !            27: # archive name (archive-path element, usually == $docdir)
        !            28: my $archname;
        !            29: # archive storage date (now)
        !            30: my $archdate = stime(time);
        !            31: 
        !            32: # delete "junk" files before archiving
        !            33: my $delete_junk_files = 1;
        !            34: 
        !            35: # delete data files after archiving
        !            36: my $delete_data_files = 1;
        !            37: 
        !            38: 
        !            39: #######################################################
        !            40: # external programs
        !            41: #
        !            42: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
        !            43: if (! -x $archprog) {
        !            44:     print "ABORT: TSM client program '$archprog' missing!!\n";
        !            45:     exit 1;
        !            46: }
        !            47: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
        !            48: if (! -x $checkprog) {
        !            49:     print "ABORT: meta data checking program '$checkprog' missing!!\n";
        !            50:     exit 1;
        !            51: }
        !            52: # log file for archiver
        !            53: my $log_file = "/var/log/mpiwg-archiver.log";
        !            54: if (! open LOG, ">>$log_file") {
        !            55:     print "ABORT: unable to write log file '$log_file'!!\n";
        !            56:     exit 1;
        !            57: }
        !            58: 
        !            59: #######################################################
        !            60: # check parameters that were passed to the program
        !            61: #
        !            62: if ($#ARGV < 0) {
        !            63:     print "ABORT: no document directory given!\n";
        !            64:     exit 1;
        !            65: }
        !            66: my $docdir = $ARGV[0];
        !            67: # strip trailing slashes
        !            68: $docdir =~ s/\/$//;
        !            69: if (! -d $docdir) {
        !            70:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
        !            71:     exit 1;
        !            72: }
        !            73: if (($#ARGV > 0)&&($ARGV[1] eq "-premigrate")) {
        !            74:     $delete_data_files = 0;
        !            75: }
        !            76: 
        !            77: my $metafile = "$docdir/index.meta";
        !            78: if (! -f $metafile) {
        !            79:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
        !            80:     exit 1;
        !            81: }
        !            82: 
        !            83: #######################################################
        !            84: # internal variables
        !            85: #
        !            86: 
        !            87: # number of errors
        !            88: my $errcnt = 0;
        !            89: # number of warnings
        !            90: my $warncnt = 0;
        !            91: 
        !            92: #######################################################
        !            93: # subroutines
        !            94: #
        !            95: 
        !            96: #
        !            97: # $files = read_resource_meta($rootnode)
        !            98: #
        !            99: # checks general resource meta information and reads the list of files
        !           100: #
        !           101: sub read_resource_meta {
        !           102:     my ($rootnode) = @_;
        !           103:     my %files;
        !           104:     #
        !           105:     # archive path
        !           106:     #
        !           107:     # get archive-path
        !           108:     $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
        !           109:     if (! $archname) {
        !           110:    print "ABORT: archive-name element missing!!\n";
        !           111:    exit 1;
        !           112:     }
        !           113: 
        !           114:     #
        !           115:     # files
        !           116:     #
        !           117:     my @filenodes = $rootnode->findnodes('child::file');
        !           118:     foreach my $fn (@filenodes) {
        !           119:    my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
        !           120:    my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
        !           121:    print "FILE: ($path)$name\n";
        !           122:    my $f = ($path) ? "$path/$name" : "$name";
        !           123:    $files{$f} = $name;
        !           124:     }
        !           125: 
        !           126:     #
        !           127:     # archive-storage-date
        !           128:     #
        !           129:     my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
        !           130:     if ($stordatenode) {
        !           131:    print "WARNING: archive storage date exists! Resource already archived?\n";
        !           132:    $warncnt++;
        !           133:    # delete old date
        !           134:    $stordatenode->removeChildNodes;
        !           135:     } else {
        !           136:    # create new storage date node
        !           137:    $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
        !           138:    # move after archive-path
        !           139:    $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
        !           140:     }
        !           141:     $stordatenode->appendTextNode($archdate);
        !           142:     $xml_changed++;
        !           143:     return \%files;
        !           144: }
        !           145: 
        !           146: 
        !           147: #
        !           148: # $%files = run_archive
        !           149: #
        !           150: # runs the archiver program on $docdir and returns a list of archived files
        !           151: #
        !           152: sub run_archive {
        !           153:     my %files;
        !           154:     print LOG "START archiver $version $archdate\n";
        !           155:     my $archcmd = $archprog;
        !           156:     $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
        !           157:     $archcmd .= " -description='$archname'";
        !           158:     $archcmd .= " '$docdir/'";
        !           159: 
        !           160:     print LOG "CMD: $archcmd\n";
        !           161:     if (open ARCH, "$archcmd 2>&1 |") {
        !           162:    while (<ARCH>) {
        !           163:        chomp;
        !           164:        print LOG "ARCH: $_\n";
        !           165:        if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
        !           166:        print "  ARCH: file '$1'\n";
        !           167:        $files{$1} = "ok";
        !           168:        }
        !           169:        if (/^Archive processing of .* finished without failure./) {
        !           170:        print "  ARCH: OK\n";
        !           171:        }
        !           172:    }
        !           173:     } else {
        !           174:    print "ABORT: unable to start archive command '$archcmd'!!\n";
        !           175:    exit 1;
        !           176:     }
        !           177: 
        !           178:     return \%files;
        !           179: }
        !           180: 
        !           181: 
        !           182: #
        !           183: # check_files(\%files_to_archive, \%archived_files)
        !           184: #
        !           185: # compares the list of archived and to be archived files
        !           186: #
        !           187: sub check_files {
        !           188:     my ($to_archive, $archived) = @_;
        !           189: 
        !           190:     my $nt = scalar keys %$to_archive;
        !           191:     my $na = scalar keys %$archived;
        !           192: 
        !           193:     foreach my $ft (sort keys %$to_archive) {
        !           194:    my $fp = "$docdir/$ft";
        !           195:    #print "  fp: $fp\n";
        !           196:    if ($$archived{$fp}) {
        !           197:        print "DEBUG: $ft archived OK\n";
        !           198:        $$archived{$fp} = "OK";
        !           199:    } else {
        !           200:        print "ERROR: file '$ft' missing from archive!\n";
        !           201:        $errcnt++;
        !           202:    }
        !           203:     }
        !           204: 
        !           205:     foreach my $fa (sort keys %$archived) {
        !           206:    if ($$archived{$fa} ne "OK") {
        !           207:        my ($fn, $fp) = MPIWGStor::split_file_path($fa);
        !           208:        if ($MPIWGStor::index_files{$fn}) {
        !           209:        print "DEBUG: $fa ignored\n";
        !           210:        $na--;
        !           211:        } else {
        !           212:        print "WARNING: $fa archived but not in list!\n";
        !           213:        $warncnt++;
        !           214:        }
        !           215:    }
        !           216:     }
        !           217: 
        !           218:     if ($nt > $na) {
        !           219:    print "WARNING: less files were archived ($na vs. $nt)!\n";
        !           220:     } elsif ($na > $nt) {
        !           221:    print "WARNING: more files were archived ($na vs. $nt)!\n";
        !           222:     }
        !           223: 
        !           224: }
        !           225: 
        !           226: 
        !           227: #
        !           228: # delete_files(\%files)
        !           229: #
        !           230: # deletes the files from the list (of absolute files) and their directories
        !           231: # if they are empty
        !           232: #
        !           233: sub delete_files {
        !           234:     my ($files) = @_;
        !           235:     my %dirs;
        !           236: 
        !           237:     foreach my $f (sort keys %$files) {
        !           238:    my ($fn, $fp) = MPIWGStor::split_file_path($f);
        !           239:    # collect all unique directories
        !           240:         if ($fp && (! $dirs{$fp})) {
        !           241:        $dirs{$fp} = $fp;
        !           242:    }
        !           243:    # don't delete index files
        !           244:    next if ($MPIWGStor::index_files{$fn});
        !           245:    # no file no delete
        !           246:    next unless (-f $f);
        !           247:    # delete files
        !           248:    if (unlink $f) {
        !           249:        print "INFO: remove $f ($fn)\n";
        !           250:    } else {
        !           251:        print "ERROR: unable to delete $f!\n";
        !           252:        $errcnt++;
        !           253:    }
        !           254:     }
        !           255:     # try to delete all empty directories
        !           256:     my @dirkeys = sort keys %dirs;
        !           257:     # starting at the end to get to the subdirectories first
        !           258:     for (my $i = $#dirkeys; $i >= 0; $i--) {
        !           259:    my $d = $dirkeys[$i];
        !           260:    # dont't remove document dir (shouldn't be empty anyway)
        !           261:    next if ($d eq $docdir);
        !           262:    if (-d $d) {
        !           263:        print "INFO: remove dir $d\n";
        !           264:        rmdir $d;
        !           265:    }
        !           266:     }
        !           267: }
        !           268: 
        !           269: 
        !           270: #
        !           271: # delete_all_files(\%files, $dir)
        !           272: #
        !           273: # deletes all files with names from the list %files
        !           274: # in the directory $dir and its subdirectories 
        !           275: #
        !           276: sub delete_all_files {
        !           277:     my ($files, $dir) = @_;
        !           278: 
        !           279:     if (! opendir DIR, $dir) {
        !           280:    print "ERROR: unable to read directory $dir!\n";
        !           281:    $errcnt++;
        !           282:    return;
        !           283:     }
        !           284:     my @fl = readdir DIR;
        !           285:     closedir DIR;
        !           286: 
        !           287:     foreach my $f (@fl) {
        !           288:    next if ($f =~ /^\.{1,2}$/);
        !           289:    if ($$files{$f}) {
        !           290:        # $f is in the file list
        !           291:        if (-f "$dir/$f") {
        !           292:        # $f is a file
        !           293:        if (unlink "$dir/$f") {
        !           294:            print "INFO: removed $f\n";
        !           295:        } else {
        !           296:            print "ERROR: unable to delete $f!\n";
        !           297:            $errcnt++;
        !           298:        }
        !           299:        } elsif (-d _) {
        !           300:        # $f is a directory (unlink won't work)
        !           301:        if ((system 'rm', '-r', "$dir/$f") == 0) {
        !           302:            print "INFO: removed directory $f\n";
        !           303:        } else {
        !           304:            print "ERROR: unable to delete directory $f!\n";
        !           305:            $errcnt++;
        !           306:        }
        !           307:        } else {
        !           308:        print "ERROR: funny object $dir/$f!\n";
        !           309:        $errcnt++;
        !           310:        }
        !           311:    } else {
        !           312:        # $f is not in the list
        !           313:        if (-d "$dir/$f") {
        !           314:        # recurse into directories
        !           315:        print "DEBUG: enter $dir/$f\n";
        !           316:        delete_all_files($files, "$dir/$f");
        !           317:        }
        !           318:    }
        !           319:     }
        !           320: }
        !           321: 
        !           322: 
        !           323: #######################################################
        !           324: # main
        !           325: #
        !           326: 
        !           327: print "START: archiver $version at $archdate\n";
        !           328: 
        !           329: # make shure the right user is running this program
        !           330: my $user = getlogin;
        !           331: #if (($user ne "archive")||($user ne "root")) {
        !           332: #    logger("ABORT", "you ($user) must be archive or root user to run this program!");
        !           333: #    exit 1;
        !           334: #}
        !           335: 
        !           336: # use metacheck first
        !           337: if (system("$checkprog $docdir >/dev/null") == 0) {
        !           338:     print "INFO: resource '$docdir' check OK\n";
        !           339: } else {
        !           340:     print "ABORT: resource '$docdir' check failed!!\n";
        !           341:     exit 1;
        !           342: }
        !           343: 
        !           344: # read index.meta file
        !           345: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
        !           346: 
        !           347: # check file and add archive date
        !           348: my $files_to_archive = read_resource_meta($rootnode);
        !           349: 
        !           350: print "INFO: ", scalar keys %$files_to_archive, " files to archive\n";
        !           351: 
        !           352: # check for .archived file
        !           353: if (-f "$docdir/.archived") {
        !           354:     if (unlink "$docdir/.archived") {
        !           355:    print "WARNING: existing .archived file has been removed! Resource already archived?\n";
        !           356:    $warncnt++;
        !           357:     } else {
        !           358:    print "ERROR: unable to remove existing .archived file!\n";
        !           359:    $errcnt++;
        !           360:     }
        !           361: }
        !           362: 
        !           363: # remove junk files
        !           364: if ($delete_junk_files) {
        !           365:     delete_all_files(\%MPIWGStor::junk_files, $docdir);
        !           366: }
        !           367: 
        !           368: # write new index.meta
        !           369: if ($errcnt > 0) {
        !           370:     print "ABORT: there were errors!\n";
        !           371:     exit 1;
        !           372: } else {
        !           373:     if ($fix_xml) {
        !           374:    MPIWGStor::write_xml($document, $metafile);
        !           375:     }
        !           376: }
        !           377: 
        !           378: # start archiving
        !           379: my $archived_files = run_archive;
        !           380: 
        !           381: print "INFO: ", scalar keys %$archived_files, " files archived\n";
        !           382: 
        !           383: # check list of archived files
        !           384: check_files($files_to_archive, $archived_files);
        !           385: 
        !           386: # delete files if all went OK
        !           387: if ($errcnt == 0) {
        !           388:     system("touch", "$docdir/.archived");
        !           389:     # remove junk files (again)
        !           390:     if ($delete_junk_files) {
        !           391:    delete_all_files(\%MPIWGStor::junk_files, $docdir);
        !           392:     }
        !           393:     # remove archived files
        !           394:     if ($delete_data_files) {
        !           395:    delete_files($archived_files);
        !           396:     }
        !           397: }
        !           398: 
        !           399: print "INFO: $warncnt warnings\n";
        !           400: print "INFO: $errcnt errors\n";
        !           401: my $num_archfiles = %$archived_files + 1;
        !           402: if ($errcnt > 0) {
        !           403:     print "ABORT: there were errors! ($num_archfiles files archived) at ", stime(time), "\n";
        !           404:     exit 1;
        !           405: } else {
        !           406:     print "DONE: $num_archfiles files archived at ", stime(time), "\n";
        !           407: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>