Annotation of foxridge-archiver/unarchiver.pl, revision 1.1.1.1

1.1       casties     1: #!/usr/local/bin/perl -w
                      2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
                      6: 
                      7: # MPIWG libraries
                      8: use lib '/usr/local/mpiwg/archive';
                      9: use MPIWGStor;
                     10: 
                     11: # make output unbuffered
                     12: $|=1;
                     13: 
                     14: 
                     15: #######################################################
                     16: # internal parameters
                     17: #
                     18: 
                     19: # program version
                     20: my $version = "0.1 (24.9.2003)";
                     21: 
                     22: # read command line parameters
                     23: my $args = parseargs;
                     24: 
                     25: # debug level
                     26: $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
                     27: 
                     28: # rewrite XML file (necessary for archive date!)
                     29: my $fix_xml = 1;
                     30: my $xml_changed = 0;
                     31: # XML namespace (not really implemented!)
                     32: my $namespace = "";
                     33: 
                     34: # archive name (archive-path element, usually == $docdir)
                     35: my $archname;
                     36: # archive storage date
                     37: my $archdate;
                     38: 
                     39: #######################################################
                     40: # external programs
                     41: #
                     42: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     43: if (! -x $archprog) {
                     44:     logger("ABORT", "TSM client program '$archprog' missing!!");
                     45:     exit 1;
                     46: }
                     47: my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
                     48: if (! -x $checkprog) {
                     49:     logger("ABORT", "archive checking program '$checkprog' missing!!");
                     50:     exit 1;
                     51: }
                     52: # log file for archiver
                     53: my $log_file = "/var/tmp/unarchiver.log";
                     54: if (! open LOG, ">>$log_file") {
                     55:     logger("ABORT", "unable to write log file '$log_file'!!");
                     56:     exit 1;
                     57: }
                     58: 
                     59: #######################################################
                     60: # check parameters that were passed to the program
                     61: #
                     62: my $docdir = $$args{'path'};
                     63: if (! $docdir) {
                     64:     print "ABORT: no document directory given!\n";
                     65:     exit 1;
                     66: }
                     67: # strip trailing slashes
                     68: $docdir =~ s/\/$//;
                     69: if (! -d $docdir) {
                     70:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
                     71:     exit 1;
                     72: }
                     73: 
                     74: my $metafile = "$docdir/index.meta";
                     75: if (! -f $metafile) {
                     76:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
                     77:     exit 1;
                     78: }
                     79: 
                     80: 
                     81: #######################################################
                     82: # internal variables
                     83: #
                     84: 
                     85: # number of errors
                     86: my $errcnt = 0;
                     87: # number of warnings
                     88: my $warncnt = 0;
                     89: 
                     90: #######################################################
                     91: # subroutines
                     92: #
                     93: 
                     94: 
                     95: #
                     96: # $files = read_resource_meta($rootnode)
                     97: #
                     98: # checks general resource meta information and reads the list of files
                     99: #
                    100: sub read_resource_meta {
                    101:     my ($rootnode) = @_;
                    102:     my %files;
                    103:     #
                    104:     # archive path
                    105:     #
                    106:     # get archive-path
                    107:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
                    108:     if (! $archname) {
                    109:    logger("ABORT", "archive-name element missing!!");
                    110:    exit 1;
                    111:     }
                    112: 
                    113:     #
                    114:     # files
                    115:     #
                    116:     my @filenodes = $rootnode->findnodes('child::file');
                    117:     foreach my $fn (@filenodes) {
                    118:    my $name = sstrip($fn->findvalue('child::name'));
                    119:    my $path = sstrip($fn->findvalue('child::path'));
                    120:    logger("DEBUG", "FILE: ($path)$name");
                    121:    my $f = ($path) ? "$path/$name" : "$name";
                    122:    $files{$f} = [$name];
                    123:     }
                    124: 
                    125:     #
                    126:     # dirs
                    127:     #
                    128:     my @dirnodes = $rootnode->findnodes('child::dir');
                    129:     foreach my $fn (@dirnodes) {
                    130:    my $name = sstrip($fn->findvalue('child::name'));
                    131:    my $path = sstrip($fn->findvalue('child::path'));
                    132:    logger("DEBUG", "DIR: ($path)$name");
                    133:    my $f = ($path) ? "$path/$name" : "$name";
                    134:    $files{$f} = [$name];
                    135:     }
                    136: 
                    137:     #
                    138:     # archive-storage-date
                    139:     #
                    140:     my $archdate = $rootnode->find('child::archive-storage-date');
                    141:     if ($archdate) {
                    142:    logger("INFO", "archive storage date: $archdate");
                    143:     } else {
                    144:    logger("ERROR", "archive storage date missing!");
                    145:    $errcnt++;
                    146:     }
                    147: 
                    148:     #
                    149:     # archive-recall-date
                    150:     #
                    151:     my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
                    152:     if ($recalldatenode) {
                    153:    print "INFO: archive recall date exists!\n";
                    154:    # delete old date
                    155:    $recalldatenode->removeChildNodes;
                    156:     } else {
                    157:    # create new storage date node
                    158:    $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
                    159:    # move after archive-path
                    160:    $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
                    161:     }
                    162:     $recalldatenode->appendTextNode(scalar localtime);
                    163:     $xml_changed++;
                    164: 
                    165:     return \%files;
                    166: }
                    167: 
                    168: 
                    169: #
                    170: # $%files = run_retrieve
                    171: #
                    172: # runs the retriever program on $docdir and returns a list of archived files
                    173: #
                    174: # Sample output:
                    175: # Retrieving          17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
                    176: #
                    177: sub run_retrieve {
                    178:     my %files;
                    179:     print LOG "START unarchive $version ", scalar localtime, "\n";
                    180:     my $archcmd = $archprog;
                    181:     $archcmd .= " retrieve -subdir=yes -replace=all";
                    182:     $archcmd .= " -description='$archname'";
                    183:     $archcmd .= " '$docdir/'";
                    184: 
                    185:     my $archcnt = 0;
                    186:     print LOG "CMD: $archcmd\n";
                    187:     if (open ARCH, "$archcmd 2>&1 |") {
                    188:    while (<ARCH>) {
                    189:        chomp;
                    190:        print LOG "ARCH: $_\n";
                    191:        if (/
                    192:        Retrieving
                    193:        \s+([\d,]+)    # size
                    194:        \s+(\S+)       # file name
                    195:        \s+\[Done\]
                    196:        /x) {
                    197:        my $size = $1;
                    198:        my $file = $2;
                    199:        $size =~ s/,//g;
                    200:        logger("DEBUG", "  RETRIEVE: file '$file'");
                    201:        $archcnt++;
                    202:        if ($files{$file}) {
                    203:            logger("WARNING", "file $file seems to be archived multiple times.");
                    204:            $warncnt++;
                    205:        } 
                    206:        $files{$file} = [$size];
                    207:        }
                    208:    }
                    209:    logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
                    210:     } else {
                    211:    logger("ABORT", "unable to start archive command '$archcmd'!!");
                    212:    exit 1;
                    213:     }
                    214:     return \%files;
                    215: }
                    216: 
                    217: 
                    218: #
                    219: # check_files(\%files_to_retrieve, \%retrieved_files)
                    220: #
                    221: # compares the list of archived and retrieved files
                    222: #
                    223: sub check_files {
                    224:     my ($to_retrieve, $retrieved) = @_;
                    225: 
                    226:     my $nt = scalar keys %$to_retrieve;
                    227:     my $na = scalar keys %$retrieved;
                    228: 
                    229:     foreach my $ft (sort keys %$to_retrieve) {
                    230:    my $fp = "$docdir/$ft";
                    231:    #logger("DEBUG", "  fp: $fp");
                    232:    if ($$retrieved{$fp}) {
                    233:        logger("DEBUG", "$ft retrieved OK");
                    234:        $$retrieved{$fp}->[1] = "OK";
                    235:    } else {
                    236:        logger("ERROR", "file entry '$ft' missing from archive!");
                    237:        $errcnt++;
                    238:    }
                    239:     }
                    240: 
                    241:     foreach my $fa (sort keys %$retrieved) {
                    242:    if (! $$retrieved{$fa}->[1]) {
                    243:        my ($fn, $fp) = split_file_path($fa);
                    244:        if ($index_files{$fn}) {
                    245:        logger("DEBUG", "$fa ignored");
                    246:        $na--;
                    247:        } else {
                    248:        logger("WARNING", "$fa retrieved but not in list!");
                    249:        $warncnt++;
                    250:        }
                    251:    }
                    252:     }
                    253: 
                    254:     if ($nt > $na) {
                    255:    logger("WARNING", "less files were retrieved ($na vs. $nt)!");
                    256:    $warncnt++;
                    257:     } elsif ($na > $nt) {
                    258:    logger("WARNING", "more files were retrieved ($na vs. $nt)!");
                    259:    $warncnt++;
                    260:     }
                    261: 
                    262: }
                    263: 
                    264: 
                    265: 
                    266: #######################################################
                    267: # main
                    268: #
                    269: 
                    270: logger("INFO", "unarchiver $version");
                    271: 
                    272: # make shure the right user is running this program
                    273: my $user = getlogin;
                    274: if (($user ne "archive")&&($user ne "root")) {
                    275:     logger("ABORT", "you must be archive or root user to run this program!");
                    276:     exit 1;
                    277: }
                    278: 
                    279: # use checkarchive first
                    280: if (system("$checkprog $docdir >/dev/null") == 0) {
                    281:     logger("INFO", "archive '$docdir' check OK");
                    282: } else {
                    283:     logger("ABORT", "archive '$docdir' check failed!!");
                    284:     exit 1;
                    285: }
                    286: 
                    287: # read index.meta file
                    288: my ($document, $rootnode) = read_xml($metafile);
                    289: 
                    290: # check index file
                    291: my $archived_files = read_resource_meta($rootnode);
                    292: my $num_archived_files = scalar keys %$archived_files;
                    293: 
                    294: # check for .archived file
                    295: if (-f "$docdir/.archived") {
                    296:     logger("INFO", ".archived file exists.");
                    297: } else {
                    298:     logger("WARNING", "no .archived file!");
                    299:     $warncnt++;
                    300: }
                    301: 
                    302: logger("INFO", "$num_archived_files files to retrieve.");
                    303: 
                    304: # retrieve
                    305: my $retrieved_files = run_retrieve;
                    306: 
                    307: my $num_arch_files = (scalar keys %$retrieved_files);
                    308: if ($num_arch_files == 0) {
                    309:     logger("ABORT", "no files retrieved!!");
                    310:     exit 1;
                    311: }
                    312: logger("INFO", "$num_arch_files files retrieved");
                    313: 
                    314: # check list of archived files
                    315: check_files($archived_files, $retrieved_files);
                    316: 
                    317: # rewrite index.meta file
                    318: write_xml($document, $metafile);
                    319: 
                    320: logger("INFO", "$warncnt warnings");
                    321: logger("INFO", "$errcnt errors");
                    322: if ($errcnt == 0) {
                    323:     logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved");
                    324: } else {
                    325:     logger("ABORT", "there were $errcnt errors!!");
                    326:     exit 1;
                    327: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>