Annotation of foxridge-archiver/unarchiver.pl, revision 1.9

1.9     ! casties     1: #!/usr/bin/perl -w
1.1       casties     2: 
                      3: use strict;
                      4: 
                      5: use XML::LibXML;
1.2       casties     6: use FileHandle;
1.1       casties     7: 
                      8: # MPIWG libraries
                      9: use lib '/usr/local/mpiwg/archive';
                     10: use MPIWGStor;
                     11: 
                     12: # make output unbuffered
                     13: $|=1;
                     14: 
                     15: 
                     16: #######################################################
                     17: # internal parameters
                     18: #
                     19: 
                     20: # program version
1.8       casties    21: my $version = "0.3.4 (11.8.2009)";
1.1       casties    22: 
                     23: # read command line parameters
                     24: my $args = parseargs;
                     25: 
                     26: # debug level
                     27: $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
                     28: 
                     29: # rewrite XML file (necessary for archive date!)
                     30: my $fix_xml = 1;
                     31: my $xml_changed = 0;
                     32: # XML namespace (not really implemented!)
                     33: my $namespace = "";
                     34: 
                     35: # archive name (archive-path element, usually == $docdir)
                     36: my $archname;
                     37: # archive storage date
                     38: my $archdate;
                     39: 
                     40: #######################################################
                     41: # external programs
                     42: #
                     43: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
                     44: if (! -x $archprog) {
                     45:     logger("ABORT", "TSM client program '$archprog' missing!!");
                     46:     exit 1;
                     47: }
                     48: my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
                     49: if (! -x $checkprog) {
                     50:     logger("ABORT", "archive checking program '$checkprog' missing!!");
                     51:     exit 1;
                     52: }
                     53: # log file for archiver
                     54: my $log_file = "/var/tmp/unarchiver.log";
                     55: if (! open LOG, ">>$log_file") {
                     56:     logger("ABORT", "unable to write log file '$log_file'!!");
                     57:     exit 1;
                     58: }
1.2       casties    59: LOG->autoflush(1);
1.1       casties    60: 
                     61: #######################################################
                     62: # check parameters that were passed to the program
                     63: #
                     64: my $docdir = $$args{'path'};
                     65: if (! $docdir) {
                     66:     print "ABORT: no document directory given!\n";
                     67:     exit 1;
                     68: }
                     69: # strip trailing slashes
                     70: $docdir =~ s/\/$//;
                     71: if (! -d $docdir) {
                     72:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
                     73:     exit 1;
                     74: }
                     75: 
                     76: my $metafile = "$docdir/index.meta";
                     77: if (! -f $metafile) {
                     78:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
                     79:     exit 1;
                     80: }
                     81: 
                     82: #######################################################
                     83: # internal variables
                     84: #
                     85: 
                     86: # number of errors
                     87: my $errcnt = 0;
                     88: # number of warnings
                     89: my $warncnt = 0;
                     90: 
                     91: #######################################################
                     92: # subroutines
                     93: #
                     94: 
1.3       casties    95: # construct document's parent dir
                     96: sub get_parent {
                     97:     my ($dirname) = @_;
                     98:     my $dirparent = $dirname;
                     99:     $dirparent =~ s!/[^/]+$!!;
                    100:     return $dirparent;
                    101: }
                    102: 
1.1       casties   103: 
                    104: #
                    105: # $files = read_resource_meta($rootnode)
                    106: #
                    107: # checks general resource meta information and reads the list of files
                    108: #
                    109: sub read_resource_meta {
                    110:     my ($rootnode) = @_;
                    111:     my %files;
                    112:     #
                    113:     # archive path
                    114:     #
                    115:     # get archive-path
                    116:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
                    117:     if (! $archname) {
                    118:    logger("ABORT", "archive-name element missing!!");
                    119:    exit 1;
                    120:     }
                    121: 
                    122:     #
                    123:     # files
                    124:     #
                    125:     my @filenodes = $rootnode->findnodes('child::file');
                    126:     foreach my $fn (@filenodes) {
                    127:    my $name = sstrip($fn->findvalue('child::name'));
                    128:    my $path = sstrip($fn->findvalue('child::path'));
                    129:    logger("DEBUG", "FILE: ($path)$name");
                    130:    my $f = ($path) ? "$path/$name" : "$name";
                    131:    $files{$f} = [$name];
                    132:     }
                    133: 
                    134:     #
                    135:     # dirs
                    136:     #
                    137:     my @dirnodes = $rootnode->findnodes('child::dir');
                    138:     foreach my $fn (@dirnodes) {
                    139:    my $name = sstrip($fn->findvalue('child::name'));
                    140:    my $path = sstrip($fn->findvalue('child::path'));
                    141:    logger("DEBUG", "DIR: ($path)$name");
                    142:    my $f = ($path) ? "$path/$name" : "$name";
                    143:    $files{$f} = [$name];
                    144:     }
                    145: 
                    146:     #
                    147:     # archive-storage-date
                    148:     #
                    149:     my $archdate = $rootnode->find('child::archive-storage-date');
                    150:     if ($archdate) {
                    151:    logger("INFO", "archive storage date: $archdate");
                    152:     } else {
                    153:    logger("ERROR", "archive storage date missing!");
                    154:    $errcnt++;
                    155:     }
                    156: 
                    157:     #
                    158:     # archive-recall-date
                    159:     #
                    160:     my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
                    161:     if ($recalldatenode) {
                    162:    print "INFO: archive recall date exists!\n";
                    163:    # delete old date
                    164:    $recalldatenode->removeChildNodes;
                    165:     } else {
                    166:    # create new storage date node
                    167:    $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
                    168:    # move after archive-path
                    169:    $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
                    170:     }
                    171:     $recalldatenode->appendTextNode(scalar localtime);
                    172:     $xml_changed++;
                    173: 
                    174:     return \%files;
                    175: }
                    176: 
                    177: 
                    178: #
1.3       casties   179: # $num_files = run_retrieve($docdir, $docmount, \%files)
1.1       casties   180: #
1.3       casties   181: # Runs the retriever program on $docdir and returns the number of unarchived files.
                    182: # All filenames are put in %files. 
                    183: # $docmount is the mount point of the doc partition in cases when the new mount point
                    184: # is different.
1.1       casties   185: #
                    186: # Sample output:
1.2       casties   187: # (old!) Retrieving          17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
                    188: # Retrieving      42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
1.1       casties   189: sub run_retrieve {
1.3       casties   190:     my ($archdir, $archmount, $files) = @_;
                    191:     my $archparent;
                    192:     if ($archmount eq $archdir) {
                    193:    # no explicit mount point
                    194:    $archparent = get_parent($archdir);
                    195:     } else {
                    196:    # destination dir is mount point
                    197:    $archparent = $archmount;
                    198:     }
1.5       casties   199:     logger("INFO", "looking for archives in $archmount...");
1.3       casties   200: 
1.2       casties   201:     print LOG "START unarchive $version on ", scalar localtime, "\n";
1.1       casties   202:     my $archcmd = $archprog;
1.8       casties   203:     $archcmd .= " retrieve -subdir=yes -replace=all -ifnewer";
1.3       casties   204:     $archcmd .= " -description='$archname'"; # archive name
                    205:     $archcmd .= " '$archmount/'"; # archive mount point
                    206:     $archcmd .= " '$archparent/'"; # destination dir name
1.1       casties   207: 
1.6       casties   208:     logger('INFO', "querying TSM server for $archmount, please wait...");
                    209: 
1.1       casties   210:     my $archcnt = 0;
1.3       casties   211:     my $numfiles = 0;
1.1       casties   212:     print LOG "CMD: $archcmd\n";
                    213:     if (open ARCH, "$archcmd 2>&1 |") {
                    214:    while (<ARCH>) {
                    215:        chomp;
                    216:        print LOG "ARCH: $_\n";
                    217:        if (/
                    218:        Retrieving
                    219:        \s+([\d,]+)    # size
                    220:        \s+(\S+)       # file name
1.2       casties   221:        \s+-->
                    222:        \s+(\S+)       # destination file name
1.1       casties   223:        \s+\[Done\]
                    224:        /x) {
                    225:        my $size = $1;
                    226:        my $file = $2;
                    227:        $size =~ s/,//g;
                    228:        logger("DEBUG", "  RETRIEVE: file '$file'");
                    229:        $archcnt++;
1.3       casties   230:        if ($$files{$file}) {
1.1       casties   231:            logger("WARNING", "file $file seems to be archived multiple times.");
                    232:            $warncnt++;
                    233:        } 
1.3       casties   234:        $$files{$file} = [$size];
1.1       casties   235:        }
                    236:    }
1.3       casties   237:    $numfiles =  (scalar keys %$files);
                    238:    logger("INFO", "$archcnt archives of $numfiles files (in $archmount).");
1.1       casties   239:     } else {
                    240:    logger("ABORT", "unable to start archive command '$archcmd'!!");
                    241:    exit 1;
                    242:     }
1.3       casties   243:     return $numfiles;
1.1       casties   244: }
                    245: 
                    246: 
                    247: #
                    248: # check_files(\%files_to_retrieve, \%retrieved_files)
                    249: #
                    250: # compares the list of archived and retrieved files
                    251: #
                    252: sub check_files {
                    253:     my ($to_retrieve, $retrieved) = @_;
                    254: 
                    255:     my $nt = scalar keys %$to_retrieve;
                    256:     my $na = scalar keys %$retrieved;
                    257: 
                    258:     foreach my $ft (sort keys %$to_retrieve) {
                    259:    my $fp = "$docdir/$ft";
                    260:    #logger("DEBUG", "  fp: $fp");
                    261:    if ($$retrieved{$fp}) {
                    262:        logger("DEBUG", "$ft retrieved OK");
                    263:        $$retrieved{$fp}->[1] = "OK";
                    264:    } else {
                    265:        logger("ERROR", "file entry '$ft' missing from archive!");
                    266:        $errcnt++;
                    267:    }
                    268:     }
                    269: 
                    270:     foreach my $fa (sort keys %$retrieved) {
                    271:    if (! $$retrieved{$fa}->[1]) {
                    272:        my ($fn, $fp) = split_file_path($fa);
                    273:        if ($index_files{$fn}) {
                    274:        logger("DEBUG", "$fa ignored");
                    275:        $na--;
                    276:        } else {
                    277:        logger("WARNING", "$fa retrieved but not in list!");
                    278:        $warncnt++;
                    279:        }
                    280:    }
                    281:     }
                    282: 
                    283:     if ($nt > $na) {
                    284:    logger("WARNING", "less files were retrieved ($na vs. $nt)!");
                    285:    $warncnt++;
                    286:     } elsif ($na > $nt) {
                    287:    logger("WARNING", "more files were retrieved ($na vs. $nt)!");
                    288:    $warncnt++;
                    289:     }
                    290: 
                    291: }
                    292: 
                    293: 
                    294: 
                    295: #######################################################
                    296: # main
                    297: #
                    298: 
                    299: logger("INFO", "unarchiver $version");
                    300: 
                    301: # make shure the right user is running this program
1.7       casties   302: my $user = getlogin || getpwuid($<);
1.2       casties   303: if (($user)&&($user ne "archive")&&($user ne "root")) {
1.1       casties   304:     logger("ABORT", "you must be archive or root user to run this program!");
                    305:     exit 1;
                    306: }
                    307: 
                    308: # use checkarchive first
                    309: if (system("$checkprog $docdir >/dev/null") == 0) {
1.3       casties   310:     logger("INFO", "archive \"$docdir\" check OK");
1.1       casties   311: } else {
1.3       casties   312:     logger("ABORT", "archive \"$docdir\" check failed!!");
1.1       casties   313:     exit 1;
                    314: }
                    315: 
                    316: # read index.meta file
                    317: my ($document, $rootnode) = read_xml($metafile);
                    318: 
                    319: # check index file
                    320: my $archived_files = read_resource_meta($rootnode);
                    321: my $num_archived_files = scalar keys %$archived_files;
                    322: 
                    323: # check for .archived file
                    324: if (-f "$docdir/.archived") {
                    325:     logger("INFO", ".archived file exists.");
                    326: } else {
                    327:     logger("WARNING", "no .archived file!");
                    328:     $warncnt++;
                    329: }
                    330: 
                    331: logger("INFO", "$num_archived_files files to retrieve.");
                    332: 
1.3       casties   333: # save current index.meta
                    334: park_file($metafile); 
                    335: 
1.1       casties   336: # retrieve
1.3       casties   337: my %retrieved_files = ();
                    338: my $archcnt = 0;
                    339: 
                    340: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
                    341:     # TSM needs two different paths because of historical mount points :-(
                    342:     # try the new one first
                    343:     $archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files);
                    344:     if ($archcnt == 0) {
                    345:    # and then the old one
                    346:    $archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files);
                    347:     }
                    348: } else {
                    349:     # otherwise we assume that it works
                    350:     $archcnt += run_retrieve($docdir, $docdir, \%retrieved_files);
                    351: }
                    352: 
                    353: # restore current index.meta
                    354: unpark_file($metafile);
1.1       casties   355: 
1.3       casties   356: if ($archcnt == 0) {
1.1       casties   357:     logger("ABORT", "no files retrieved!!");
                    358:     exit 1;
                    359: }
1.3       casties   360: logger("INFO", "$archcnt files retrieved");
1.1       casties   361: 
                    362: # check list of archived files
1.3       casties   363: check_files($archived_files, \%retrieved_files);
1.1       casties   364: 
                    365: # rewrite index.meta file
                    366: write_xml($document, $metafile);
                    367: 
                    368: logger("INFO", "$warncnt warnings");
                    369: logger("INFO", "$errcnt errors");
                    370: if ($errcnt == 0) {
1.3       casties   371:     logger("DONE", "$archcnt archived files retrieved");
1.1       casties   372: } else {
                    373:     logger("ABORT", "there were $errcnt errors!!");
                    374:     exit 1;
                    375: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>