File:  [Repository] / foxridge-archiver / unarchiver.pl
Revision 1.9: download - view: text, annotated - select for diffs - revision graph
Thu Mar 16 17:00:43 2017 UTC (7 years, 3 months ago) by casties
Branches: MAIN
CVS tags: HEAD
updated to Ubuntu Perl paths.

    1: #!/usr/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: use FileHandle;
    7: 
    8: # MPIWG libraries
    9: use lib '/usr/local/mpiwg/archive';
   10: use MPIWGStor;
   11: 
   12: # make output unbuffered
   13: $|=1;
   14: 
   15: 
   16: #######################################################
   17: # internal parameters
   18: #
   19: 
   20: # program version
   21: my $version = "0.3.4 (11.8.2009)";
   22: 
   23: # read command line parameters
   24: my $args = parseargs;
   25: 
   26: # debug level
   27: $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
   28: 
   29: # rewrite XML file (necessary for archive date!)
   30: my $fix_xml = 1;
   31: my $xml_changed = 0;
   32: # XML namespace (not really implemented!)
   33: my $namespace = "";
   34: 
   35: # archive name (archive-path element, usually == $docdir)
   36: my $archname;
   37: # archive storage date
   38: my $archdate;
   39: 
   40: #######################################################
   41: # external programs
   42: #
   43: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   44: if (! -x $archprog) {
   45:     logger("ABORT", "TSM client program '$archprog' missing!!");
   46:     exit 1;
   47: }
   48: my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
   49: if (! -x $checkprog) {
   50:     logger("ABORT", "archive checking program '$checkprog' missing!!");
   51:     exit 1;
   52: }
   53: # log file for archiver
   54: my $log_file = "/var/tmp/unarchiver.log";
   55: if (! open LOG, ">>$log_file") {
   56:     logger("ABORT", "unable to write log file '$log_file'!!");
   57:     exit 1;
   58: }
   59: LOG->autoflush(1);
   60: 
   61: #######################################################
   62: # check parameters that were passed to the program
   63: #
   64: my $docdir = $$args{'path'};
   65: if (! $docdir) {
   66:     print "ABORT: no document directory given!\n";
   67:     exit 1;
   68: }
   69: # strip trailing slashes
   70: $docdir =~ s/\/$//;
   71: if (! -d $docdir) {
   72:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   73:     exit 1;
   74: }
   75: 
   76: my $metafile = "$docdir/index.meta";
   77: if (! -f $metafile) {
   78:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   79:     exit 1;
   80: }
   81: 
   82: #######################################################
   83: # internal variables
   84: #
   85: 
   86: # number of errors
   87: my $errcnt = 0;
   88: # number of warnings
   89: my $warncnt = 0;
   90: 
   91: #######################################################
   92: # subroutines
   93: #
   94: 
   95: # construct document's parent dir
   96: sub get_parent {
   97:     my ($dirname) = @_;
   98:     my $dirparent = $dirname;
   99:     $dirparent =~ s!/[^/]+$!!;
  100:     return $dirparent;
  101: }
  102: 
  103: 
  104: #
  105: # $files = read_resource_meta($rootnode)
  106: #
  107: # checks general resource meta information and reads the list of files
  108: #
  109: sub read_resource_meta {
  110:     my ($rootnode) = @_;
  111:     my %files;
  112:     #
  113:     # archive path
  114:     #
  115:     # get archive-path
  116:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
  117:     if (! $archname) {
  118: 	logger("ABORT", "archive-name element missing!!");
  119: 	exit 1;
  120:     }
  121: 
  122:     #
  123:     # files
  124:     #
  125:     my @filenodes = $rootnode->findnodes('child::file');
  126:     foreach my $fn (@filenodes) {
  127: 	my $name = sstrip($fn->findvalue('child::name'));
  128: 	my $path = sstrip($fn->findvalue('child::path'));
  129: 	logger("DEBUG", "FILE: ($path)$name");
  130: 	my $f = ($path) ? "$path/$name" : "$name";
  131: 	$files{$f} = [$name];
  132:     }
  133: 
  134:     #
  135:     # dirs
  136:     #
  137:     my @dirnodes = $rootnode->findnodes('child::dir');
  138:     foreach my $fn (@dirnodes) {
  139: 	my $name = sstrip($fn->findvalue('child::name'));
  140: 	my $path = sstrip($fn->findvalue('child::path'));
  141: 	logger("DEBUG", "DIR: ($path)$name");
  142: 	my $f = ($path) ? "$path/$name" : "$name";
  143: 	$files{$f} = [$name];
  144:     }
  145: 
  146:     #
  147:     # archive-storage-date
  148:     #
  149:     my $archdate = $rootnode->find('child::archive-storage-date');
  150:     if ($archdate) {
  151: 	logger("INFO", "archive storage date: $archdate");
  152:     } else {
  153: 	logger("ERROR", "archive storage date missing!");
  154: 	$errcnt++;
  155:     }
  156: 
  157:     #
  158:     # archive-recall-date
  159:     #
  160:     my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
  161:     if ($recalldatenode) {
  162: 	print "INFO: archive recall date exists!\n";
  163: 	# delete old date
  164: 	$recalldatenode->removeChildNodes;
  165:     } else {
  166: 	# create new storage date node
  167: 	$recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
  168: 	# move after archive-path
  169: 	$rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
  170:     }
  171:     $recalldatenode->appendTextNode(scalar localtime);
  172:     $xml_changed++;
  173: 
  174:     return \%files;
  175: }
  176: 
  177: 
  178: #
  179: # $num_files = run_retrieve($docdir, $docmount, \%files)
  180: #
  181: # Runs the retriever program on $docdir and returns the number of unarchived files.
  182: # All filenames are put in %files. 
  183: # $docmount is the mount point of the doc partition in cases when the new mount point
  184: # is different.
  185: #
  186: # Sample output:
  187: # (old!) Retrieving          17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
  188: # Retrieving      42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
  189: sub run_retrieve {
  190:     my ($archdir, $archmount, $files) = @_;
  191:     my $archparent;
  192:     if ($archmount eq $archdir) {
  193: 	# no explicit mount point
  194: 	$archparent = get_parent($archdir);
  195:     } else {
  196: 	# destination dir is mount point
  197: 	$archparent = $archmount;
  198:     }
  199:     logger("INFO", "looking for archives in $archmount...");
  200: 
  201:     print LOG "START unarchive $version on ", scalar localtime, "\n";
  202:     my $archcmd = $archprog;
  203:     $archcmd .= " retrieve -subdir=yes -replace=all -ifnewer";
  204:     $archcmd .= " -description='$archname'"; # archive name
  205:     $archcmd .= " '$archmount/'"; # archive mount point
  206:     $archcmd .= " '$archparent/'"; # destination dir name
  207: 
  208:     logger('INFO', "querying TSM server for $archmount, please wait...");
  209: 
  210:     my $archcnt = 0;
  211:     my $numfiles = 0;
  212:     print LOG "CMD: $archcmd\n";
  213:     if (open ARCH, "$archcmd 2>&1 |") {
  214: 	while (<ARCH>) {
  215: 	    chomp;
  216: 	    print LOG "ARCH: $_\n";
  217: 	    if (/
  218: 		Retrieving
  219: 		\s+([\d,]+)    # size
  220: 		\s+(\S+)       # file name
  221: 		\s+-->
  222: 		\s+(\S+)       # destination file name
  223: 		\s+\[Done\]
  224: 		/x) {
  225: 		my $size = $1;
  226: 		my $file = $2;
  227: 		$size =~ s/,//g;
  228: 		logger("DEBUG", "  RETRIEVE: file '$file'");
  229: 		$archcnt++;
  230: 		if ($$files{$file}) {
  231: 		    logger("WARNING", "file $file seems to be archived multiple times.");
  232: 		    $warncnt++;
  233: 		} 
  234: 		$$files{$file} = [$size];
  235: 	    }
  236: 	}
  237: 	$numfiles =  (scalar keys %$files);
  238: 	logger("INFO", "$archcnt archives of $numfiles files (in $archmount).");
  239:     } else {
  240: 	logger("ABORT", "unable to start archive command '$archcmd'!!");
  241: 	exit 1;
  242:     }
  243:     return $numfiles;
  244: }
  245: 
  246: 
  247: #
  248: # check_files(\%files_to_retrieve, \%retrieved_files)
  249: #
  250: # compares the list of archived and retrieved files
  251: #
  252: sub check_files {
  253:     my ($to_retrieve, $retrieved) = @_;
  254: 
  255:     my $nt = scalar keys %$to_retrieve;
  256:     my $na = scalar keys %$retrieved;
  257: 
  258:     foreach my $ft (sort keys %$to_retrieve) {
  259: 	my $fp = "$docdir/$ft";
  260: 	#logger("DEBUG", "  fp: $fp");
  261: 	if ($$retrieved{$fp}) {
  262: 	    logger("DEBUG", "$ft retrieved OK");
  263: 	    $$retrieved{$fp}->[1] = "OK";
  264: 	} else {
  265: 	    logger("ERROR", "file entry '$ft' missing from archive!");
  266: 	    $errcnt++;
  267: 	}
  268:     }
  269: 
  270:     foreach my $fa (sort keys %$retrieved) {
  271: 	if (! $$retrieved{$fa}->[1]) {
  272: 	    my ($fn, $fp) = split_file_path($fa);
  273: 	    if ($index_files{$fn}) {
  274: 		logger("DEBUG", "$fa ignored");
  275: 		$na--;
  276: 	    } else {
  277: 		logger("WARNING", "$fa retrieved but not in list!");
  278: 		$warncnt++;
  279: 	    }
  280: 	}
  281:     }
  282: 
  283:     if ($nt > $na) {
  284: 	logger("WARNING", "less files were retrieved ($na vs. $nt)!");
  285: 	$warncnt++;
  286:     } elsif ($na > $nt) {
  287: 	logger("WARNING", "more files were retrieved ($na vs. $nt)!");
  288: 	$warncnt++;
  289:     }
  290: 
  291: }
  292: 
  293: 
  294: 
  295: #######################################################
  296: # main
  297: #
  298: 
  299: logger("INFO", "unarchiver $version");
  300: 
  301: # make shure the right user is running this program
  302: my $user = getlogin || getpwuid($<);
  303: if (($user)&&($user ne "archive")&&($user ne "root")) {
  304:     logger("ABORT", "you must be archive or root user to run this program!");
  305:     exit 1;
  306: }
  307: 
  308: # use checkarchive first
  309: if (system("$checkprog $docdir >/dev/null") == 0) {
  310:     logger("INFO", "archive \"$docdir\" check OK");
  311: } else {
  312:     logger("ABORT", "archive \"$docdir\" check failed!!");
  313:     exit 1;
  314: }
  315: 
  316: # read index.meta file
  317: my ($document, $rootnode) = read_xml($metafile);
  318: 
  319: # check index file
  320: my $archived_files = read_resource_meta($rootnode);
  321: my $num_archived_files = scalar keys %$archived_files;
  322: 
  323: # check for .archived file
  324: if (-f "$docdir/.archived") {
  325:     logger("INFO", ".archived file exists.");
  326: } else {
  327:     logger("WARNING", "no .archived file!");
  328:     $warncnt++;
  329: }
  330: 
  331: logger("INFO", "$num_archived_files files to retrieve.");
  332: 
  333: # save current index.meta
  334: park_file($metafile); 
  335: 
  336: # retrieve
  337: my %retrieved_files = ();
  338: my $archcnt = 0;
  339: 
  340: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
  341:     # TSM needs two different paths because of historical mount points :-(
  342:     # try the new one first
  343:     $archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files);
  344:     if ($archcnt == 0) {
  345: 	# and then the old one
  346: 	$archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files);
  347:     }
  348: } else {
  349:     # otherwise we assume that it works
  350:     $archcnt += run_retrieve($docdir, $docdir, \%retrieved_files);
  351: }
  352: 
  353: # restore current index.meta
  354: unpark_file($metafile);
  355: 
  356: if ($archcnt == 0) {
  357:     logger("ABORT", "no files retrieved!!");
  358:     exit 1;
  359: }
  360: logger("INFO", "$archcnt files retrieved");
  361: 
  362: # check list of archived files
  363: check_files($archived_files, \%retrieved_files);
  364: 
  365: # rewrite index.meta file
  366: write_xml($document, $metafile);
  367: 
  368: logger("INFO", "$warncnt warnings");
  369: logger("INFO", "$errcnt errors");
  370: if ($errcnt == 0) {
  371:     logger("DONE", "$archcnt archived files retrieved");
  372: } else {
  373:     logger("ABORT", "there were $errcnt errors!!");
  374:     exit 1;
  375: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>