File:  [Repository] / foxridge-archiver / unarchiver.pl
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Thu Sep 23 19:55:44 2004 UTC (19 years, 9 months ago) by casties
Branches: MAIN
CVS tags: HEAD

fixed problem with dsmc because of new filespace.
uses excplicit destination directory now.

    1: #!/usr/local/bin/perl -w
    2: 
    3: use strict;
    4: 
    5: use XML::LibXML;
    6: use FileHandle;
    7: 
    8: # MPIWG libraries
    9: use lib '/usr/local/mpiwg/archive';
   10: use MPIWGStor;
   11: 
   12: # make output unbuffered
   13: $|=1;
   14: 
   15: 
   16: #######################################################
   17: # internal parameters
   18: #
   19: 
   20: # program version
   21: my $version = "0.2 (23.9.2004)";
   22: 
   23: # read command line parameters
   24: my $args = parseargs;
   25: 
   26: # debug level
   27: $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
   28: 
   29: # rewrite XML file (necessary for archive date!)
   30: my $fix_xml = 1;
   31: my $xml_changed = 0;
   32: # XML namespace (not really implemented!)
   33: my $namespace = "";
   34: 
   35: # archive name (archive-path element, usually == $docdir)
   36: my $archname;
   37: # archive storage date
   38: my $archdate;
   39: 
   40: #######################################################
   41: # external programs
   42: #
   43: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
   44: if (! -x $archprog) {
   45:     logger("ABORT", "TSM client program '$archprog' missing!!");
   46:     exit 1;
   47: }
   48: my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
   49: if (! -x $checkprog) {
   50:     logger("ABORT", "archive checking program '$checkprog' missing!!");
   51:     exit 1;
   52: }
   53: # log file for archiver
   54: my $log_file = "/var/tmp/unarchiver.log";
   55: if (! open LOG, ">>$log_file") {
   56:     logger("ABORT", "unable to write log file '$log_file'!!");
   57:     exit 1;
   58: }
   59: LOG->autoflush(1);
   60: 
   61: #######################################################
   62: # check parameters that were passed to the program
   63: #
   64: my $docdir = $$args{'path'};
   65: if (! $docdir) {
   66:     print "ABORT: no document directory given!\n";
   67:     exit 1;
   68: }
   69: # strip trailing slashes
   70: $docdir =~ s/\/$//;
   71: if (! -d $docdir) {
   72:     print "ABORT: document directory \'$docdir\' doesn't exist!\n";
   73:     exit 1;
   74: }
   75: 
   76: my $metafile = "$docdir/index.meta";
   77: if (! -f $metafile) {
   78:     print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
   79:     exit 1;
   80: }
   81: 
   82: # construct document's parent dir
   83: my $docparent = $docdir;
   84: $docparent =~ s!/[^/]+$!!;
   85: 
   86: #######################################################
   87: # internal variables
   88: #
   89: 
   90: # number of errors
   91: my $errcnt = 0;
   92: # number of warnings
   93: my $warncnt = 0;
   94: 
   95: #######################################################
   96: # subroutines
   97: #
   98: 
   99: 
  100: #
  101: # $files = read_resource_meta($rootnode)
  102: #
  103: # checks general resource meta information and reads the list of files
  104: #
  105: sub read_resource_meta {
  106:     my ($rootnode) = @_;
  107:     my %files;
  108:     #
  109:     # archive path
  110:     #
  111:     # get archive-path
  112:     $archname = sstrip($rootnode->findvalue('child::archive-path'));
  113:     if (! $archname) {
  114: 	logger("ABORT", "archive-name element missing!!");
  115: 	exit 1;
  116:     }
  117: 
  118:     #
  119:     # files
  120:     #
  121:     my @filenodes = $rootnode->findnodes('child::file');
  122:     foreach my $fn (@filenodes) {
  123: 	my $name = sstrip($fn->findvalue('child::name'));
  124: 	my $path = sstrip($fn->findvalue('child::path'));
  125: 	logger("DEBUG", "FILE: ($path)$name");
  126: 	my $f = ($path) ? "$path/$name" : "$name";
  127: 	$files{$f} = [$name];
  128:     }
  129: 
  130:     #
  131:     # dirs
  132:     #
  133:     my @dirnodes = $rootnode->findnodes('child::dir');
  134:     foreach my $fn (@dirnodes) {
  135: 	my $name = sstrip($fn->findvalue('child::name'));
  136: 	my $path = sstrip($fn->findvalue('child::path'));
  137: 	logger("DEBUG", "DIR: ($path)$name");
  138: 	my $f = ($path) ? "$path/$name" : "$name";
  139: 	$files{$f} = [$name];
  140:     }
  141: 
  142:     #
  143:     # archive-storage-date
  144:     #
  145:     my $archdate = $rootnode->find('child::archive-storage-date');
  146:     if ($archdate) {
  147: 	logger("INFO", "archive storage date: $archdate");
  148:     } else {
  149: 	logger("ERROR", "archive storage date missing!");
  150: 	$errcnt++;
  151:     }
  152: 
  153:     #
  154:     # archive-recall-date
  155:     #
  156:     my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
  157:     if ($recalldatenode) {
  158: 	print "INFO: archive recall date exists!\n";
  159: 	# delete old date
  160: 	$recalldatenode->removeChildNodes;
  161:     } else {
  162: 	# create new storage date node
  163: 	$recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
  164: 	# move after archive-path
  165: 	$rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
  166:     }
  167:     $recalldatenode->appendTextNode(scalar localtime);
  168:     $xml_changed++;
  169: 
  170:     return \%files;
  171: }
  172: 
  173: 
  174: #
  175: # $%files = run_retrieve
  176: #
  177: # runs the retriever program on $docdir and returns a list of archived files
  178: #
  179: # Sample output:
  180: # (old!) Retrieving          17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
  181: # Retrieving      42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
  182: sub run_retrieve {
  183:     my %files;
  184:     print LOG "START unarchive $version on ", scalar localtime, "\n";
  185:     my $archcmd = $archprog;
  186:     $archcmd .= " retrieve -subdir=yes -replace=all";
  187:     $archcmd .= " -description='$archname'";
  188:     $archcmd .= " '$docdir/'"; # archive name
  189:     $archcmd .= " '$docparent/'"; # destination dir name
  190: 
  191:     my $archcnt = 0;
  192:     print LOG "CMD: $archcmd\n";
  193:     if (open ARCH, "$archcmd 2>&1 |") {
  194: 	while (<ARCH>) {
  195: 	    chomp;
  196: 	    print LOG "ARCH: $_\n";
  197: 	    if (/
  198: 		Retrieving
  199: 		\s+([\d,]+)    # size
  200: 		\s+(\S+)       # file name
  201: 		\s+-->
  202: 		\s+(\S+)       # destination file name
  203: 		\s+\[Done\]
  204: 		/x) {
  205: 		my $size = $1;
  206: 		my $file = $2;
  207: 		$size =~ s/,//g;
  208: 		logger("DEBUG", "  RETRIEVE: file '$file'");
  209: 		$archcnt++;
  210: 		if ($files{$file}) {
  211: 		    logger("WARNING", "file $file seems to be archived multiple times.");
  212: 		    $warncnt++;
  213: 		} 
  214: 		$files{$file} = [$size];
  215: 	    }
  216: 	}
  217: 	logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
  218:     } else {
  219: 	logger("ABORT", "unable to start archive command '$archcmd'!!");
  220: 	exit 1;
  221:     }
  222:     return \%files;
  223: }
  224: 
  225: 
  226: #
  227: # check_files(\%files_to_retrieve, \%retrieved_files)
  228: #
  229: # compares the list of archived and retrieved files
  230: #
  231: sub check_files {
  232:     my ($to_retrieve, $retrieved) = @_;
  233: 
  234:     my $nt = scalar keys %$to_retrieve;
  235:     my $na = scalar keys %$retrieved;
  236: 
  237:     foreach my $ft (sort keys %$to_retrieve) {
  238: 	my $fp = "$docdir/$ft";
  239: 	#logger("DEBUG", "  fp: $fp");
  240: 	if ($$retrieved{$fp}) {
  241: 	    logger("DEBUG", "$ft retrieved OK");
  242: 	    $$retrieved{$fp}->[1] = "OK";
  243: 	} else {
  244: 	    logger("ERROR", "file entry '$ft' missing from archive!");
  245: 	    $errcnt++;
  246: 	}
  247:     }
  248: 
  249:     foreach my $fa (sort keys %$retrieved) {
  250: 	if (! $$retrieved{$fa}->[1]) {
  251: 	    my ($fn, $fp) = split_file_path($fa);
  252: 	    if ($index_files{$fn}) {
  253: 		logger("DEBUG", "$fa ignored");
  254: 		$na--;
  255: 	    } else {
  256: 		logger("WARNING", "$fa retrieved but not in list!");
  257: 		$warncnt++;
  258: 	    }
  259: 	}
  260:     }
  261: 
  262:     if ($nt > $na) {
  263: 	logger("WARNING", "less files were retrieved ($na vs. $nt)!");
  264: 	$warncnt++;
  265:     } elsif ($na > $nt) {
  266: 	logger("WARNING", "more files were retrieved ($na vs. $nt)!");
  267: 	$warncnt++;
  268:     }
  269: 
  270: }
  271: 
  272: 
  273: 
  274: #######################################################
  275: # main
  276: #
  277: 
  278: logger("INFO", "unarchiver $version");
  279: 
  280: # make shure the right user is running this program
  281: my $user = getlogin;
  282: if (($user)&&($user ne "archive")&&($user ne "root")) {
  283:     logger("ABORT", "you must be archive or root user to run this program!");
  284:     exit 1;
  285: }
  286: 
  287: # use checkarchive first
  288: if (system("$checkprog $docdir >/dev/null") == 0) {
  289:     logger("INFO", "archive '$docdir' check OK");
  290: } else {
  291:     logger("ABORT", "archive '$docdir' check failed!!");
  292:     exit 1;
  293: }
  294: 
  295: # read index.meta file
  296: my ($document, $rootnode) = read_xml($metafile);
  297: 
  298: # check index file
  299: my $archived_files = read_resource_meta($rootnode);
  300: my $num_archived_files = scalar keys %$archived_files;
  301: 
  302: # check for .archived file
  303: if (-f "$docdir/.archived") {
  304:     logger("INFO", ".archived file exists.");
  305: } else {
  306:     logger("WARNING", "no .archived file!");
  307:     $warncnt++;
  308: }
  309: 
  310: logger("INFO", "$num_archived_files files to retrieve.");
  311: 
  312: # retrieve
  313: my $retrieved_files = run_retrieve;
  314: 
  315: my $num_arch_files = (scalar keys %$retrieved_files);
  316: if ($num_arch_files == 0) {
  317:     logger("ABORT", "no files retrieved!!");
  318:     exit 1;
  319: }
  320: logger("INFO", "$num_arch_files files retrieved");
  321: 
  322: # check list of archived files
  323: check_files($archived_files, $retrieved_files);
  324: 
  325: # rewrite index.meta file
  326: write_xml($document, $metafile);
  327: 
  328: logger("INFO", "$warncnt warnings");
  329: logger("INFO", "$errcnt errors");
  330: if ($errcnt == 0) {
  331:     logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved");
  332: } else {
  333:     logger("ABORT", "there were $errcnt errors!!");
  334:     exit 1;
  335: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>