Annotation of foxridge-archiver/unarchiver.pl, revision 1.3
1.1 casties 1: #!/usr/local/bin/perl -w
2:
3: use strict;
4:
5: use XML::LibXML;
1.2 casties 6: use FileHandle;
1.1 casties 7:
8: # MPIWG libraries
9: use lib '/usr/local/mpiwg/archive';
10: use MPIWGStor;
11:
12: # make output unbuffered
13: $|=1;
14:
15:
16: #######################################################
17: # internal parameters
18: #
19:
20: # program version
1.3 ! casties 21: my $version = "0.3 (20.1.2004)";
1.1 casties 22:
23: # read command line parameters
24: my $args = parseargs;
25:
26: # debug level
27: $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
28:
29: # rewrite XML file (necessary for archive date!)
30: my $fix_xml = 1;
31: my $xml_changed = 0;
32: # XML namespace (not really implemented!)
33: my $namespace = "";
34:
35: # archive name (archive-path element, usually == $docdir)
36: my $archname;
37: # archive storage date
38: my $archdate;
39:
40: #######################################################
41: # external programs
42: #
43: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
44: if (! -x $archprog) {
45: logger("ABORT", "TSM client program '$archprog' missing!!");
46: exit 1;
47: }
48: my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
49: if (! -x $checkprog) {
50: logger("ABORT", "archive checking program '$checkprog' missing!!");
51: exit 1;
52: }
53: # log file for archiver
54: my $log_file = "/var/tmp/unarchiver.log";
55: if (! open LOG, ">>$log_file") {
56: logger("ABORT", "unable to write log file '$log_file'!!");
57: exit 1;
58: }
1.2 casties 59: LOG->autoflush(1);
1.1 casties 60:
61: #######################################################
62: # check parameters that were passed to the program
63: #
64: my $docdir = $$args{'path'};
65: if (! $docdir) {
66: print "ABORT: no document directory given!\n";
67: exit 1;
68: }
69: # strip trailing slashes
70: $docdir =~ s/\/$//;
71: if (! -d $docdir) {
72: print "ABORT: document directory \'$docdir\' doesn't exist!\n";
73: exit 1;
74: }
75:
76: my $metafile = "$docdir/index.meta";
77: if (! -f $metafile) {
78: print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
79: exit 1;
80: }
81:
82: #######################################################
83: # internal variables
84: #
85:
86: # number of errors
87: my $errcnt = 0;
88: # number of warnings
89: my $warncnt = 0;
90:
91: #######################################################
92: # subroutines
93: #
94:
1.3 ! casties 95: # construct document's parent dir
! 96: sub get_parent {
! 97: my ($dirname) = @_;
! 98: my $dirparent = $dirname;
! 99: $dirparent =~ s!/[^/]+$!!;
! 100: return $dirparent;
! 101: }
! 102:
1.1 casties 103:
104: #
105: # $files = read_resource_meta($rootnode)
106: #
107: # checks general resource meta information and reads the list of files
108: #
109: sub read_resource_meta {
110: my ($rootnode) = @_;
111: my %files;
112: #
113: # archive path
114: #
115: # get archive-path
116: $archname = sstrip($rootnode->findvalue('child::archive-path'));
117: if (! $archname) {
118: logger("ABORT", "archive-name element missing!!");
119: exit 1;
120: }
121:
122: #
123: # files
124: #
125: my @filenodes = $rootnode->findnodes('child::file');
126: foreach my $fn (@filenodes) {
127: my $name = sstrip($fn->findvalue('child::name'));
128: my $path = sstrip($fn->findvalue('child::path'));
129: logger("DEBUG", "FILE: ($path)$name");
130: my $f = ($path) ? "$path/$name" : "$name";
131: $files{$f} = [$name];
132: }
133:
134: #
135: # dirs
136: #
137: my @dirnodes = $rootnode->findnodes('child::dir');
138: foreach my $fn (@dirnodes) {
139: my $name = sstrip($fn->findvalue('child::name'));
140: my $path = sstrip($fn->findvalue('child::path'));
141: logger("DEBUG", "DIR: ($path)$name");
142: my $f = ($path) ? "$path/$name" : "$name";
143: $files{$f} = [$name];
144: }
145:
146: #
147: # archive-storage-date
148: #
149: my $archdate = $rootnode->find('child::archive-storage-date');
150: if ($archdate) {
151: logger("INFO", "archive storage date: $archdate");
152: } else {
153: logger("ERROR", "archive storage date missing!");
154: $errcnt++;
155: }
156:
157: #
158: # archive-recall-date
159: #
160: my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
161: if ($recalldatenode) {
162: print "INFO: archive recall date exists!\n";
163: # delete old date
164: $recalldatenode->removeChildNodes;
165: } else {
166: # create new storage date node
167: $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
168: # move after archive-path
169: $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
170: }
171: $recalldatenode->appendTextNode(scalar localtime);
172: $xml_changed++;
173:
174: return \%files;
175: }
176:
177:
178: #
1.3 ! casties 179: # $num_files = run_retrieve($docdir, $docmount, \%files)
1.1 casties 180: #
1.3 ! casties 181: # Runs the retriever program on $docdir and returns the number of unarchived files.
! 182: # All filenames are put in %files.
! 183: # $docmount is the mount point of the doc partition in cases when the new mount point
! 184: # is different.
1.1 casties 185: #
186: # Sample output:
1.2 casties 187: # (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
188: # Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
1.1 casties 189: sub run_retrieve {
1.3 ! casties 190: my ($archdir, $archmount, $files) = @_;
! 191: my $archparent;
! 192: if ($archmount eq $archdir) {
! 193: # no explicit mount point
! 194: $archparent = get_parent($archdir);
! 195: } else {
! 196: # destination dir is mount point
! 197: $archparent = $archmount;
! 198: }
! 199:
1.2 casties 200: print LOG "START unarchive $version on ", scalar localtime, "\n";
1.1 casties 201: my $archcmd = $archprog;
202: $archcmd .= " retrieve -subdir=yes -replace=all";
1.3 ! casties 203: $archcmd .= " -description='$archname'"; # archive name
! 204: $archcmd .= " '$archmount/'"; # archive mount point
! 205: $archcmd .= " '$archparent/'"; # destination dir name
1.1 casties 206:
207: my $archcnt = 0;
1.3 ! casties 208: my $numfiles = 0;
1.1 casties 209: print LOG "CMD: $archcmd\n";
210: if (open ARCH, "$archcmd 2>&1 |") {
211: while (<ARCH>) {
212: chomp;
213: print LOG "ARCH: $_\n";
214: if (/
215: Retrieving
216: \s+([\d,]+) # size
217: \s+(\S+) # file name
1.2 casties 218: \s+-->
219: \s+(\S+) # destination file name
1.1 casties 220: \s+\[Done\]
221: /x) {
222: my $size = $1;
223: my $file = $2;
224: $size =~ s/,//g;
225: logger("DEBUG", " RETRIEVE: file '$file'");
226: $archcnt++;
1.3 ! casties 227: if ($$files{$file}) {
1.1 casties 228: logger("WARNING", "file $file seems to be archived multiple times.");
229: $warncnt++;
230: }
1.3 ! casties 231: $$files{$file} = [$size];
1.1 casties 232: }
233: }
1.3 ! casties 234: $numfiles = (scalar keys %$files);
! 235: logger("INFO", "$archcnt archives of $numfiles files (in $archmount).");
1.1 casties 236: } else {
237: logger("ABORT", "unable to start archive command '$archcmd'!!");
238: exit 1;
239: }
1.3 ! casties 240: return $numfiles;
1.1 casties 241: }
242:
243:
244: #
245: # check_files(\%files_to_retrieve, \%retrieved_files)
246: #
247: # compares the list of archived and retrieved files
248: #
249: sub check_files {
250: my ($to_retrieve, $retrieved) = @_;
251:
252: my $nt = scalar keys %$to_retrieve;
253: my $na = scalar keys %$retrieved;
254:
255: foreach my $ft (sort keys %$to_retrieve) {
256: my $fp = "$docdir/$ft";
257: #logger("DEBUG", " fp: $fp");
258: if ($$retrieved{$fp}) {
259: logger("DEBUG", "$ft retrieved OK");
260: $$retrieved{$fp}->[1] = "OK";
261: } else {
262: logger("ERROR", "file entry '$ft' missing from archive!");
263: $errcnt++;
264: }
265: }
266:
267: foreach my $fa (sort keys %$retrieved) {
268: if (! $$retrieved{$fa}->[1]) {
269: my ($fn, $fp) = split_file_path($fa);
270: if ($index_files{$fn}) {
271: logger("DEBUG", "$fa ignored");
272: $na--;
273: } else {
274: logger("WARNING", "$fa retrieved but not in list!");
275: $warncnt++;
276: }
277: }
278: }
279:
280: if ($nt > $na) {
281: logger("WARNING", "less files were retrieved ($na vs. $nt)!");
282: $warncnt++;
283: } elsif ($na > $nt) {
284: logger("WARNING", "more files were retrieved ($na vs. $nt)!");
285: $warncnt++;
286: }
287:
288: }
289:
290:
291:
292: #######################################################
293: # main
294: #
295:
296: logger("INFO", "unarchiver $version");
297:
298: # make shure the right user is running this program
299: my $user = getlogin;
1.2 casties 300: if (($user)&&($user ne "archive")&&($user ne "root")) {
1.1 casties 301: logger("ABORT", "you must be archive or root user to run this program!");
302: exit 1;
303: }
304:
305: # use checkarchive first
306: if (system("$checkprog $docdir >/dev/null") == 0) {
1.3 ! casties 307: logger("INFO", "archive \"$docdir\" check OK");
1.1 casties 308: } else {
1.3 ! casties 309: logger("ABORT", "archive \"$docdir\" check failed!!");
1.1 casties 310: exit 1;
311: }
312:
313: # read index.meta file
314: my ($document, $rootnode) = read_xml($metafile);
315:
316: # check index file
317: my $archived_files = read_resource_meta($rootnode);
318: my $num_archived_files = scalar keys %$archived_files;
319:
320: # check for .archived file
321: if (-f "$docdir/.archived") {
322: logger("INFO", ".archived file exists.");
323: } else {
324: logger("WARNING", "no .archived file!");
325: $warncnt++;
326: }
327:
328: logger("INFO", "$num_archived_files files to retrieve.");
329:
1.3 ! casties 330: # save current index.meta
! 331: park_file($metafile);
! 332:
1.1 casties 333: # retrieve
1.3 ! casties 334: my %retrieved_files = ();
! 335: my $archcnt = 0;
! 336:
! 337: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
! 338: # TSM needs two different paths because of historical mount points :-(
! 339: # try the new one first
! 340: $archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files);
! 341: if ($archcnt == 0) {
! 342: # and then the old one
! 343: $archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files);
! 344: }
! 345: } else {
! 346: # otherwise we assume that it works
! 347: $archcnt += run_retrieve($docdir, $docdir, \%retrieved_files);
! 348: }
! 349:
! 350: # restore current index.meta
! 351: unpark_file($metafile);
1.1 casties 352:
1.3 ! casties 353: if ($archcnt == 0) {
1.1 casties 354: logger("ABORT", "no files retrieved!!");
355: exit 1;
356: }
1.3 ! casties 357: logger("INFO", "$archcnt files retrieved");
1.1 casties 358:
359: # check list of archived files
1.3 ! casties 360: check_files($archived_files, \%retrieved_files);
1.1 casties 361:
362: # rewrite index.meta file
363: write_xml($document, $metafile);
364:
365: logger("INFO", "$warncnt warnings");
366: logger("INFO", "$errcnt errors");
367: if ($errcnt == 0) {
1.3 ! casties 368: logger("DONE", "$archcnt archived files retrieved");
1.1 casties 369: } else {
370: logger("ABORT", "there were $errcnt errors!!");
371: exit 1;
372: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>