comparison unarchiver.pl @ 10:4417be0e2f07

adapted to dsmc's problem with mount points; saves index.meta from being overwritten by the archived version
author casties
date Fri, 21 Jan 2005 15:33:26 +0100
parents c4e6fc065b6d
children b19df18aa19a
comparison
equal deleted inserted replaced
9:9c61f624d802 10:4417be0e2f07
16 ####################################################### 16 #######################################################
17 # internal parameters 17 # internal parameters
18 # 18 #
19 19
20 # program version 20 # program version
21 my $version = "0.2 (23.9.2004)"; 21 my $version = "0.3 (20.1.2004)";
22 22
23 # read command line parameters 23 # read command line parameters
24 my $args = parseargs; 24 my $args = parseargs;
25 25
26 # debug level 26 # debug level
77 if (! -f $metafile) { 77 if (! -f $metafile) {
78 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; 78 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
79 exit 1; 79 exit 1;
80 } 80 }
81 81
82 # construct document's parent dir
83 my $docparent = $docdir;
84 $docparent =~ s!/[^/]+$!!;
85
86 ####################################################### 82 #######################################################
87 # internal variables 83 # internal variables
88 # 84 #
89 85
90 # number of errors 86 # number of errors
93 my $warncnt = 0; 89 my $warncnt = 0;
94 90
95 ####################################################### 91 #######################################################
96 # subroutines 92 # subroutines
97 # 93 #
94
95 # construct document's parent dir
96 sub get_parent {
97 my ($dirname) = @_;
98 my $dirparent = $dirname;
99 $dirparent =~ s!/[^/]+$!!;
100 return $dirparent;
101 }
98 102
99 103
100 # 104 #
101 # $files = read_resource_meta($rootnode) 105 # $files = read_resource_meta($rootnode)
102 # 106 #
170 return \%files; 174 return \%files;
171 } 175 }
172 176
173 177
174 # 178 #
175 # $%files = run_retrieve 179 # $num_files = run_retrieve($docdir, $docmount, \%files)
176 # 180 #
177 # runs the retriever program on $docdir and returns a list of archived files 181 # Runs the retriever program on $docdir and returns the number of unarchived files.
182 # All filenames are put in %files.
183 # $docmount is the mount point of the doc partition in cases when the new mount point
184 # is different.
178 # 185 #
179 # Sample output: 186 # Sample output:
180 # (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] 187 # (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
181 # Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done] 188 # Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
182 sub run_retrieve { 189 sub run_retrieve {
183 my %files; 190 my ($archdir, $archmount, $files) = @_;
191 my $archparent;
192 if ($archmount eq $archdir) {
193 # no explicit mount point
194 $archparent = get_parent($archdir);
195 } else {
196 # destination dir is mount point
197 $archparent = $archmount;
198 }
199
184 print LOG "START unarchive $version on ", scalar localtime, "\n"; 200 print LOG "START unarchive $version on ", scalar localtime, "\n";
185 my $archcmd = $archprog; 201 my $archcmd = $archprog;
186 $archcmd .= " retrieve -subdir=yes -replace=all"; 202 $archcmd .= " retrieve -subdir=yes -replace=all";
187 $archcmd .= " -description='$archname'"; 203 $archcmd .= " -description='$archname'"; # archive name
188 $archcmd .= " '$docdir/'"; # archive name 204 $archcmd .= " '$archmount/'"; # archive mount point
189 $archcmd .= " '$docparent/'"; # destination dir name 205 $archcmd .= " '$archparent/'"; # destination dir name
190 206
191 my $archcnt = 0; 207 my $archcnt = 0;
208 my $numfiles = 0;
192 print LOG "CMD: $archcmd\n"; 209 print LOG "CMD: $archcmd\n";
193 if (open ARCH, "$archcmd 2>&1 |") { 210 if (open ARCH, "$archcmd 2>&1 |") {
194 while (<ARCH>) { 211 while (<ARCH>) {
195 chomp; 212 chomp;
196 print LOG "ARCH: $_\n"; 213 print LOG "ARCH: $_\n";
205 my $size = $1; 222 my $size = $1;
206 my $file = $2; 223 my $file = $2;
207 $size =~ s/,//g; 224 $size =~ s/,//g;
208 logger("DEBUG", " RETRIEVE: file '$file'"); 225 logger("DEBUG", " RETRIEVE: file '$file'");
209 $archcnt++; 226 $archcnt++;
210 if ($files{$file}) { 227 if ($$files{$file}) {
211 logger("WARNING", "file $file seems to be archived multiple times."); 228 logger("WARNING", "file $file seems to be archived multiple times.");
212 $warncnt++; 229 $warncnt++;
213 } 230 }
214 $files{$file} = [$size]; 231 $$files{$file} = [$size];
215 } 232 }
216 } 233 }
217 logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files."); 234 $numfiles = (scalar keys %$files);
235 logger("INFO", "$archcnt archives of $numfiles files (in $archmount).");
218 } else { 236 } else {
219 logger("ABORT", "unable to start archive command '$archcmd'!!"); 237 logger("ABORT", "unable to start archive command '$archcmd'!!");
220 exit 1; 238 exit 1;
221 } 239 }
222 return \%files; 240 return $numfiles;
223 } 241 }
224 242
225 243
226 # 244 #
227 # check_files(\%files_to_retrieve, \%retrieved_files) 245 # check_files(\%files_to_retrieve, \%retrieved_files)
284 exit 1; 302 exit 1;
285 } 303 }
286 304
287 # use checkarchive first 305 # use checkarchive first
288 if (system("$checkprog $docdir >/dev/null") == 0) { 306 if (system("$checkprog $docdir >/dev/null") == 0) {
289 logger("INFO", "archive '$docdir' check OK"); 307 logger("INFO", "archive \"$docdir\" check OK");
290 } else { 308 } else {
291 logger("ABORT", "archive '$docdir' check failed!!"); 309 logger("ABORT", "archive \"$docdir\" check failed!!");
292 exit 1; 310 exit 1;
293 } 311 }
294 312
295 # read index.meta file 313 # read index.meta file
296 my ($document, $rootnode) = read_xml($metafile); 314 my ($document, $rootnode) = read_xml($metafile);
307 $warncnt++; 325 $warncnt++;
308 } 326 }
309 327
310 logger("INFO", "$num_archived_files files to retrieve."); 328 logger("INFO", "$num_archived_files files to retrieve.");
311 329
330 # save current index.meta
331 park_file($metafile);
332
312 # retrieve 333 # retrieve
313 my $retrieved_files = run_retrieve; 334 my %retrieved_files = ();
314 335 my $archcnt = 0;
315 my $num_arch_files = (scalar keys %$retrieved_files); 336
316 if ($num_arch_files == 0) { 337 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
338 # TSM needs two different paths because of historical mount points :-(
339 # try the new one first
340 $archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files);
341 if ($archcnt == 0) {
342 # and then the old one
343 $archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files);
344 }
345 } else {
346 # otherwise we assume that it works
347 $archcnt += run_retrieve($docdir, $docdir, \%retrieved_files);
348 }
349
350 # restore current index.meta
351 unpark_file($metafile);
352
353 if ($archcnt == 0) {
317 logger("ABORT", "no files retrieved!!"); 354 logger("ABORT", "no files retrieved!!");
318 exit 1; 355 exit 1;
319 } 356 }
320 logger("INFO", "$num_arch_files files retrieved"); 357 logger("INFO", "$archcnt files retrieved");
321 358
322 # check list of archived files 359 # check list of archived files
323 check_files($archived_files, $retrieved_files); 360 check_files($archived_files, \%retrieved_files);
324 361
325 # rewrite index.meta file 362 # rewrite index.meta file
326 write_xml($document, $metafile); 363 write_xml($document, $metafile);
327 364
328 logger("INFO", "$warncnt warnings"); 365 logger("INFO", "$warncnt warnings");
329 logger("INFO", "$errcnt errors"); 366 logger("INFO", "$errcnt errors");
330 if ($errcnt == 0) { 367 if ($errcnt == 0) {
331 logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved"); 368 logger("DONE", "$archcnt archived files retrieved");
332 } else { 369 } else {
333 logger("ABORT", "there were $errcnt errors!!"); 370 logger("ABORT", "there were $errcnt errors!!");
334 exit 1; 371 exit 1;
335 } 372 }