Mercurial > hg > foxridge-archiver
comparison unarchiver.pl @ 10:4417be0e2f07
Adapted to dsmc's problem with mount points.
Saves index.meta from being overwritten by the archived version.
author | casties |
---|---|
date | Fri, 21 Jan 2005 15:33:26 +0100 |
parents | c4e6fc065b6d |
children | b19df18aa19a |
comparison
equal
deleted
inserted
replaced
9:9c61f624d802 | 10:4417be0e2f07 |
---|---|
16 ####################################################### | 16 ####################################################### |
17 # internal parameters | 17 # internal parameters |
18 # | 18 # |
19 | 19 |
20 # program version | 20 # program version |
21 my $version = "0.2 (23.9.2004)"; | 21 my $version = "0.3 (20.1.2004)"; |
22 | 22 |
23 # read command line parameters | 23 # read command line parameters |
24 my $args = parseargs; | 24 my $args = parseargs; |
25 | 25 |
26 # debug level | 26 # debug level |
77 if (! -f $metafile) { | 77 if (! -f $metafile) { |
78 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | 78 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; |
79 exit 1; | 79 exit 1; |
80 } | 80 } |
81 | 81 |
82 # construct document's parent dir | |
83 my $docparent = $docdir; | |
84 $docparent =~ s!/[^/]+$!!; | |
85 | |
86 ####################################################### | 82 ####################################################### |
87 # internal variables | 83 # internal variables |
88 # | 84 # |
89 | 85 |
90 # number of errors | 86 # number of errors |
93 my $warncnt = 0; | 89 my $warncnt = 0; |
94 | 90 |
95 ####################################################### | 91 ####################################################### |
96 # subroutines | 92 # subroutines |
97 # | 93 # |
94 | |
95 # construct document's parent dir | |
96 sub get_parent { | |
97 my ($dirname) = @_; | |
98 my $dirparent = $dirname; | |
99 $dirparent =~ s!/[^/]+$!!; | |
100 return $dirparent; | |
101 } | |
98 | 102 |
99 | 103 |
100 # | 104 # |
101 # $files = read_resource_meta($rootnode) | 105 # $files = read_resource_meta($rootnode) |
102 # | 106 # |
170 return \%files; | 174 return \%files; |
171 } | 175 } |
172 | 176 |
173 | 177 |
174 # | 178 # |
175 # $%files = run_retrieve | 179 # $num_files = run_retrieve($docdir, $docmount, \%files) |
176 # | 180 # |
177 # runs the retriever program on $docdir and returns a list of archived files | 181 # Runs the retriever program on $docdir and returns the number of unarchived files. |
182 # All filenames are put in %files. | |
183 # $docmount is the mount point of the doc partition in cases when the new mount point | |
184 # is different. | |
178 # | 185 # |
179 # Sample output: | 186 # Sample output: |
180 # (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] | 187 # (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] |
181 # Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done] | 188 # Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done] |
182 sub run_retrieve { | 189 sub run_retrieve { |
183 my %files; | 190 my ($archdir, $archmount, $files) = @_; |
191 my $archparent; | |
192 if ($archmount eq $archdir) { | |
193 # no explicit mount point | |
194 $archparent = get_parent($archdir); | |
195 } else { | |
196 # destination dir is mount point | |
197 $archparent = $archmount; | |
198 } | |
199 | |
184 print LOG "START unarchive $version on ", scalar localtime, "\n"; | 200 print LOG "START unarchive $version on ", scalar localtime, "\n"; |
185 my $archcmd = $archprog; | 201 my $archcmd = $archprog; |
186 $archcmd .= " retrieve -subdir=yes -replace=all"; | 202 $archcmd .= " retrieve -subdir=yes -replace=all"; |
187 $archcmd .= " -description='$archname'"; | 203 $archcmd .= " -description='$archname'"; # archive name |
188 $archcmd .= " '$docdir/'"; # archive name | 204 $archcmd .= " '$archmount/'"; # archive mount point |
189 $archcmd .= " '$docparent/'"; # destination dir name | 205 $archcmd .= " '$archparent/'"; # destination dir name |
190 | 206 |
191 my $archcnt = 0; | 207 my $archcnt = 0; |
208 my $numfiles = 0; | |
192 print LOG "CMD: $archcmd\n"; | 209 print LOG "CMD: $archcmd\n"; |
193 if (open ARCH, "$archcmd 2>&1 |") { | 210 if (open ARCH, "$archcmd 2>&1 |") { |
194 while (<ARCH>) { | 211 while (<ARCH>) { |
195 chomp; | 212 chomp; |
196 print LOG "ARCH: $_\n"; | 213 print LOG "ARCH: $_\n"; |
205 my $size = $1; | 222 my $size = $1; |
206 my $file = $2; | 223 my $file = $2; |
207 $size =~ s/,//g; | 224 $size =~ s/,//g; |
208 logger("DEBUG", " RETRIEVE: file '$file'"); | 225 logger("DEBUG", " RETRIEVE: file '$file'"); |
209 $archcnt++; | 226 $archcnt++; |
210 if ($files{$file}) { | 227 if ($$files{$file}) { |
211 logger("WARNING", "file $file seems to be archived multiple times."); | 228 logger("WARNING", "file $file seems to be archived multiple times."); |
212 $warncnt++; | 229 $warncnt++; |
213 } | 230 } |
214 $files{$file} = [$size]; | 231 $$files{$file} = [$size]; |
215 } | 232 } |
216 } | 233 } |
217 logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files."); | 234 $numfiles = (scalar keys %$files); |
235 logger("INFO", "$archcnt archives of $numfiles files (in $archmount)."); | |
218 } else { | 236 } else { |
219 logger("ABORT", "unable to start archive command '$archcmd'!!"); | 237 logger("ABORT", "unable to start archive command '$archcmd'!!"); |
220 exit 1; | 238 exit 1; |
221 } | 239 } |
222 return \%files; | 240 return $numfiles; |
223 } | 241 } |
224 | 242 |
225 | 243 |
226 # | 244 # |
227 # check_files(\%files_to_retrieve, \%retrieved_files) | 245 # check_files(\%files_to_retrieve, \%retrieved_files) |
284 exit 1; | 302 exit 1; |
285 } | 303 } |
286 | 304 |
287 # use checkarchive first | 305 # use checkarchive first |
288 if (system("$checkprog $docdir >/dev/null") == 0) { | 306 if (system("$checkprog $docdir >/dev/null") == 0) { |
289 logger("INFO", "archive '$docdir' check OK"); | 307 logger("INFO", "archive \"$docdir\" check OK"); |
290 } else { | 308 } else { |
291 logger("ABORT", "archive '$docdir' check failed!!"); | 309 logger("ABORT", "archive \"$docdir\" check failed!!"); |
292 exit 1; | 310 exit 1; |
293 } | 311 } |
294 | 312 |
295 # read index.meta file | 313 # read index.meta file |
296 my ($document, $rootnode) = read_xml($metafile); | 314 my ($document, $rootnode) = read_xml($metafile); |
307 $warncnt++; | 325 $warncnt++; |
308 } | 326 } |
309 | 327 |
310 logger("INFO", "$num_archived_files files to retrieve."); | 328 logger("INFO", "$num_archived_files files to retrieve."); |
311 | 329 |
330 # save current index.meta | |
331 park_file($metafile); | |
332 | |
312 # retrieve | 333 # retrieve |
313 my $retrieved_files = run_retrieve; | 334 my %retrieved_files = (); |
314 | 335 my $archcnt = 0; |
315 my $num_arch_files = (scalar keys %$retrieved_files); | 336 |
316 if ($num_arch_files == 0) { | 337 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
338 # TSM needs two different paths because of historical mount points :-( | |
339 # try the new one first | |
340 $archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files); | |
341 if ($archcnt == 0) { | |
342 # and then the old one | |
343 $archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files); | |
344 } | |
345 } else { | |
346 # otherwise we assume that it works | |
347 $archcnt += run_retrieve($docdir, $docdir, \%retrieved_files); | |
348 } | |
349 | |
350 # restore current index.meta | |
351 unpark_file($metafile); | |
352 | |
353 if ($archcnt == 0) { | |
317 logger("ABORT", "no files retrieved!!"); | 354 logger("ABORT", "no files retrieved!!"); |
318 exit 1; | 355 exit 1; |
319 } | 356 } |
320 logger("INFO", "$num_arch_files files retrieved"); | 357 logger("INFO", "$archcnt files retrieved"); |
321 | 358 |
322 # check list of archived files | 359 # check list of archived files |
323 check_files($archived_files, $retrieved_files); | 360 check_files($archived_files, \%retrieved_files); |
324 | 361 |
325 # rewrite index.meta file | 362 # rewrite index.meta file |
326 write_xml($document, $metafile); | 363 write_xml($document, $metafile); |
327 | 364 |
328 logger("INFO", "$warncnt warnings"); | 365 logger("INFO", "$warncnt warnings"); |
329 logger("INFO", "$errcnt errors"); | 366 logger("INFO", "$errcnt errors"); |
330 if ($errcnt == 0) { | 367 if ($errcnt == 0) { |
331 logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved"); | 368 logger("DONE", "$archcnt archived files retrieved"); |
332 } else { | 369 } else { |
333 logger("ABORT", "there were $errcnt errors!!"); | 370 logger("ABORT", "there were $errcnt errors!!"); |
334 exit 1; | 371 exit 1; |
335 } | 372 } |