Mercurial > hg > foxridge-archiver
annotate archiver.pl @ 53:86965c7658a9
*** empty log message ***
author | casties |
---|---|
date | Wed, 19 Nov 2008 14:58:41 +0100 |
parents | b762b5af6e42 |
children | 2208ed7370cb |
rev | line source |
---|---|
0 | 1 #!/usr/local/bin/perl -w |
2 | |
3 use strict; | |
4 | |
5 use XML::LibXML; | |
6 | |
7 # MPIWG libraries | |
8 use lib '/usr/local/mpiwg/archive'; | |
9 use MPIWGStor; | |
10 | |
11 # make output unbuffered | |
12 $|=1; | |
13 | |
14 ####################################################### | |
15 # internal parameters | |
16 # | |
17 | |
18 # program version | |
52 | 19 my $version = "0.7.3 (ROC 10.9.2008)"; |
11 | 20 |
21 # short help | |
18 | 22 my $help = "MPIWG archiver $version |
23 use: archiver [options] docpath | |
24 options: | |
25 -debug show debugging info | |
26 -premigrate don't delete archived files | |
27 -force archive even if already archived | |
25
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
28 -replace rewrite index file |
18 | 29 "; |
11 | 30 |
31 # read command line parameters | |
32 my $args = MPIWGStor::parseargs; | |
18 | 33 if (! scalar(%$args)) { |
34 print $help, "\n"; | |
35 exit 1; | |
36 } | |
11 | 37 |
38 # debug level | |
18 | 39 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; |
40 | |
41 # force archiving | |
42 my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0; | |
0 | 43 |
25
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
44 # rewrite index file |
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
45 my $rewrite_index = (exists $$args{'replace'}) ? $$args{'replace'} : 0; |
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
46 |
0 | 47 # rewrite XML file (necessary for archive date!) |
48 my $fix_xml = 1; | |
49 my $xml_changed = 0; | |
18 | 50 |
0 | 51 # XML namespace (not really implemented!) |
52 my $namespace = ""; | |
53 | |
54 # archive name (archive-path element, usually == $docdir) | |
55 my $archname; | |
18 | 56 |
0 | 57 # archive storage date (now) |
58 my $archdate = stime(time); | |
59 | |
60 # delete "junk" files before archiving | |
61 my $delete_junk_files = 1; | |
62 | |
63 # delete data files after archiving | |
64 my $delete_data_files = 1; | |
65 | |
11 | 66 # don't delete archived files with "-premigrate" |
67 if (exists $$args{'premigrate'}) { | |
68 $delete_data_files = not $$args{'premigrate'}; | |
69 } | |
70 if ($delete_data_files) { | |
71 logger('INFO', "going to remove successfully archived files from disk"); | |
0 | 72 } |
73 | |
18 | 74 |
75 ####################################################### | |
76 # external programs | |
77 # | |
78 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
79 if (! -x $archprog) { | |
80 logger('ABORT', "TSM client program '$archprog' missing!"); | |
81 exit 1; | |
82 } | |
83 my $checkprog = "/usr/local/mpiwg/archive/metacheck"; | |
84 if (! -x $checkprog) { | |
85 logger('ABORT', "meta data checking program '$checkprog' missing!"); | |
86 exit 1; | |
87 } | |
88 # log file for archiver | |
89 my $log_file = "/var/log/mpiwg-archiver.log"; | |
90 if (! open LOG, ">>$log_file") { | |
91 logger('ABORT', "unable to write log file '$log_file'!"); | |
92 exit 1; | |
93 } | |
94 | |
95 ####################################################### | |
96 # check parameters that were passed to the program | |
97 # | |
98 | |
99 my $docdir = $$args{'path'}; | |
100 # strip double slashes | |
101 $docdir =~ s/\/\//\//; | |
102 # strip trailing slashes | |
103 $docdir =~ s/\/+$//; | |
104 if (! -d $docdir) { | |
105 logger('ABORT', "document directory \'$docdir\' doesn't exist!"); | |
106 exit 1; | |
107 } | |
108 | |
0 | 109 my $metafile = "$docdir/index.meta"; |
110 if (! -f $metafile) { | |
18 | 111 logger('ABORT', "metadata index file \'$metafile\' doesn't exist!"); |
0 | 112 exit 1; |
113 } | |
114 | |
115 ####################################################### | |
116 # internal variables | |
117 # | |
118 | |
119 # number of errors | |
120 my $errcnt = 0; | |
121 # number of warnings | |
122 my $warncnt = 0; | |
123 | |
124 ####################################################### | |
125 # subroutines | |
126 # | |
127 | |
128 # | |
129 # $files = read_resource_meta($rootnode) | |
130 # | |
131 # checks general resource meta information and reads the list of files | |
132 # | |
133 sub read_resource_meta { | |
134 my ($rootnode) = @_; | |
135 my %files; | |
136 # | |
137 # archive path | |
138 # | |
139 # get archive-path | |
140 $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path')); | |
141 if (! $archname) { | |
18 | 142 logger('ABORT', "archive-name element missing!"); |
0 | 143 exit 1; |
144 } | |
145 | |
146 # | |
147 # files | |
148 # | |
149 my @filenodes = $rootnode->findnodes('child::file'); | |
150 foreach my $fn (@filenodes) { | |
151 my $name = MPIWGStor::sstrip($fn->findvalue('child::name')); | |
152 my $path = MPIWGStor::sstrip($fn->findvalue('child::path')); | |
18 | 153 logger('DEBUG', "FILE ($path)$name"); |
0 | 154 my $f = ($path) ? "$path/$name" : "$name"; |
155 $files{$f} = $name; | |
156 } | |
157 | |
158 # | |
159 # archive-storage-date | |
160 # | |
161 my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1); | |
162 if ($stordatenode) { | |
18 | 163 logger('WARNING', "archive storage date exists! Resource already archived?"); |
0 | 164 $warncnt++; |
165 # delete old date | |
166 $stordatenode->removeChildNodes; | |
167 } else { | |
168 # create new storage date node | |
169 $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date"); | |
170 # move after archive-path | |
171 $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1)); | |
172 } | |
173 $stordatenode->appendTextNode($archdate); | |
174 $xml_changed++; | |
175 return \%files; | |
176 } | |
177 | |
178 | |
179 # | |
180 # $%files = run_archive | |
181 # | |
182 # runs the archiver program on $docdir and returns a list of archived files | |
183 # | |
184 sub run_archive { | |
185 my %files; | |
186 print LOG "START archiver $version $archdate\n"; | |
187 my $archcmd = $archprog; | |
188 $archcmd .= " archive -archsymlinkasfile=no -subdir=yes"; | |
189 $archcmd .= " -description='$archname'"; | |
190 $archcmd .= " '$docdir/'"; | |
191 | |
192 print LOG "CMD: $archcmd\n"; | |
193 if (open ARCH, "$archcmd 2>&1 |") { | |
194 while (<ARCH>) { | |
195 chomp; | |
196 print LOG "ARCH: $_\n"; | |
197 if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) { | |
198 print " ARCH: file '$1'\n"; | |
199 $files{$1} = "ok"; | |
200 } | |
201 if (/^Archive processing of .* finished without failure./) { | |
202 print " ARCH: OK\n"; | |
203 } | |
204 } | |
205 } else { | |
18 | 206 logger('ABORT', "unable to start archive command '$archcmd'!!"); |
0 | 207 exit 1; |
208 } | |
209 | |
210 return \%files; | |
211 } | |
212 | |
213 | |
214 # | |
215 # check_files(\%files_to_archive, \%archived_files) | |
216 # | |
217 # compares the list of archived and to be archived files | |
218 # | |
219 sub check_files { | |
220 my ($to_archive, $archived) = @_; | |
221 | |
222 my $nt = scalar keys %$to_archive; | |
223 my $na = scalar keys %$archived; | |
224 | |
225 foreach my $ft (sort keys %$to_archive) { | |
226 my $fp = "$docdir/$ft"; | |
227 #print " fp: $fp\n"; | |
228 if ($$archived{$fp}) { | |
18 | 229 logger('DEBUG', "$ft archived OK"); |
0 | 230 $$archived{$fp} = "OK"; |
231 } else { | |
18 | 232 logger('ERROR', "file '$ft' missing from archive!"); |
0 | 233 $errcnt++; |
234 } | |
235 } | |
236 | |
237 foreach my $fa (sort keys %$archived) { | |
238 if ($$archived{$fa} ne "OK") { | |
239 my ($fn, $fp) = MPIWGStor::split_file_path($fa); | |
240 if ($MPIWGStor::index_files{$fn}) { | |
18 | 241 logger('DEBUG', "$fa ignored"); |
0 | 242 $na--; |
243 } else { | |
18 | 244 logger('WARNING', "$fa archived but not in list!"); |
0 | 245 $warncnt++; |
246 } | |
247 } | |
248 } | |
249 | |
250 if ($nt > $na) { | |
18 | 251 logger('WARNING', "less files were archived ($na vs. $nt)!"); |
0 | 252 } elsif ($na > $nt) { |
18 | 253 logger('WARNING', "more files were archived ($na vs. $nt)!"); |
0 | 254 } |
255 | |
256 } | |
257 | |
258 | |
259 # | |
260 # delete_files(\%files) | |
261 # | |
262 # deletes the files from the list (of absolute files) and their directories | |
263 # if they are empty | |
264 # | |
265 sub delete_files { | |
266 my ($files) = @_; | |
267 my %dirs; | |
268 | |
269 foreach my $f (sort keys %$files) { | |
270 my ($fn, $fp) = MPIWGStor::split_file_path($f); | |
271 # collect all unique directories | |
272 if ($fp && (! $dirs{$fp})) { | |
273 $dirs{$fp} = $fp; | |
274 } | |
275 # don't delete index files | |
276 next if ($MPIWGStor::index_files{$fn}); | |
277 # no file no delete | |
278 next unless (-f $f); | |
279 # delete files | |
280 if (unlink $f) { | |
23 | 281 logger('DEBUG', "remove $f ($fn)"); |
0 | 282 } else { |
18 | 283 logger('ERROR', "unable to delete $f!"); |
0 | 284 $errcnt++; |
285 } | |
286 } | |
287 # try to delete all empty directories | |
288 my @dirkeys = sort keys %dirs; | |
289 # starting at the end to get to the subdirectories first | |
290 for (my $i = $#dirkeys; $i >= 0; $i--) { | |
291 my $d = $dirkeys[$i]; | |
292 # dont't remove document dir (shouldn't be empty anyway) | |
293 next if ($d eq $docdir); | |
294 if (-d $d) { | |
23 | 295 logger('DEBUG', "remove dir $d"); |
0 | 296 rmdir $d; |
297 } | |
298 } | |
299 } | |
300 | |
301 | |
302 # | |
303 # delete_all_files(\%files, $dir) | |
304 # | |
305 # deletes all files with names from the list %files | |
306 # in the directory $dir and its subdirectories | |
307 # | |
308 sub delete_all_files { | |
309 my ($files, $dir) = @_; | |
310 | |
311 if (! opendir DIR, $dir) { | |
18 | 312 logger('ERROR', "unable to read directory $dir!"); |
0 | 313 $errcnt++; |
314 return; | |
315 } | |
316 my @fl = readdir DIR; | |
317 closedir DIR; | |
318 | |
319 foreach my $f (@fl) { | |
320 next if ($f =~ /^\.{1,2}$/); | |
321 if ($$files{$f}) { | |
322 # $f is in the file list | |
323 if (-f "$dir/$f") { | |
324 # $f is a file | |
325 if (unlink "$dir/$f") { | |
23 | 326 logger('DEBUG', "removed $f"); |
0 | 327 } else { |
18 | 328 logger('ERROR', "unable to delete $f!"); |
0 | 329 $errcnt++; |
330 } | |
331 } elsif (-d _) { | |
332 # $f is a directory (unlink won't work) | |
333 if ((system 'rm', '-r', "$dir/$f") == 0) { | |
23 | 334 logger('DEBUG', "removed directory $f"); |
0 | 335 } else { |
18 | 336 logger('ERROR', "unable to delete directory $f!"); |
0 | 337 $errcnt++; |
338 } | |
339 } else { | |
18 | 340 logger('ERROR', "funny object $dir/$f!"); |
0 | 341 $errcnt++; |
342 } | |
343 } else { | |
344 # $f is not in the list | |
345 if (-d "$dir/$f") { | |
346 # recurse into directories | |
18 | 347 logger('DEBUG', "enter $dir/$f"); |
0 | 348 delete_all_files($files, "$dir/$f"); |
349 } | |
350 } | |
351 } | |
352 } | |
353 | |
354 | |
355 ####################################################### | |
356 # main | |
357 # | |
358 | |
18 | 359 logger('START', "archiver $version at $archdate"); |
0 | 360 |
361 # make shure the right user is running this program | |
52 | 362 my $user = getlogin || getpwuid($<); |
18 | 363 if (($user ne "archive")&&($user ne "root")) { |
23 | 364 logger("WARNING", "you ($user) should be archive or root user to run this program!"); |
18 | 365 } |
366 | |
367 # check for .archived file | |
368 if (-f "$docdir/.archived") { | |
369 if (not $force_archive) { | |
370 logger('ABORT', "already archived! (.archived file exists)"); | |
371 exit 1; | |
372 } else { | |
373 logger('WARNING', "resource already archived? (.archived file exists)"); | |
374 $warncnt++; | |
375 } | |
376 } | |
0 | 377 |
378 # use metacheck first | |
25
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
379 my $check_opts="-add-files"; |
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
380 if ($rewrite_index) { |
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
381 $check_opts = "-replace"; |
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
382 } |
8b9d91963de7
more option passing to metacheck from archivemany via archiver
casties
parents:
23
diff
changeset
|
383 if (open CHECK, "$checkprog $check_opts $docdir |") { |
18 | 384 my @errors; |
385 my $msg; | |
386 while (<CHECK>) { | |
387 chomp; | |
388 if (/^ERROR/) { | |
389 push @errors, $_; | |
390 } | |
391 $msg = $_; | |
392 } | |
393 if ($msg =~ /^DONE/) { | |
394 logger('DEBUG', "checking index file: $msg"); | |
395 logger('INFO', "resource '$docdir' check OK"); | |
396 } else { | |
397 logger('DEBUG', "errors checking index file:\n " . join("\n ", @errors) . "\n $msg"); | |
398 logger('ABORT', "resource '$docdir' check failed!"); | |
399 exit 1; | |
400 } | |
0 | 401 } else { |
18 | 402 logger('ABORT', "unable to run $checkprog"); |
0 | 403 exit 1; |
404 } | |
18 | 405 # if (system("$checkprog $docdir >/dev/null") == 0) { |
406 # logger('INFO', "resource '$docdir' check OK"); | |
407 # } else { | |
408 # logger('ABORT', "resource '$docdir' check failed!!"); | |
409 # exit 1; | |
410 # } | |
0 | 411 |
412 # read index.meta file | |
413 my ($document, $rootnode) = MPIWGStor::read_xml($metafile); | |
414 | |
415 # check file and add archive date | |
416 my $files_to_archive = read_resource_meta($rootnode); | |
417 | |
18 | 418 logger('INFO', (scalar keys %$files_to_archive) . " files to archive"); |
0 | 419 |
18 | 420 # remove .archived file |
0 | 421 if (-f "$docdir/.archived") { |
422 if (unlink "$docdir/.archived") { | |
18 | 423 logger('WARNING', "existing .archived file has been removed!"); |
0 | 424 $warncnt++; |
425 } else { | |
18 | 426 logger('ERROR', "unable to remove existing .archived file!"); |
0 | 427 $errcnt++; |
428 } | |
429 } | |
430 | |
431 # remove junk files | |
432 if ($delete_junk_files) { | |
433 delete_all_files(\%MPIWGStor::junk_files, $docdir); | |
434 } | |
435 | |
436 # write new index.meta | |
437 if ($errcnt > 0) { | |
18 | 438 logger('ABORT', "there were errors!"); |
0 | 439 exit 1; |
440 } else { | |
441 if ($fix_xml) { | |
442 MPIWGStor::write_xml($document, $metafile); | |
443 } | |
444 } | |
445 | |
446 # start archiving | |
5 | 447 my $archived_files = run_archive(); |
448 my $num_archfiles = scalar keys %$archived_files; | |
0 | 449 |
18 | 450 logger('INFO', "$num_archfiles files archived"); |
0 | 451 |
452 # check list of archived files | |
453 check_files($files_to_archive, $archived_files); | |
454 | |
455 # delete files if all went OK | |
456 if ($errcnt == 0) { | |
457 system("touch", "$docdir/.archived"); | |
458 # remove junk files (again) | |
459 if ($delete_junk_files) { | |
460 delete_all_files(\%MPIWGStor::junk_files, $docdir); | |
461 } | |
462 # remove archived files | |
463 if ($delete_data_files) { | |
464 delete_files($archived_files); | |
465 } | |
466 } | |
467 | |
18 | 468 logger('INFO', "$warncnt warnings"); |
469 logger('INFO', "$errcnt errors"); | |
0 | 470 if ($errcnt > 0) { |
11 | 471 logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time)); |
0 | 472 exit 1; |
473 } else { | |
11 | 474 logger('DONE', "$num_archfiles files archived at " . stime(time)); |
0 | 475 } |