Annotation of foxridge-archiver/archiver.pl, revision 1.5
1.1 casties 1: #!/usr/local/bin/perl -w
2:
3: use strict;
4:
5: use XML::LibXML;
6:
7: # MPIWG libraries
8: use lib '/usr/local/mpiwg/archive';
9: use MPIWGStor;
10:
11: # make output unbuffered
12: $|=1;
13:
14: #######################################################
15: # internal parameters
16: #
17:
18: # program version
1.5 ! casties 19: my $version = "0.7 (ROC 20.9.2005)";
1.4 casties 20:
21: # short help
1.5 ! casties 22: my $help = "MPIWG archiver $version
! 23: use: archiver [options] docpath
! 24: options:
! 25: -debug show debugging info
! 26: -premigrate don't delete archived files
! 27: -force archive even if already archived
! 28: ";
1.4 casties 29:
30: # read command line parameters
31: my $args = MPIWGStor::parseargs;
1.5 ! casties 32: if (! scalar(%$args)) {
! 33: print $help, "\n";
! 34: exit 1;
! 35: }
1.4 casties 36:
37: # debug level
1.5 ! casties 38: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
! 39:
! 40: # force archiving
! 41: my $force_archive = (exists $$args{'force'}) ? $$args{'force'} : 0;
1.1 casties 42:
43: # rewrite XML file (necessary for archive date!)
44: my $fix_xml = 1;
45: my $xml_changed = 0;
1.5 ! casties 46:
1.1 casties 47: # XML namespace (not really implemented!)
48: my $namespace = "";
49:
50: # archive name (archive-path element, usually == $docdir)
51: my $archname;
1.5 ! casties 52:
1.1 casties 53: # archive storage date (now)
54: my $archdate = stime(time);
55:
56: # delete "junk" files before archiving
57: my $delete_junk_files = 1;
58:
59: # delete data files after archiving
60: my $delete_data_files = 1;
61:
1.5 ! casties 62: # don't delete archived files with "-premigrate"
! 63: if (exists $$args{'premigrate'}) {
! 64: $delete_data_files = not $$args{'premigrate'};
! 65: }
! 66: if ($delete_data_files) {
! 67: logger('INFO', "going to remove successfully archived files from disk");
! 68: }
! 69:
1.1 casties 70:
71: #######################################################
72: # external programs
73: #
74: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
75: if (! -x $archprog) {
1.5 ! casties 76: logger('ABORT', "TSM client program '$archprog' missing!");
1.1 casties 77: exit 1;
78: }
79: my $checkprog = "/usr/local/mpiwg/archive/metacheck";
80: if (! -x $checkprog) {
1.5 ! casties 81: logger('ABORT', "meta data checking program '$checkprog' missing!");
1.1 casties 82: exit 1;
83: }
84: # log file for archiver
85: my $log_file = "/var/log/mpiwg-archiver.log";
86: if (! open LOG, ">>$log_file") {
1.5 ! casties 87: logger('ABORT', "unable to write log file '$log_file'!");
1.1 casties 88: exit 1;
89: }
90:
91: #######################################################
92: # check parameters that were passed to the program
93: #
1.5 ! casties 94:
1.4 casties 95: my $docdir = $$args{'path'};
1.2 casties 96: # strip double slashes
97: $docdir =~ s/\/\//\//;
1.1 casties 98: # strip trailing slashes
1.2 casties 99: $docdir =~ s/\/+$//;
1.1 casties 100: if (! -d $docdir) {
1.5 ! casties 101: logger('ABORT', "document directory \'$docdir\' doesn't exist!");
1.1 casties 102: exit 1;
103: }
104:
105: my $metafile = "$docdir/index.meta";
106: if (! -f $metafile) {
1.5 ! casties 107: logger('ABORT', "metadata index file \'$metafile\' doesn't exist!");
1.1 casties 108: exit 1;
109: }
110:
111: #######################################################
112: # internal variables
113: #
114:
115: # number of errors
116: my $errcnt = 0;
117: # number of warnings
118: my $warncnt = 0;
119:
120: #######################################################
121: # subroutines
122: #
123:
124: #
125: # $files = read_resource_meta($rootnode)
126: #
127: # checks general resource meta information and reads the list of files
128: #
129: sub read_resource_meta {
130: my ($rootnode) = @_;
131: my %files;
132: #
133: # archive path
134: #
135: # get archive-path
136: $archname = MPIWGStor::sstrip($rootnode->findvalue('child::archive-path'));
137: if (! $archname) {
1.5 ! casties 138: logger('ABORT', "archive-name element missing!");
1.1 casties 139: exit 1;
140: }
141:
142: #
143: # files
144: #
145: my @filenodes = $rootnode->findnodes('child::file');
146: foreach my $fn (@filenodes) {
147: my $name = MPIWGStor::sstrip($fn->findvalue('child::name'));
148: my $path = MPIWGStor::sstrip($fn->findvalue('child::path'));
1.5 ! casties 149: logger('DEBUG', "FILE ($path)$name");
1.1 casties 150: my $f = ($path) ? "$path/$name" : "$name";
151: $files{$f} = $name;
152: }
153:
154: #
155: # archive-storage-date
156: #
157: my $stordatenode = ($rootnode->find('child::archive-storage-date'))->get_node(1);
158: if ($stordatenode) {
1.5 ! casties 159: logger('WARNING', "archive storage date exists! Resource already archived?");
1.1 casties 160: $warncnt++;
161: # delete old date
162: $stordatenode->removeChildNodes;
163: } else {
164: # create new storage date node
165: $stordatenode = $rootnode->addNewChild($namespace, "archive-storage-date");
166: # move after archive-path
167: $rootnode->insertAfter($stordatenode, ($rootnode->find('child::archive-path'))->get_node(1));
168: }
169: $stordatenode->appendTextNode($archdate);
170: $xml_changed++;
171: return \%files;
172: }
173:
174:
175: #
176: # $%files = run_archive
177: #
178: # runs the archiver program on $docdir and returns a list of archived files
179: #
180: sub run_archive {
181: my %files;
182: print LOG "START archiver $version $archdate\n";
183: my $archcmd = $archprog;
184: $archcmd .= " archive -archsymlinkasfile=no -subdir=yes";
185: $archcmd .= " -description='$archname'";
186: $archcmd .= " '$docdir/'";
187:
188: print LOG "CMD: $archcmd\n";
189: if (open ARCH, "$archcmd 2>&1 |") {
190: while (<ARCH>) {
191: chomp;
192: print LOG "ARCH: $_\n";
193: if (/Normal File-->\s+[\d,]+\s+(.*)\s+\[Sent\]/) {
194: print " ARCH: file '$1'\n";
195: $files{$1} = "ok";
196: }
197: if (/^Archive processing of .* finished without failure./) {
198: print " ARCH: OK\n";
199: }
200: }
201: } else {
1.5 ! casties 202: logger('ABORT', "unable to start archive command '$archcmd'!!");
1.1 casties 203: exit 1;
204: }
205:
206: return \%files;
207: }
208:
209:
210: #
211: # check_files(\%files_to_archive, \%archived_files)
212: #
213: # compares the list of archived and to be archived files
214: #
215: sub check_files {
216: my ($to_archive, $archived) = @_;
217:
218: my $nt = scalar keys %$to_archive;
219: my $na = scalar keys %$archived;
220:
221: foreach my $ft (sort keys %$to_archive) {
222: my $fp = "$docdir/$ft";
223: #print " fp: $fp\n";
224: if ($$archived{$fp}) {
1.5 ! casties 225: logger('DEBUG', "$ft archived OK");
1.1 casties 226: $$archived{$fp} = "OK";
227: } else {
1.5 ! casties 228: logger('ERROR', "file '$ft' missing from archive!");
1.1 casties 229: $errcnt++;
230: }
231: }
232:
233: foreach my $fa (sort keys %$archived) {
234: if ($$archived{$fa} ne "OK") {
235: my ($fn, $fp) = MPIWGStor::split_file_path($fa);
236: if ($MPIWGStor::index_files{$fn}) {
1.5 ! casties 237: logger('DEBUG', "$fa ignored");
1.1 casties 238: $na--;
239: } else {
1.5 ! casties 240: logger('WARNING', "$fa archived but not in list!");
1.1 casties 241: $warncnt++;
242: }
243: }
244: }
245:
246: if ($nt > $na) {
1.5 ! casties 247: logger('WARNING', "less files were archived ($na vs. $nt)!");
1.1 casties 248: } elsif ($na > $nt) {
1.5 ! casties 249: logger('WARNING', "more files were archived ($na vs. $nt)!");
1.1 casties 250: }
251:
252: }
253:
254:
255: #
256: # delete_files(\%files)
257: #
258: # deletes the files from the list (of absolute files) and their directories
259: # if they are empty
260: #
261: sub delete_files {
262: my ($files) = @_;
263: my %dirs;
264:
265: foreach my $f (sort keys %$files) {
266: my ($fn, $fp) = MPIWGStor::split_file_path($f);
267: # collect all unique directories
268: if ($fp && (! $dirs{$fp})) {
269: $dirs{$fp} = $fp;
270: }
271: # don't delete index files
272: next if ($MPIWGStor::index_files{$fn});
273: # no file no delete
274: next unless (-f $f);
275: # delete files
276: if (unlink $f) {
1.5 ! casties 277: logger('INFO', "remove $f ($fn)");
1.1 casties 278: } else {
1.5 ! casties 279: logger('ERROR', "unable to delete $f!");
1.1 casties 280: $errcnt++;
281: }
282: }
283: # try to delete all empty directories
284: my @dirkeys = sort keys %dirs;
285: # starting at the end to get to the subdirectories first
286: for (my $i = $#dirkeys; $i >= 0; $i--) {
287: my $d = $dirkeys[$i];
288: # dont't remove document dir (shouldn't be empty anyway)
289: next if ($d eq $docdir);
290: if (-d $d) {
1.5 ! casties 291: logger('INFO', "remove dir $d");
1.1 casties 292: rmdir $d;
293: }
294: }
295: }
296:
297:
298: #
299: # delete_all_files(\%files, $dir)
300: #
301: # deletes all files with names from the list %files
302: # in the directory $dir and its subdirectories
303: #
304: sub delete_all_files {
305: my ($files, $dir) = @_;
306:
307: if (! opendir DIR, $dir) {
1.5 ! casties 308: logger('ERROR', "unable to read directory $dir!");
1.1 casties 309: $errcnt++;
310: return;
311: }
312: my @fl = readdir DIR;
313: closedir DIR;
314:
315: foreach my $f (@fl) {
316: next if ($f =~ /^\.{1,2}$/);
317: if ($$files{$f}) {
318: # $f is in the file list
319: if (-f "$dir/$f") {
320: # $f is a file
321: if (unlink "$dir/$f") {
1.5 ! casties 322: logger('INFO', "removed $f");
1.1 casties 323: } else {
1.5 ! casties 324: logger('ERROR', "unable to delete $f!");
1.1 casties 325: $errcnt++;
326: }
327: } elsif (-d _) {
328: # $f is a directory (unlink won't work)
329: if ((system 'rm', '-r', "$dir/$f") == 0) {
1.5 ! casties 330: logger('INFO', "removed directory $f");
1.1 casties 331: } else {
1.5 ! casties 332: logger('ERROR', "unable to delete directory $f!");
1.1 casties 333: $errcnt++;
334: }
335: } else {
1.5 ! casties 336: logger('ERROR', "funny object $dir/$f!");
1.1 casties 337: $errcnt++;
338: }
339: } else {
340: # $f is not in the list
341: if (-d "$dir/$f") {
342: # recurse into directories
1.5 ! casties 343: logger('DEBUG', "enter $dir/$f");
1.1 casties 344: delete_all_files($files, "$dir/$f");
345: }
346: }
347: }
348: }
349:
350:
351: #######################################################
352: # main
353: #
354:
1.5 ! casties 355: logger('START', "archiver $version at $archdate");
1.1 casties 356:
357: # make shure the right user is running this program
358: my $user = getlogin;
1.5 ! casties 359: if (($user ne "archive")&&($user ne "root")) {
! 360: logger("ABORT", "you ($user) must be archive or root user to run this program!");
! 361: exit 1;
! 362: }
! 363:
! 364: # check for .archived file
! 365: if (-f "$docdir/.archived") {
! 366: if (not $force_archive) {
! 367: logger('ABORT', "already archived! (.archived file exists)");
! 368: exit 1;
! 369: } else {
! 370: logger('WARNING', "resource already archived? (.archived file exists)");
! 371: $warncnt++;
! 372: }
! 373: }
1.1 casties 374:
375: # use metacheck first
1.5 ! casties 376: if (open CHECK, "$checkprog -add-files $docdir |") {
! 377: my @errors;
! 378: my $msg;
! 379: while (<CHECK>) {
! 380: chomp;
! 381: if (/^ERROR/) {
! 382: push @errors, $_;
! 383: }
! 384: $msg = $_;
! 385: }
! 386: if ($msg =~ /^DONE/) {
! 387: logger('DEBUG', "checking index file: $msg");
! 388: logger('INFO', "resource '$docdir' check OK");
! 389: } else {
! 390: logger('DEBUG', "errors checking index file:\n " . join("\n ", @errors) . "\n $msg");
! 391: logger('ABORT', "resource '$docdir' check failed!");
! 392: exit 1;
! 393: }
1.1 casties 394: } else {
1.5 ! casties 395: logger('ABORT', "unable to run $checkprog");
1.1 casties 396: exit 1;
397: }
1.5 ! casties 398: # if (system("$checkprog $docdir >/dev/null") == 0) {
! 399: # logger('INFO', "resource '$docdir' check OK");
! 400: # } else {
! 401: # logger('ABORT', "resource '$docdir' check failed!!");
! 402: # exit 1;
! 403: # }
1.1 casties 404:
405: # read index.meta file
406: my ($document, $rootnode) = MPIWGStor::read_xml($metafile);
407:
408: # check file and add archive date
409: my $files_to_archive = read_resource_meta($rootnode);
410:
1.5 ! casties 411: logger('INFO', (scalar keys %$files_to_archive) . " files to archive");
1.1 casties 412:
1.5 ! casties 413: # remove .archived file
1.1 casties 414: if (-f "$docdir/.archived") {
415: if (unlink "$docdir/.archived") {
1.5 ! casties 416: logger('WARNING', "existing .archived file has been removed!");
1.1 casties 417: $warncnt++;
418: } else {
1.5 ! casties 419: logger('ERROR', "unable to remove existing .archived file!");
1.1 casties 420: $errcnt++;
421: }
422: }
423:
424: # remove junk files
425: if ($delete_junk_files) {
426: delete_all_files(\%MPIWGStor::junk_files, $docdir);
427: }
428:
429: # write new index.meta
430: if ($errcnt > 0) {
1.5 ! casties 431: logger('ABORT', "there were errors!");
1.1 casties 432: exit 1;
433: } else {
434: if ($fix_xml) {
435: MPIWGStor::write_xml($document, $metafile);
436: }
437: }
438:
439: # start archiving
1.3 casties 440: my $archived_files = run_archive();
441: my $num_archfiles = scalar keys %$archived_files;
1.1 casties 442:
1.5 ! casties 443: logger('INFO', "$num_archfiles files archived");
1.1 casties 444:
445: # check list of archived files
446: check_files($files_to_archive, $archived_files);
447:
448: # delete files if all went OK
449: if ($errcnt == 0) {
450: system("touch", "$docdir/.archived");
451: # remove junk files (again)
452: if ($delete_junk_files) {
453: delete_all_files(\%MPIWGStor::junk_files, $docdir);
454: }
455: # remove archived files
456: if ($delete_data_files) {
457: delete_files($archived_files);
458: }
459: }
460:
1.5 ! casties 461: logger('INFO', "$warncnt warnings");
! 462: logger('INFO', "$errcnt errors");
1.1 casties 463: if ($errcnt > 0) {
1.4 casties 464: logger('ABORT', "there were errors! ($num_archfiles files archived) at " . stime(time));
1.1 casties 465: exit 1;
466: } else {
1.4 casties 467: logger('DONE', "$num_archfiles files archived at " . stime(time));
1.1 casties 468: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>