Annotation of foxridge-archiver/archivecheck.pl, revision 1.5
1.1 casties 1: #!/usr/local/bin/perl -w
2:
3: use strict;
4:
5: use XML::LibXML;
6:
7: # MPIWG libraries
8: use lib '/usr/local/mpiwg/archive';
9: use MPIWGStor;
10:
11: # make output unbuffered
12: $|=1;
13:
14: #######################################################
15: # internal parameters
16: #
17:
18: # program version
1.5 ! casties 19: my $version = "0.4.2 (7.12.2005 ROC)";
1.1 casties 20:
21: # read command line parameters
22: my $args = parseargs;
23:
24: # debug level
25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
26:
27: # XML namespace (not really implemented!)
28: my $namespace = "";
29:
30: # archive name (archive-path element, usually == $docdir)
31: my $archname;
32:
33:
34: #######################################################
35: # external programs
36: #
37: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
38: if (! -x $archprog) {
39: logger("ABORT", "TSM client program '$archprog' missing!!");
40: exit 1;
41: }
42: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
43: # if (! -x $checkprog) {
44: # logge("ABORT", "meta data checking program '$checkprog' missing!!");
45: # exit 1;
46: # }
47: # log file for archiver
48: my $log_file = "/var/tmp/archivecheck.log";
49: if (! open LOG, ">>$log_file") {
50: logger("ABORT", "unable to write log file '$log_file'!!");
51: exit 1;
52: }
53:
54: #######################################################
55: # check parameters that were passed to the program
56: #
57: my $docdir = $$args{'path'};
58: if (! $docdir) {
59: print "ABORT: no document directory given!\n";
60: exit 1;
61: }
62: # strip trailing slashes
63: $docdir =~ s/\/$//;
64: if (! -d $docdir) {
65: print "ABORT: document directory \'$docdir\' doesn't exist!\n";
66: exit 1;
67: }
68:
69: my $metafile = "$docdir/index.meta";
70: if (! -f $metafile) {
71: print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
72: exit 1;
73: }
74:
75: #######################################################
76: # internal variables
77: #
78:
79: # number of errors
80: my $errcnt = 0;
81: # number of warnings
82: my $warncnt = 0;
83:
84: #######################################################
85: # subroutines
86: #
87:
88:
89: #
90: # $files = read_resource_meta($rootnode)
91: #
92: # checks general resource meta information and reads the list of files
93: #
94: sub read_resource_meta {
95: my ($rootnode) = @_;
96: my %files;
97: #
98: # archive path
99: #
100: # get archive-path
101: $archname = sstrip($rootnode->findvalue('child::archive-path'));
102: if (! $archname) {
103: logger("ABORT", "archive-name element missing!!");
104: exit 1;
105: }
106:
107: #
108: # files
109: #
110: my @filenodes = $rootnode->findnodes('child::file');
111: foreach my $fn (@filenodes) {
112: my $name = sstrip($fn->findvalue('child::name'));
113: my $path = sstrip($fn->findvalue('child::path'));
114: logger("DEBUG", "FILE: ($path)$name");
115: my $f = ($path) ? "$path/$name" : "$name";
116: $files{$f} = [$name];
117: }
118:
119: #
120: # dirs
121: #
122: my @dirnodes = $rootnode->findnodes('child::dir');
123: foreach my $fn (@dirnodes) {
124: my $name = sstrip($fn->findvalue('child::name'));
125: my $path = sstrip($fn->findvalue('child::path'));
126: logger("DEBUG", "DIR: ($path)$name");
1.5 ! casties 127: my $f = "$name";
! 128: if (($path)&&($path ne '.')) {
! 129: $f = "$path/$name";
! 130: }
1.1 casties 131: $files{$f} = [$name];
132: }
133:
134: #
135: # archive-storage-date
136: #
137: my $archdate = $rootnode->find('child::archive-storage-date');
138: if ($archdate) {
139: logger("INFO", "archive storage date: $archdate");
140: } else {
141: logger("ERROR", "archive storage date missing!");
142: $errcnt++;
143: }
144: return \%files;
145: }
146:
147:
148: #
149: # fs_read_files($realdir, $docdir, \%files, \%dirs)
150: #
151: # reads all files and directories below $realdir and puts the
152: # files in %files and directories in %dirs
153: # $docdir is only for recursion, it should be empty when called
154: # from outside
155: #
156: sub fs_read_files {
157: my ($directory, $docdir, $files, $dirs) = @_;
158: my $cnt = 0;
159:
160: if (! opendir DIR, $directory) {
161: return 0;
162: }
163: my @dirfiles = readdir DIR;
164: foreach my $fn (@dirfiles) {
165: # ignore names starting with a dot
166: next if ($fn =~ /^\./);
167: # ignore other silly files
168: next if ($junk_files{$fn});
169:
170: $cnt++;
171: my $f = "$directory/$fn";
172: my $docf = ($docdir) ? "$docdir/$fn" : $fn;
173: #print "fs_file: \"$f\"\n";
174: if (-f $f) {
175: #print " is file\n";
176: my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
177: $atime,$mtime,$ctime,$blksize,$blocks)
178: = stat(_);
179: $$files{$docf} = [$fn, $size, stime($mtime)];
180: #logger("TEST", "fn $fn, size $size, mtime $mtime");
181: } elsif (-d _) {
182: #print " is dir\n";
183: $$dirs{$docf} = $fn;
184: # recurse into directory
185: $cnt += fs_read_files($f, $docf, $files, $dirs);
186: }
187: }
188: return $cnt;
189: }
190:
191:
192: #
1.2 casties 193: # $archcnt = run_query($dirquery, \%files)
1.1 casties 194: #
1.2 casties 195: # runs the archiver program on $dirquery and adds to the hash of archived files
1.1 casties 196: #
197: # Sample output:
198: # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
199: #
200: sub run_query {
1.2 casties 201: my ($dirquery, $files) = @_;
1.1 casties 202: print LOG "START checkarchive $version ", scalar localtime, "\n";
203: my $archcmd = $archprog;
204: $archcmd .= " query archive -subdir=yes";
205: $archcmd .= " -description='$archname'";
1.2 casties 206: $archcmd .= " '$dirquery'";
1.1 casties 207:
208: my $archcnt = 0;
209: print LOG "CMD: $archcmd\n";
210: if (open ARCH, "$archcmd 2>&1 |") {
211: while (<ARCH>) {
212: chomp;
213: print LOG "ARCH: $_\n";
214: if (/
215: \s*([\d,]+) # size
216: \s+(\w+) # unit of size
217: \s+([\d\/]+) # date mm\/dd\/yy
218: \s+([\d:]+) # time
219: \s+(\S+) # file name
220: \s+(\w+) # expiry
221: \s+(\S+) # archive label
222: /x) {
223: my $size = $1;
224: my $sunit = $2;
225: my $date = $3;
226: my $time = $4;
227: my $file = $5;
228: my $exp = $6;
229: my $label = $7;
230: $size =~ s/,//g;
231: $date = ymd_date($date);
232: logger("DEBUG", " QUERY: file '$file'");
233: $archcnt++;
1.2 casties 234: if ($$files{$file}) {
235: logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
236: #$warncnt++;
237: }
238: if (length $file <= length $docdir) {
239: logger("DEBUG", "not below document dir: $file");
240: next;
241: }
242: $$files{$file} = [$size, "$date $time"];
1.1 casties 243: }
244: }
245: } else {
246: logger("ABORT", "unable to start archive command '$archcmd'!!");
247: exit 1;
248: }
249:
1.2 casties 250: return $archcnt;
1.1 casties 251: }
252:
253:
254: #
255: # check_files(\%files_to_archive, \%archived_files)
256: #
257: # compares the list of archived and to be archived files
258: #
259: sub check_files {
260: my ($to_archive, $archived) = @_;
261:
262: my $nt = scalar keys %$to_archive;
263: my $na = scalar keys %$archived;
264:
265: foreach my $ft (sort keys %$to_archive) {
266: my $fp = "$docdir/$ft";
267: #logger("DEBUG", " fp: $fp");
268: if ($$archived{$fp}) {
269: logger("DEBUG", "$ft archived OK");
270: $$archived{$fp}->[2] = "OK";
271: } else {
272: logger("ERROR", "file entry '$ft' missing from archive!");
273: $errcnt++;
274: }
275: }
276:
277: foreach my $fa (sort keys %$archived) {
278: if (! $$archived{$fa}->[2]) {
279: my ($fn, $fp) = split_file_path($fa);
280: if ($index_files{$fn}) {
281: logger("DEBUG", "$fa ignored");
282: $na--;
283: } else {
284: logger("WARNING", "$fa archived but not in list!");
285: $warncnt++;
286: }
287: }
288: }
289:
290: if ($nt > $na) {
291: logger("WARNING", "less files were archived ($na vs. $nt)!");
292: $warncnt++;
293: } elsif ($na > $nt) {
294: logger("WARNING", "more files were archived ($na vs. $nt)!");
295: $warncnt++;
296: }
297:
298: }
299:
300: #
301: # compare_files(\%files_on_disk, \%archived_files)
302: #
303: # compares the list of archived files and files on disk
304: #
305: sub compare_files {
306: my ($fs_files, $archived) = @_;
307:
308: foreach my $ft (sort keys %$fs_files) {
309: next if ($index_files{$ft});
310: my $fp = "$docdir/$ft";
311: #logger("DEBUG", " fp: $fp");
312: if ($$archived{$fp}) {
313: next if ($index_files{$ft});
314:
315: my $asize = $$archived{$fp}[0];
316: my $atime = $$archived{$fp}[1];
317: my $fsize = $$fs_files{$ft}[1];
318: my $ftime = $$fs_files{$ft}[2];
319: if ($asize != $fsize) {
320: logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
321: $errcnt++;
322: } elsif ($atime lt $ftime) {
323: logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
324: $errcnt++;
325: } else {
1.3 casties 326: logger("WARNING", "archived file $ft still on disk");
327: $warncnt++;
1.1 casties 328: }
329: } else {
1.2 casties 330: logger("ERROR", "file on disk '$ft' is not in archive!");
1.1 casties 331: $errcnt++;
332: }
333: }
334: }
335:
336:
337:
338: #######################################################
339: # main
340: #
341:
342: logger("INFO", "archivecheck $version");
343:
344: # make shure the right user is running this program
345: my $user = getlogin;
1.4 casties 346: if (not (($user eq "archive")||($user eq "root"))) {
1.1 casties 347: logger("ABORT", "you must be archive or root user to run this program!");
348: exit 1;
349: }
350:
351: # read index.meta file
352: my ($document, $rootnode) = read_xml($metafile);
353:
354: # check file and add archive date
355: my $files_to_archive = read_resource_meta($rootnode);
356:
357: # check for .archived file
358: if (-f "$docdir/.archived") {
359: logger("INFO", ".archived file exists.");
360: } else {
361: logger("WARNING", "no .archived file!");
362: $warncnt++;
363: }
364:
365: # check archive
1.2 casties 366: my %archived_files = ();
367: my $archcnt = 0;
368: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
369: # TSM needs two different paths because of historical mount points :-(
370: my $docdir1 = "/mpiwg/archive/data/";
371: $archcnt += run_query($docdir1, \%archived_files);
372: my $docdir2 = "/mpiwg/archive/";
373: $archcnt += run_query($docdir2, \%archived_files);
374: } else {
375: $archcnt += run_query("$docdir/", \%archived_files);
376: }
377: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
1.1 casties 378:
1.2 casties 379: my $num_arch_files = (scalar keys %archived_files);
1.1 casties 380: if ($num_arch_files == 0) {
381: logger("ABORT", "no archive of this directory!!");
382: exit 1;
383: }
384: logger("INFO", "$num_arch_files files archived");
385:
386: # check list of archived files
1.2 casties 387: check_files($files_to_archive, \%archived_files);
1.1 casties 388:
389: # read files from filesystem
390: my %fsfiles;
391: my %fsdirs;
392: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
393:
394: logger("INFO", "$num_fs_files files still on disk!");
395: if ($num_fs_files > 0) {
1.2 casties 396: compare_files(\%fsfiles, \%archived_files);
1.1 casties 397: }
398:
399: logger("INFO", "$warncnt warnings");
400: logger("INFO", "$errcnt errors");
401: if ($errcnt == 0) {
1.2 casties 402: logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
1.3 casties 403: exit 0;
1.1 casties 404: } else {
405: logger("ABORT", "there were $errcnt errors!!");
406: exit 1;
407: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>