Annotation of foxridge-archiver/archivecheck.pl, revision 1.2
1.1 casties 1: #!/usr/local/bin/perl -w
2:
3: use strict;
4:
5: use XML::LibXML;
6:
7: # MPIWG libraries
8: use lib '/usr/local/mpiwg/archive';
9: use MPIWGStor;
10:
11: # make output unbuffered
12: $|=1;
13:
14: #######################################################
15: # internal parameters
16: #
17:
18: # program version
1.2 ! casties 19: my $version = "0.4 (30.11.2004)";
1.1 casties 20:
21: # read command line parameters
22: my $args = parseargs;
23:
24: # debug level
25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
26:
27: # XML namespace (not really implemented!)
28: my $namespace = "";
29:
30: # archive name (archive-path element, usually == $docdir)
31: my $archname;
32: # archive storage date
33: my $archdate;
34:
35:
36: #######################################################
37: # external programs
38: #
39: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
40: if (! -x $archprog) {
41: logger("ABORT", "TSM client program '$archprog' missing!!");
42: exit 1;
43: }
44: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
45: # if (! -x $checkprog) {
46: # logge("ABORT", "meta data checking program '$checkprog' missing!!");
47: # exit 1;
48: # }
49: # log file for archiver
50: my $log_file = "/var/tmp/archivecheck.log";
51: if (! open LOG, ">>$log_file") {
52: logger("ABORT", "unable to write log file '$log_file'!!");
53: exit 1;
54: }
55:
56: #######################################################
57: # check parameters that were passed to the program
58: #
59: my $docdir = $$args{'path'};
60: if (! $docdir) {
61: print "ABORT: no document directory given!\n";
62: exit 1;
63: }
64: # strip trailing slashes
65: $docdir =~ s/\/$//;
66: if (! -d $docdir) {
67: print "ABORT: document directory \'$docdir\' doesn't exist!\n";
68: exit 1;
69: }
70:
71: my $metafile = "$docdir/index.meta";
72: if (! -f $metafile) {
73: print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
74: exit 1;
75: }
76:
77: #######################################################
78: # internal variables
79: #
80:
81: # number of errors
82: my $errcnt = 0;
83: # number of warnings
84: my $warncnt = 0;
85:
86: #######################################################
87: # subroutines
88: #
89:
90:
91: #
92: # $files = read_resource_meta($rootnode)
93: #
94: # checks general resource meta information and reads the list of files
95: #
96: sub read_resource_meta {
97: my ($rootnode) = @_;
98: my %files;
99: #
100: # archive path
101: #
102: # get archive-path
103: $archname = sstrip($rootnode->findvalue('child::archive-path'));
104: if (! $archname) {
105: logger("ABORT", "archive-name element missing!!");
106: exit 1;
107: }
108:
109: #
110: # files
111: #
112: my @filenodes = $rootnode->findnodes('child::file');
113: foreach my $fn (@filenodes) {
114: my $name = sstrip($fn->findvalue('child::name'));
115: my $path = sstrip($fn->findvalue('child::path'));
116: logger("DEBUG", "FILE: ($path)$name");
117: my $f = ($path) ? "$path/$name" : "$name";
118: $files{$f} = [$name];
119: }
120:
121: #
122: # dirs
123: #
124: my @dirnodes = $rootnode->findnodes('child::dir');
125: foreach my $fn (@dirnodes) {
126: my $name = sstrip($fn->findvalue('child::name'));
127: my $path = sstrip($fn->findvalue('child::path'));
128: logger("DEBUG", "DIR: ($path)$name");
129: my $f = ($path) ? "$path/$name" : "$name";
130: $files{$f} = [$name];
131: }
132:
133: #
134: # archive-storage-date
135: #
136: my $archdate = $rootnode->find('child::archive-storage-date');
137: if ($archdate) {
138: logger("INFO", "archive storage date: $archdate");
139: } else {
140: logger("ERROR", "archive storage date missing!");
141: $errcnt++;
142: }
143: return \%files;
144: }
145:
146:
147: #
148: # fs_read_files($realdir, $docdir, \%files, \%dirs)
149: #
150: # reads all files and directories below $realdir and puts the
151: # files in %files and directories in %dirs
152: # $docdir is only for recursion, it should be empty when called
153: # from outside
154: #
155: sub fs_read_files {
156: my ($directory, $docdir, $files, $dirs) = @_;
157: my $cnt = 0;
158:
159: if (! opendir DIR, $directory) {
160: return 0;
161: }
162: my @dirfiles = readdir DIR;
163: foreach my $fn (@dirfiles) {
164: # ignore names starting with a dot
165: next if ($fn =~ /^\./);
166: # ignore other silly files
167: next if ($junk_files{$fn});
168:
169: $cnt++;
170: my $f = "$directory/$fn";
171: my $docf = ($docdir) ? "$docdir/$fn" : $fn;
172: #print "fs_file: \"$f\"\n";
173: if (-f $f) {
174: #print " is file\n";
175: my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
176: $atime,$mtime,$ctime,$blksize,$blocks)
177: = stat(_);
178: $$files{$docf} = [$fn, $size, stime($mtime)];
179: #logger("TEST", "fn $fn, size $size, mtime $mtime");
180: } elsif (-d _) {
181: #print " is dir\n";
182: $$dirs{$docf} = $fn;
183: # recurse into directory
184: $cnt += fs_read_files($f, $docf, $files, $dirs);
185: }
186: }
187: return $cnt;
188: }
189:
190:
191: #
1.2 ! casties 192: # $archcnt = run_query($dirquery, \%files)
1.1 casties 193: #
1.2 ! casties 194: # runs the archiver program on $dirquery and adds to the hash of archived files
1.1 casties 195: #
196: # Sample output:
197: # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
198: #
199: sub run_query {
1.2 ! casties 200: my ($dirquery, $files) = @_;
1.1 casties 201: print LOG "START checkarchive $version ", scalar localtime, "\n";
202: my $archcmd = $archprog;
203: $archcmd .= " query archive -subdir=yes";
204: $archcmd .= " -description='$archname'";
1.2 ! casties 205: $archcmd .= " '$dirquery'";
1.1 casties 206:
207: my $archcnt = 0;
208: print LOG "CMD: $archcmd\n";
209: if (open ARCH, "$archcmd 2>&1 |") {
210: while (<ARCH>) {
211: chomp;
212: print LOG "ARCH: $_\n";
213: if (/
214: \s*([\d,]+) # size
215: \s+(\w+) # unit of size
216: \s+([\d\/]+) # date mm\/dd\/yy
217: \s+([\d:]+) # time
218: \s+(\S+) # file name
219: \s+(\w+) # expiry
220: \s+(\S+) # archive label
221: /x) {
222: my $size = $1;
223: my $sunit = $2;
224: my $date = $3;
225: my $time = $4;
226: my $file = $5;
227: my $exp = $6;
228: my $label = $7;
229: $size =~ s/,//g;
230: $date = ymd_date($date);
231: logger("DEBUG", " QUERY: file '$file'");
232: $archcnt++;
1.2 ! casties 233: if ($$files{$file}) {
! 234: logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
! 235: #$warncnt++;
! 236: }
! 237: if (length $file <= length $docdir) {
! 238: logger("DEBUG", "not below document dir: $file");
! 239: next;
! 240: }
! 241: $$files{$file} = [$size, "$date $time"];
1.1 casties 242: }
243: }
244: } else {
245: logger("ABORT", "unable to start archive command '$archcmd'!!");
246: exit 1;
247: }
248:
1.2 ! casties 249: return $archcnt;
1.1 casties 250: }
251:
252:
253: #
254: # check_files(\%files_to_archive, \%archived_files)
255: #
256: # compares the list of archived and to be archived files
257: #
258: sub check_files {
259: my ($to_archive, $archived) = @_;
260:
261: my $nt = scalar keys %$to_archive;
262: my $na = scalar keys %$archived;
263:
264: foreach my $ft (sort keys %$to_archive) {
265: my $fp = "$docdir/$ft";
266: #logger("DEBUG", " fp: $fp");
267: if ($$archived{$fp}) {
268: logger("DEBUG", "$ft archived OK");
269: $$archived{$fp}->[2] = "OK";
270: } else {
271: logger("ERROR", "file entry '$ft' missing from archive!");
272: $errcnt++;
273: }
274: }
275:
276: foreach my $fa (sort keys %$archived) {
277: if (! $$archived{$fa}->[2]) {
278: my ($fn, $fp) = split_file_path($fa);
279: if ($index_files{$fn}) {
280: logger("DEBUG", "$fa ignored");
281: $na--;
282: } else {
283: logger("WARNING", "$fa archived but not in list!");
284: $warncnt++;
285: }
286: }
287: }
288:
289: if ($nt > $na) {
290: logger("WARNING", "less files were archived ($na vs. $nt)!");
291: $warncnt++;
292: } elsif ($na > $nt) {
293: logger("WARNING", "more files were archived ($na vs. $nt)!");
294: $warncnt++;
295: }
296:
297: }
298:
299: #
300: # compare_files(\%files_on_disk, \%archived_files)
301: #
302: # compares the list of archived files and files on disk
303: #
304: sub compare_files {
305: my ($fs_files, $archived) = @_;
306:
307: foreach my $ft (sort keys %$fs_files) {
308: next if ($index_files{$ft});
309: my $fp = "$docdir/$ft";
310: #logger("DEBUG", " fp: $fp");
311: if ($$archived{$fp}) {
312: next if ($index_files{$ft});
313:
314: my $asize = $$archived{$fp}[0];
315: my $atime = $$archived{$fp}[1];
316: my $fsize = $$fs_files{$ft}[1];
317: my $ftime = $$fs_files{$ft}[2];
318: if ($asize != $fsize) {
319: logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
320: $errcnt++;
321: } elsif ($atime lt $ftime) {
322: logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
323: $errcnt++;
324: } else {
325: logger("ERROR", "archived file $ft still on disk");
326: $errcnt++;
327: }
328: } else {
1.2 ! casties 329: logger("ERROR", "file on disk '$ft' is not in archive!");
1.1 casties 330: $errcnt++;
331: }
332: }
333: }
334:
335:
336:
337: #######################################################
338: # main
339: #
340:
341: logger("INFO", "archivecheck $version");
342:
343: # make shure the right user is running this program
344: my $user = getlogin;
345: if (($user ne "archive")&&($user ne "root")) {
346: logger("ABORT", "you must be archive or root user to run this program!");
347: exit 1;
348: }
349:
350: # read index.meta file
351: my ($document, $rootnode) = read_xml($metafile);
352:
353: # check file and add archive date
354: my $files_to_archive = read_resource_meta($rootnode);
355:
356: # check for .archived file
357: if (-f "$docdir/.archived") {
358: logger("INFO", ".archived file exists.");
359: } else {
360: logger("WARNING", "no .archived file!");
361: $warncnt++;
362: }
363:
364: # check archive
1.2 ! casties 365: my %archived_files = ();
! 366: my $archcnt = 0;
! 367: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
! 368: # TSM needs two different paths because of historical mount points :-(
! 369: my $docdir1 = "/mpiwg/archive/data/";
! 370: $archcnt += run_query($docdir1, \%archived_files);
! 371: my $docdir2 = "/mpiwg/archive/";
! 372: $archcnt += run_query($docdir2, \%archived_files);
! 373: } else {
! 374: $archcnt += run_query("$docdir/", \%archived_files);
! 375: }
! 376: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
1.1 casties 377:
1.2 ! casties 378: my $num_arch_files = (scalar keys %archived_files);
1.1 casties 379: if ($num_arch_files == 0) {
380: logger("ABORT", "no archive of this directory!!");
381: exit 1;
382: }
383: logger("INFO", "$num_arch_files files archived");
384:
385: # check list of archived files
1.2 ! casties 386: check_files($files_to_archive, \%archived_files);
1.1 casties 387:
388: # read files from filesystem
389: my %fsfiles;
390: my %fsdirs;
391: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
392:
393: logger("INFO", "$num_fs_files files still on disk!");
394: if ($num_fs_files > 0) {
1.2 ! casties 395: compare_files(\%fsfiles, \%archived_files);
1.1 casties 396: }
397:
398: logger("INFO", "$warncnt warnings");
399: logger("INFO", "$errcnt errors");
400: if ($errcnt == 0) {
1.2 ! casties 401: logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
1.1 casties 402: } else {
403: logger("ABORT", "there were $errcnt errors!!");
404: exit 1;
405: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>