Annotation of foxridge-archiver/archivecheck.pl, revision 1.8
1.8 ! casties 1: #!/usr/bin/perl -w
1.1 casties 2:
3: use strict;
4:
5: use XML::LibXML;
6:
7: # MPIWG libraries
8: use lib '/usr/local/mpiwg/archive';
9: use MPIWGStor;
10:
11: # make output unbuffered
12: $|=1;
13:
14: #######################################################
15: # internal parameters
16: #
17:
18: # program version
1.8 ! casties 19: my $version = "0.4.5 (13.7.2009 ROC)";
1.1 casties 20:
21: # read command line parameters
22: my $args = parseargs;
23:
24: # debug level
25: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
26:
27: # XML namespace (not really implemented!)
28: my $namespace = "";
29:
30: # archive name (archive-path element, usually == $docdir)
31: my $archname;
32:
33:
34: #######################################################
35: # external programs
36: #
37: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
38: if (! -x $archprog) {
39: logger("ABORT", "TSM client program '$archprog' missing!!");
40: exit 1;
41: }
42: # my $checkprog = "/usr/local/mpiwg/archive/metacheck";
43: # if (! -x $checkprog) {
44: # logge("ABORT", "meta data checking program '$checkprog' missing!!");
45: # exit 1;
46: # }
47: # log file for archiver
48: my $log_file = "/var/tmp/archivecheck.log";
49: if (! open LOG, ">>$log_file") {
50: logger("ABORT", "unable to write log file '$log_file'!!");
51: exit 1;
52: }
53:
54: #######################################################
55: # check parameters that were passed to the program
56: #
57: my $docdir = $$args{'path'};
58: if (! $docdir) {
59: print "ABORT: no document directory given!\n";
60: exit 1;
61: }
62: # strip trailing slashes
63: $docdir =~ s/\/$//;
64: if (! -d $docdir) {
65: print "ABORT: document directory \'$docdir\' doesn't exist!\n";
66: exit 1;
67: }
68:
69: my $metafile = "$docdir/index.meta";
70: if (! -f $metafile) {
71: print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
72: exit 1;
73: }
74:
75: #######################################################
76: # internal variables
77: #
78:
79: # number of errors
80: my $errcnt = 0;
81: # number of warnings
82: my $warncnt = 0;
83:
84: #######################################################
85: # subroutines
86: #
87:
88:
89: #
90: # $files = read_resource_meta($rootnode)
91: #
92: # checks general resource meta information and reads the list of files
93: #
94: sub read_resource_meta {
95: my ($rootnode) = @_;
96: my %files;
97: #
98: # archive path
99: #
100: # get archive-path
101: $archname = sstrip($rootnode->findvalue('child::archive-path'));
102: if (! $archname) {
103: logger("ABORT", "archive-name element missing!!");
104: exit 1;
105: }
106:
107: #
108: # files
109: #
110: my @filenodes = $rootnode->findnodes('child::file');
111: foreach my $fn (@filenodes) {
112: my $name = sstrip($fn->findvalue('child::name'));
113: my $path = sstrip($fn->findvalue('child::path'));
114: logger("DEBUG", "FILE: ($path)$name");
115: my $f = ($path) ? "$path/$name" : "$name";
116: $files{$f} = [$name];
117: }
118:
119: #
120: # dirs
121: #
122: my @dirnodes = $rootnode->findnodes('child::dir');
123: foreach my $fn (@dirnodes) {
124: my $name = sstrip($fn->findvalue('child::name'));
125: my $path = sstrip($fn->findvalue('child::path'));
126: logger("DEBUG", "DIR: ($path)$name");
1.5 casties 127: my $f = "$name";
128: if (($path)&&($path ne '.')) {
129: $f = "$path/$name";
130: }
1.1 casties 131: $files{$f} = [$name];
132: }
133:
134: #
135: # archive-storage-date
136: #
137: my $archdate = $rootnode->find('child::archive-storage-date');
138: if ($archdate) {
139: logger("INFO", "archive storage date: $archdate");
140: } else {
141: logger("ERROR", "archive storage date missing!");
142: $errcnt++;
143: }
144: return \%files;
145: }
146:
147:
148: #
149: # fs_read_files($realdir, $docdir, \%files, \%dirs)
150: #
151: # reads all files and directories below $realdir and puts the
152: # files in %files and directories in %dirs
153: # $docdir is only for recursion, it should be empty when called
154: # from outside
155: #
156: sub fs_read_files {
157: my ($directory, $docdir, $files, $dirs) = @_;
158: my $cnt = 0;
159:
160: if (! opendir DIR, $directory) {
161: return 0;
162: }
163: my @dirfiles = readdir DIR;
164: foreach my $fn (@dirfiles) {
165: # ignore names starting with a dot
166: next if ($fn =~ /^\./);
167: # ignore other silly files
168: next if ($junk_files{$fn});
169:
170: $cnt++;
171: my $f = "$directory/$fn";
172: my $docf = ($docdir) ? "$docdir/$fn" : $fn;
173: #print "fs_file: \"$f\"\n";
174: if (-f $f) {
175: #print " is file\n";
176: my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
177: $atime,$mtime,$ctime,$blksize,$blocks)
178: = stat(_);
179: $$files{$docf} = [$fn, $size, stime($mtime)];
180: #logger("TEST", "fn $fn, size $size, mtime $mtime");
181: } elsif (-d _) {
182: #print " is dir\n";
183: $$dirs{$docf} = $fn;
184: # recurse into directory
185: $cnt += fs_read_files($f, $docf, $files, $dirs);
186: }
187: }
188: return $cnt;
189: }
190:
191:
192: #
1.2 casties 193: # $archcnt = run_query($dirquery, \%files)
1.1 casties 194: #
1.2 casties 195: # runs the archiver program on $dirquery and adds to the hash of archived files
1.1 casties 196: #
197: # Sample output:
198: # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839
199: #
200: sub run_query {
1.2 casties 201: my ($dirquery, $files) = @_;
1.1 casties 202: print LOG "START checkarchive $version ", scalar localtime, "\n";
203: my $archcmd = $archprog;
204: $archcmd .= " query archive -subdir=yes";
205: $archcmd .= " -description='$archname'";
1.2 casties 206: $archcmd .= " '$dirquery'";
1.1 casties 207:
1.6 casties 208: logger('INFO', "querying TSM server for $dirquery, please wait...");
1.1 casties 209: my $archcnt = 0;
210: print LOG "CMD: $archcmd\n";
211: if (open ARCH, "$archcmd 2>&1 |") {
212: while (<ARCH>) {
213: chomp;
214: print LOG "ARCH: $_\n";
215: if (/
216: \s*([\d,]+) # size
217: \s+(\w+) # unit of size
218: \s+([\d\/]+) # date mm\/dd\/yy
219: \s+([\d:]+) # time
220: \s+(\S+) # file name
221: \s+(\w+) # expiry
222: \s+(\S+) # archive label
223: /x) {
224: my $size = $1;
225: my $sunit = $2;
226: my $date = $3;
227: my $time = $4;
228: my $file = $5;
229: my $exp = $6;
230: my $label = $7;
231: $size =~ s/,//g;
232: $date = ymd_date($date);
233: logger("DEBUG", " QUERY: file '$file'");
234: $archcnt++;
1.2 casties 235: if ($$files{$file}) {
236: logger("DEBUG", "file $file seems to be archived multiple times: $time $date");
237: #$warncnt++;
238: }
239: if (length $file <= length $docdir) {
240: logger("DEBUG", "not below document dir: $file");
241: next;
242: }
243: $$files{$file} = [$size, "$date $time"];
1.1 casties 244: }
245: }
246: } else {
247: logger("ABORT", "unable to start archive command '$archcmd'!!");
248: exit 1;
249: }
250:
1.2 casties 251: return $archcnt;
1.1 casties 252: }
253:
254:
255: #
256: # check_files(\%files_to_archive, \%archived_files)
257: #
258: # compares the list of archived and to be archived files
259: #
260: sub check_files {
261: my ($to_archive, $archived) = @_;
262:
263: my $nt = scalar keys %$to_archive;
264: my $na = scalar keys %$archived;
265:
266: foreach my $ft (sort keys %$to_archive) {
267: my $fp = "$docdir/$ft";
268: #logger("DEBUG", " fp: $fp");
269: if ($$archived{$fp}) {
270: logger("DEBUG", "$ft archived OK");
271: $$archived{$fp}->[2] = "OK";
272: } else {
273: logger("ERROR", "file entry '$ft' missing from archive!");
274: $errcnt++;
275: }
276: }
277:
278: foreach my $fa (sort keys %$archived) {
279: if (! $$archived{$fa}->[2]) {
280: my ($fn, $fp) = split_file_path($fa);
281: if ($index_files{$fn}) {
282: logger("DEBUG", "$fa ignored");
283: $na--;
284: } else {
285: logger("WARNING", "$fa archived but not in list!");
286: $warncnt++;
287: }
288: }
289: }
290:
291: if ($nt > $na) {
292: logger("WARNING", "less files were archived ($na vs. $nt)!");
293: $warncnt++;
294: } elsif ($na > $nt) {
295: logger("WARNING", "more files were archived ($na vs. $nt)!");
296: $warncnt++;
297: }
298:
299: }
300:
301: #
302: # compare_files(\%files_on_disk, \%archived_files)
303: #
304: # compares the list of archived files and files on disk
305: #
306: sub compare_files {
307: my ($fs_files, $archived) = @_;
308:
309: foreach my $ft (sort keys %$fs_files) {
310: next if ($index_files{$ft});
311: my $fp = "$docdir/$ft";
312: #logger("DEBUG", " fp: $fp");
313: if ($$archived{$fp}) {
314: next if ($index_files{$ft});
315:
316: my $asize = $$archived{$fp}[0];
317: my $atime = $$archived{$fp}[1];
318: my $fsize = $$fs_files{$ft}[1];
319: my $ftime = $$fs_files{$ft}[2];
320: if ($asize != $fsize) {
321: logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!");
322: $errcnt++;
323: } elsif ($atime lt $ftime) {
324: logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!");
325: $errcnt++;
326: } else {
1.3 casties 327: logger("WARNING", "archived file $ft still on disk");
328: $warncnt++;
1.1 casties 329: }
330: } else {
1.2 casties 331: logger("ERROR", "file on disk '$ft' is not in archive!");
1.1 casties 332: $errcnt++;
333: }
334: }
335: }
336:
337:
338:
339: #######################################################
340: # main
341: #
342:
343: logger("INFO", "archivecheck $version");
344:
345: # make shure the right user is running this program
1.7 casties 346: my $user = getlogin || getpwuid($<) ;
1.4 casties 347: if (not (($user eq "archive")||($user eq "root"))) {
1.1 casties 348: logger("ABORT", "you must be archive or root user to run this program!");
349: exit 1;
350: }
351:
352: # read index.meta file
353: my ($document, $rootnode) = read_xml($metafile);
354:
355: # check file and add archive date
356: my $files_to_archive = read_resource_meta($rootnode);
357:
358: # check for .archived file
359: if (-f "$docdir/.archived") {
360: logger("INFO", ".archived file exists.");
361: } else {
362: logger("WARNING", "no .archived file!");
363: $warncnt++;
364: }
365:
366: # check archive
1.2 casties 367: my %archived_files = ();
368: my $archcnt = 0;
369: if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) {
370: # TSM needs two different paths because of historical mount points :-(
1.8 ! casties 371: # ...doesn't work anymore since old filespace was renamed (13.7.2009)
! 372: # my $docdir1 = "/mpiwg/archivedataold/";
! 373: # $archcnt += run_query($docdir1, \%archived_files);
1.2 casties 374: my $docdir2 = "/mpiwg/archive/";
375: $archcnt += run_query($docdir2, \%archived_files);
376: } else {
377: $archcnt += run_query("$docdir/", \%archived_files);
378: }
379: logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files.");
1.1 casties 380:
1.2 casties 381: my $num_arch_files = (scalar keys %archived_files);
1.1 casties 382: if ($num_arch_files == 0) {
383: logger("ABORT", "no archive of this directory!!");
384: exit 1;
385: }
386: logger("INFO", "$num_arch_files files archived");
387:
388: # check list of archived files
1.2 casties 389: check_files($files_to_archive, \%archived_files);
1.1 casties 390:
391: # read files from filesystem
392: my %fsfiles;
393: my %fsdirs;
394: my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs);
395:
396: logger("INFO", "$num_fs_files files still on disk!");
397: if ($num_fs_files > 0) {
1.2 casties 398: compare_files(\%fsfiles, \%archived_files);
1.1 casties 399: }
400:
401: logger("INFO", "$warncnt warnings");
402: logger("INFO", "$errcnt errors");
403: if ($errcnt == 0) {
1.2 casties 404: logger("DONE", "" . (scalar keys %archived_files) . " archived files OK");
1.3 casties 405: exit 0;
1.1 casties 406: } else {
407: logger("ABORT", "there were $errcnt errors!!");
408: exit 1;
409: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>