Mercurial > hg > foxridge-archiver
annotate archivecheck.pl @ 20:79c6618e8dfa
small fixes
author | casties |
---|---|
date | Tue, 20 Sep 2005 19:30:34 +0200 |
parents | 320c4b93bf39 |
children | 24d9dd63ae93 |
rev | line source |
---|---|
0 | 1 #!/usr/local/bin/perl -w |
2 | |
3 use strict; | |
4 | |
5 use XML::LibXML; | |
6 | |
7 # MPIWG libraries | |
8 use lib '/usr/local/mpiwg/archive'; | |
9 use MPIWGStor; | |
10 | |
11 # make output unbuffered | |
12 $|=1; | |
13 | |
14 ####################################################### | |
15 # internal parameters | |
16 # | |
17 | |
18 # program version | |
11 | 19 my $version = "0.4.1 (15.2.2005)"; |
0 | 20 |
21 # read command line parameters | |
22 my $args = parseargs; | |
23 | |
24 # debug level | |
25 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; | |
26 | |
27 # XML namespace (not really implemented!) | |
28 my $namespace = ""; | |
29 | |
30 # archive name (archive-path element, usually == $docdir) | |
31 my $archname; | |
32 # archive storage date | |
33 my $archdate; | |
34 | |
35 | |
36 ####################################################### | |
37 # external programs | |
38 # | |
39 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
40 if (! -x $archprog) { | |
41 logger("ABORT", "TSM client program '$archprog' missing!!"); | |
42 exit 1; | |
43 } | |
44 # my $checkprog = "/usr/local/mpiwg/archive/metacheck"; | |
45 # if (! -x $checkprog) { | |
46 # logge("ABORT", "meta data checking program '$checkprog' missing!!"); | |
47 # exit 1; | |
48 # } | |
49 # log file for archiver | |
50 my $log_file = "/var/tmp/archivecheck.log"; | |
51 if (! open LOG, ">>$log_file") { | |
52 logger("ABORT", "unable to write log file '$log_file'!!"); | |
53 exit 1; | |
54 } | |
55 | |
56 ####################################################### | |
57 # check parameters that were passed to the program | |
58 # | |
59 my $docdir = $$args{'path'}; | |
60 if (! $docdir) { | |
61 print "ABORT: no document directory given!\n"; | |
62 exit 1; | |
63 } | |
64 # strip trailing slashes | |
65 $docdir =~ s/\/$//; | |
66 if (! -d $docdir) { | |
67 print "ABORT: document directory \'$docdir\' doesn't exist!\n"; | |
68 exit 1; | |
69 } | |
70 | |
71 my $metafile = "$docdir/index.meta"; | |
72 if (! -f $metafile) { | |
73 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | |
74 exit 1; | |
75 } | |
76 | |
77 ####################################################### | |
78 # internal variables | |
79 # | |
80 | |
81 # number of errors | |
82 my $errcnt = 0; | |
83 # number of warnings | |
84 my $warncnt = 0; | |
85 | |
86 ####################################################### | |
87 # subroutines | |
88 # | |
89 | |
90 | |
91 # | |
92 # $files = read_resource_meta($rootnode) | |
93 # | |
94 # checks general resource meta information and reads the list of files | |
95 # | |
96 sub read_resource_meta { | |
97 my ($rootnode) = @_; | |
98 my %files; | |
99 # | |
100 # archive path | |
101 # | |
102 # get archive-path | |
103 $archname = sstrip($rootnode->findvalue('child::archive-path')); | |
104 if (! $archname) { | |
105 logger("ABORT", "archive-name element missing!!"); | |
106 exit 1; | |
107 } | |
108 | |
109 # | |
110 # files | |
111 # | |
112 my @filenodes = $rootnode->findnodes('child::file'); | |
113 foreach my $fn (@filenodes) { | |
114 my $name = sstrip($fn->findvalue('child::name')); | |
115 my $path = sstrip($fn->findvalue('child::path')); | |
116 logger("DEBUG", "FILE: ($path)$name"); | |
117 my $f = ($path) ? "$path/$name" : "$name"; | |
118 $files{$f} = [$name]; | |
119 } | |
120 | |
121 # | |
122 # dirs | |
123 # | |
124 my @dirnodes = $rootnode->findnodes('child::dir'); | |
125 foreach my $fn (@dirnodes) { | |
126 my $name = sstrip($fn->findvalue('child::name')); | |
127 my $path = sstrip($fn->findvalue('child::path')); | |
128 logger("DEBUG", "DIR: ($path)$name"); | |
129 my $f = ($path) ? "$path/$name" : "$name"; | |
130 $files{$f} = [$name]; | |
131 } | |
132 | |
133 # | |
134 # archive-storage-date | |
135 # | |
136 my $archdate = $rootnode->find('child::archive-storage-date'); | |
137 if ($archdate) { | |
138 logger("INFO", "archive storage date: $archdate"); | |
139 } else { | |
140 logger("ERROR", "archive storage date missing!"); | |
141 $errcnt++; | |
142 } | |
143 return \%files; | |
144 } | |
145 | |
146 | |
147 # | |
148 # fs_read_files($realdir, $docdir, \%files, \%dirs) | |
149 # | |
150 # reads all files and directories below $realdir and puts the | |
151 # files in %files and directories in %dirs | |
152 # $docdir is only for recursion, it should be empty when called | |
153 # from outside | |
154 # | |
155 sub fs_read_files { | |
156 my ($directory, $docdir, $files, $dirs) = @_; | |
157 my $cnt = 0; | |
158 | |
159 if (! opendir DIR, $directory) { | |
160 return 0; | |
161 } | |
162 my @dirfiles = readdir DIR; | |
163 foreach my $fn (@dirfiles) { | |
164 # ignore names starting with a dot | |
165 next if ($fn =~ /^\./); | |
166 # ignore other silly files | |
167 next if ($junk_files{$fn}); | |
168 | |
169 $cnt++; | |
170 my $f = "$directory/$fn"; | |
171 my $docf = ($docdir) ? "$docdir/$fn" : $fn; | |
172 #print "fs_file: \"$f\"\n"; | |
173 if (-f $f) { | |
174 #print " is file\n"; | |
175 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, | |
176 $atime,$mtime,$ctime,$blksize,$blocks) | |
177 = stat(_); | |
178 $$files{$docf} = [$fn, $size, stime($mtime)]; | |
179 #logger("TEST", "fn $fn, size $size, mtime $mtime"); | |
180 } elsif (-d _) { | |
181 #print " is dir\n"; | |
182 $$dirs{$docf} = $fn; | |
183 # recurse into directory | |
184 $cnt += fs_read_files($f, $docf, $files, $dirs); | |
185 } | |
186 } | |
187 return $cnt; | |
188 } | |
189 | |
190 | |
191 # | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
192 # $archcnt = run_query($dirquery, \%files) |
0 | 193 # |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
194 # runs the archiver program on $dirquery and adds to the hash of archived files |
0 | 195 # |
196 # Sample output: | |
197 # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839 | |
198 # | |
199 sub run_query { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
200 my ($dirquery, $files) = @_; |
0 | 201 print LOG "START checkarchive $version ", scalar localtime, "\n"; |
202 my $archcmd = $archprog; | |
203 $archcmd .= " query archive -subdir=yes"; | |
204 $archcmd .= " -description='$archname'"; | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
205 $archcmd .= " '$dirquery'"; |
0 | 206 |
207 my $archcnt = 0; | |
208 print LOG "CMD: $archcmd\n"; | |
209 if (open ARCH, "$archcmd 2>&1 |") { | |
210 while (<ARCH>) { | |
211 chomp; | |
212 print LOG "ARCH: $_\n"; | |
213 if (/ | |
214 \s*([\d,]+) # size | |
215 \s+(\w+) # unit of size | |
216 \s+([\d\/]+) # date mm\/dd\/yy | |
217 \s+([\d:]+) # time | |
218 \s+(\S+) # file name | |
219 \s+(\w+) # expiry | |
220 \s+(\S+) # archive label | |
221 /x) { | |
222 my $size = $1; | |
223 my $sunit = $2; | |
224 my $date = $3; | |
225 my $time = $4; | |
226 my $file = $5; | |
227 my $exp = $6; | |
228 my $label = $7; | |
229 $size =~ s/,//g; | |
230 $date = ymd_date($date); | |
231 logger("DEBUG", " QUERY: file '$file'"); | |
232 $archcnt++; | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
233 if ($$files{$file}) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
234 logger("DEBUG", "file $file seems to be archived multiple times: $time $date"); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
235 #$warncnt++; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
236 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
237 if (length $file <= length $docdir) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
238 logger("DEBUG", "not below document dir: $file"); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
239 next; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
240 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
241 $$files{$file} = [$size, "$date $time"]; |
0 | 242 } |
243 } | |
244 } else { | |
245 logger("ABORT", "unable to start archive command '$archcmd'!!"); | |
246 exit 1; | |
247 } | |
248 | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
249 return $archcnt; |
0 | 250 } |
251 | |
252 | |
253 # | |
254 # check_files(\%files_to_archive, \%archived_files) | |
255 # | |
256 # compares the list of archived and to be archived files | |
257 # | |
258 sub check_files { | |
259 my ($to_archive, $archived) = @_; | |
260 | |
261 my $nt = scalar keys %$to_archive; | |
262 my $na = scalar keys %$archived; | |
263 | |
264 foreach my $ft (sort keys %$to_archive) { | |
265 my $fp = "$docdir/$ft"; | |
266 #logger("DEBUG", " fp: $fp"); | |
267 if ($$archived{$fp}) { | |
268 logger("DEBUG", "$ft archived OK"); | |
269 $$archived{$fp}->[2] = "OK"; | |
270 } else { | |
271 logger("ERROR", "file entry '$ft' missing from archive!"); | |
272 $errcnt++; | |
273 } | |
274 } | |
275 | |
276 foreach my $fa (sort keys %$archived) { | |
277 if (! $$archived{$fa}->[2]) { | |
278 my ($fn, $fp) = split_file_path($fa); | |
279 if ($index_files{$fn}) { | |
280 logger("DEBUG", "$fa ignored"); | |
281 $na--; | |
282 } else { | |
283 logger("WARNING", "$fa archived but not in list!"); | |
284 $warncnt++; | |
285 } | |
286 } | |
287 } | |
288 | |
289 if ($nt > $na) { | |
290 logger("WARNING", "less files were archived ($na vs. $nt)!"); | |
291 $warncnt++; | |
292 } elsif ($na > $nt) { | |
293 logger("WARNING", "more files were archived ($na vs. $nt)!"); | |
294 $warncnt++; | |
295 } | |
296 | |
297 } | |
298 | |
299 # | |
300 # compare_files(\%files_on_disk, \%archived_files) | |
301 # | |
302 # compares the list of archived files and files on disk | |
303 # | |
304 sub compare_files { | |
305 my ($fs_files, $archived) = @_; | |
306 | |
307 foreach my $ft (sort keys %$fs_files) { | |
308 next if ($index_files{$ft}); | |
309 my $fp = "$docdir/$ft"; | |
310 #logger("DEBUG", " fp: $fp"); | |
311 if ($$archived{$fp}) { | |
312 next if ($index_files{$ft}); | |
313 | |
314 my $asize = $$archived{$fp}[0]; | |
315 my $atime = $$archived{$fp}[1]; | |
316 my $fsize = $$fs_files{$ft}[1]; | |
317 my $ftime = $$fs_files{$ft}[2]; | |
318 if ($asize != $fsize) { | |
319 logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!"); | |
320 $errcnt++; | |
321 } elsif ($atime lt $ftime) { | |
322 logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!"); | |
323 $errcnt++; | |
324 } else { | |
11 | 325 logger("WARNING", "archived file $ft still on disk"); |
326 $warncnt++; | |
0 | 327 } |
328 } else { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
329 logger("ERROR", "file on disk '$ft' is not in archive!"); |
0 | 330 $errcnt++; |
331 } | |
332 } | |
333 } | |
334 | |
335 | |
336 | |
337 ####################################################### | |
338 # main | |
339 # | |
340 | |
341 logger("INFO", "archivecheck $version"); | |
342 | |
343 # make shure the right user is running this program | |
344 my $user = getlogin; | |
19 | 345 if (not (($user eq "archive")||($user eq "root"))) { |
0 | 346 logger("ABORT", "you must be archive or root user to run this program!"); |
347 exit 1; | |
348 } | |
349 | |
350 # read index.meta file | |
351 my ($document, $rootnode) = read_xml($metafile); | |
352 | |
353 # check file and add archive date | |
354 my $files_to_archive = read_resource_meta($rootnode); | |
355 | |
356 # check for .archived file | |
357 if (-f "$docdir/.archived") { | |
358 logger("INFO", ".archived file exists."); | |
359 } else { | |
360 logger("WARNING", "no .archived file!"); | |
361 $warncnt++; | |
362 } | |
363 | |
364 # check archive | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
365 my %archived_files = (); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
366 my $archcnt = 0; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
367 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
368 # TSM needs two different paths because of historical mount points :-( |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
369 my $docdir1 = "/mpiwg/archive/data/"; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
370 $archcnt += run_query($docdir1, \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
371 my $docdir2 = "/mpiwg/archive/"; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
372 $archcnt += run_query($docdir2, \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
373 } else { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
374 $archcnt += run_query("$docdir/", \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
375 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
376 logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files."); |
0 | 377 |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
378 my $num_arch_files = (scalar keys %archived_files); |
0 | 379 if ($num_arch_files == 0) { |
380 logger("ABORT", "no archive of this directory!!"); | |
381 exit 1; | |
382 } | |
383 logger("INFO", "$num_arch_files files archived"); | |
384 | |
385 # check list of archived files | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
386 check_files($files_to_archive, \%archived_files); |
0 | 387 |
388 # read files from filesystem | |
389 my %fsfiles; | |
390 my %fsdirs; | |
391 my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs); | |
392 | |
393 logger("INFO", "$num_fs_files files still on disk!"); | |
394 if ($num_fs_files > 0) { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
395 compare_files(\%fsfiles, \%archived_files); |
0 | 396 } |
397 | |
398 logger("INFO", "$warncnt warnings"); | |
399 logger("INFO", "$errcnt errors"); | |
400 if ($errcnt == 0) { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
401 logger("DONE", "" . (scalar keys %archived_files) . " archived files OK"); |
11 | 402 exit 0; |
0 | 403 } else { |
404 logger("ABORT", "there were $errcnt errors!!"); | |
405 exit 1; | |
406 } |