Mercurial > hg > foxridge-archiver
annotate archivecheck.pl @ 30:398ef4b8f072
added -online-base option to makemeta-lib
author | casties |
---|---|
date | Mon, 12 Jun 2006 19:01:39 +0200 |
parents | 24d9dd63ae93 |
children | 724c615b5982 |
rev | line source |
---|---|
0 | 1 #!/usr/local/bin/perl -w |
2 | |
3 use strict; | |
4 | |
5 use XML::LibXML; | |
6 | |
7 # MPIWG libraries | |
8 use lib '/usr/local/mpiwg/archive'; | |
9 use MPIWGStor; | |
10 | |
11 # make output unbuffered | |
12 $|=1; | |
13 | |
14 ####################################################### | |
15 # internal parameters | |
16 # | |
17 | |
18 # program version | |
26 | 19 my $version = "0.4.2 (7.12.2005 ROC)"; |
0 | 20 |
21 # read command line parameters | |
22 my $args = parseargs; | |
23 | |
24 # debug level | |
25 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; | |
26 | |
27 # XML namespace (not really implemented!) | |
28 my $namespace = ""; | |
29 | |
30 # archive name (archive-path element, usually == $docdir) | |
31 my $archname; | |
32 | |
33 | |
34 ####################################################### | |
35 # external programs | |
36 # | |
37 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
38 if (! -x $archprog) { | |
39 logger("ABORT", "TSM client program '$archprog' missing!!"); | |
40 exit 1; | |
41 } | |
42 # my $checkprog = "/usr/local/mpiwg/archive/metacheck"; | |
43 # if (! -x $checkprog) { | |
44 # logge("ABORT", "meta data checking program '$checkprog' missing!!"); | |
45 # exit 1; | |
46 # } | |
47 # log file for archiver | |
48 my $log_file = "/var/tmp/archivecheck.log"; | |
49 if (! open LOG, ">>$log_file") { | |
50 logger("ABORT", "unable to write log file '$log_file'!!"); | |
51 exit 1; | |
52 } | |
53 | |
54 ####################################################### | |
55 # check parameters that were passed to the program | |
56 # | |
57 my $docdir = $$args{'path'}; | |
58 if (! $docdir) { | |
59 print "ABORT: no document directory given!\n"; | |
60 exit 1; | |
61 } | |
62 # strip trailing slashes | |
63 $docdir =~ s/\/$//; | |
64 if (! -d $docdir) { | |
65 print "ABORT: document directory \'$docdir\' doesn't exist!\n"; | |
66 exit 1; | |
67 } | |
68 | |
69 my $metafile = "$docdir/index.meta"; | |
70 if (! -f $metafile) { | |
71 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | |
72 exit 1; | |
73 } | |
74 | |
75 ####################################################### | |
76 # internal variables | |
77 # | |
78 | |
79 # number of errors | |
80 my $errcnt = 0; | |
81 # number of warnings | |
82 my $warncnt = 0; | |
83 | |
84 ####################################################### | |
85 # subroutines | |
86 # | |
87 | |
88 | |
89 # | |
90 # $files = read_resource_meta($rootnode) | |
91 # | |
92 # checks general resource meta information and reads the list of files | |
93 # | |
94 sub read_resource_meta { | |
95 my ($rootnode) = @_; | |
96 my %files; | |
97 # | |
98 # archive path | |
99 # | |
100 # get archive-path | |
101 $archname = sstrip($rootnode->findvalue('child::archive-path')); | |
102 if (! $archname) { | |
103 logger("ABORT", "archive-name element missing!!"); | |
104 exit 1; | |
105 } | |
106 | |
107 # | |
108 # files | |
109 # | |
110 my @filenodes = $rootnode->findnodes('child::file'); | |
111 foreach my $fn (@filenodes) { | |
112 my $name = sstrip($fn->findvalue('child::name')); | |
113 my $path = sstrip($fn->findvalue('child::path')); | |
114 logger("DEBUG", "FILE: ($path)$name"); | |
115 my $f = ($path) ? "$path/$name" : "$name"; | |
116 $files{$f} = [$name]; | |
117 } | |
118 | |
119 # | |
120 # dirs | |
121 # | |
122 my @dirnodes = $rootnode->findnodes('child::dir'); | |
123 foreach my $fn (@dirnodes) { | |
124 my $name = sstrip($fn->findvalue('child::name')); | |
125 my $path = sstrip($fn->findvalue('child::path')); | |
126 logger("DEBUG", "DIR: ($path)$name"); | |
26 | 127 my $f = "$name"; |
128 if (($path)&&($path ne '.')) { | |
129 $f = "$path/$name"; | |
130 } | |
0 | 131 $files{$f} = [$name]; |
132 } | |
133 | |
134 # | |
135 # archive-storage-date | |
136 # | |
137 my $archdate = $rootnode->find('child::archive-storage-date'); | |
138 if ($archdate) { | |
139 logger("INFO", "archive storage date: $archdate"); | |
140 } else { | |
141 logger("ERROR", "archive storage date missing!"); | |
142 $errcnt++; | |
143 } | |
144 return \%files; | |
145 } | |
146 | |
147 | |
148 # | |
149 # fs_read_files($realdir, $docdir, \%files, \%dirs) | |
150 # | |
151 # reads all files and directories below $realdir and puts the | |
152 # files in %files and directories in %dirs | |
153 # $docdir is only for recursion, it should be empty when called | |
154 # from outside | |
155 # | |
156 sub fs_read_files { | |
157 my ($directory, $docdir, $files, $dirs) = @_; | |
158 my $cnt = 0; | |
159 | |
160 if (! opendir DIR, $directory) { | |
161 return 0; | |
162 } | |
163 my @dirfiles = readdir DIR; | |
164 foreach my $fn (@dirfiles) { | |
165 # ignore names starting with a dot | |
166 next if ($fn =~ /^\./); | |
167 # ignore other silly files | |
168 next if ($junk_files{$fn}); | |
169 | |
170 $cnt++; | |
171 my $f = "$directory/$fn"; | |
172 my $docf = ($docdir) ? "$docdir/$fn" : $fn; | |
173 #print "fs_file: \"$f\"\n"; | |
174 if (-f $f) { | |
175 #print " is file\n"; | |
176 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, | |
177 $atime,$mtime,$ctime,$blksize,$blocks) | |
178 = stat(_); | |
179 $$files{$docf} = [$fn, $size, stime($mtime)]; | |
180 #logger("TEST", "fn $fn, size $size, mtime $mtime"); | |
181 } elsif (-d _) { | |
182 #print " is dir\n"; | |
183 $$dirs{$docf} = $fn; | |
184 # recurse into directory | |
185 $cnt += fs_read_files($f, $docf, $files, $dirs); | |
186 } | |
187 } | |
188 return $cnt; | |
189 } | |
190 | |
191 | |
192 # | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
193 # $archcnt = run_query($dirquery, \%files) |
0 | 194 # |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
195 # runs the archiver program on $dirquery and adds to the hash of archived files |
0 | 196 # |
197 # Sample output: | |
198 # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839 | |
199 # | |
200 sub run_query { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
201 my ($dirquery, $files) = @_; |
0 | 202 print LOG "START checkarchive $version ", scalar localtime, "\n"; |
203 my $archcmd = $archprog; | |
204 $archcmd .= " query archive -subdir=yes"; | |
205 $archcmd .= " -description='$archname'"; | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
206 $archcmd .= " '$dirquery'"; |
0 | 207 |
208 my $archcnt = 0; | |
209 print LOG "CMD: $archcmd\n"; | |
210 if (open ARCH, "$archcmd 2>&1 |") { | |
211 while (<ARCH>) { | |
212 chomp; | |
213 print LOG "ARCH: $_\n"; | |
214 if (/ | |
215 \s*([\d,]+) # size | |
216 \s+(\w+) # unit of size | |
217 \s+([\d\/]+) # date mm\/dd\/yy | |
218 \s+([\d:]+) # time | |
219 \s+(\S+) # file name | |
220 \s+(\w+) # expiry | |
221 \s+(\S+) # archive label | |
222 /x) { | |
223 my $size = $1; | |
224 my $sunit = $2; | |
225 my $date = $3; | |
226 my $time = $4; | |
227 my $file = $5; | |
228 my $exp = $6; | |
229 my $label = $7; | |
230 $size =~ s/,//g; | |
231 $date = ymd_date($date); | |
232 logger("DEBUG", " QUERY: file '$file'"); | |
233 $archcnt++; | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
234 if ($$files{$file}) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
235 logger("DEBUG", "file $file seems to be archived multiple times: $time $date"); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
236 #$warncnt++; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
237 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
238 if (length $file <= length $docdir) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
239 logger("DEBUG", "not below document dir: $file"); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
240 next; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
241 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
242 $$files{$file} = [$size, "$date $time"]; |
0 | 243 } |
244 } | |
245 } else { | |
246 logger("ABORT", "unable to start archive command '$archcmd'!!"); | |
247 exit 1; | |
248 } | |
249 | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
250 return $archcnt; |
0 | 251 } |
252 | |
253 | |
254 # | |
255 # check_files(\%files_to_archive, \%archived_files) | |
256 # | |
257 # compares the list of archived and to be archived files | |
258 # | |
259 sub check_files { | |
260 my ($to_archive, $archived) = @_; | |
261 | |
262 my $nt = scalar keys %$to_archive; | |
263 my $na = scalar keys %$archived; | |
264 | |
265 foreach my $ft (sort keys %$to_archive) { | |
266 my $fp = "$docdir/$ft"; | |
267 #logger("DEBUG", " fp: $fp"); | |
268 if ($$archived{$fp}) { | |
269 logger("DEBUG", "$ft archived OK"); | |
270 $$archived{$fp}->[2] = "OK"; | |
271 } else { | |
272 logger("ERROR", "file entry '$ft' missing from archive!"); | |
273 $errcnt++; | |
274 } | |
275 } | |
276 | |
277 foreach my $fa (sort keys %$archived) { | |
278 if (! $$archived{$fa}->[2]) { | |
279 my ($fn, $fp) = split_file_path($fa); | |
280 if ($index_files{$fn}) { | |
281 logger("DEBUG", "$fa ignored"); | |
282 $na--; | |
283 } else { | |
284 logger("WARNING", "$fa archived but not in list!"); | |
285 $warncnt++; | |
286 } | |
287 } | |
288 } | |
289 | |
290 if ($nt > $na) { | |
291 logger("WARNING", "less files were archived ($na vs. $nt)!"); | |
292 $warncnt++; | |
293 } elsif ($na > $nt) { | |
294 logger("WARNING", "more files were archived ($na vs. $nt)!"); | |
295 $warncnt++; | |
296 } | |
297 | |
298 } | |
299 | |
300 # | |
301 # compare_files(\%files_on_disk, \%archived_files) | |
302 # | |
303 # compares the list of archived files and files on disk | |
304 # | |
305 sub compare_files { | |
306 my ($fs_files, $archived) = @_; | |
307 | |
308 foreach my $ft (sort keys %$fs_files) { | |
309 next if ($index_files{$ft}); | |
310 my $fp = "$docdir/$ft"; | |
311 #logger("DEBUG", " fp: $fp"); | |
312 if ($$archived{$fp}) { | |
313 next if ($index_files{$ft}); | |
314 | |
315 my $asize = $$archived{$fp}[0]; | |
316 my $atime = $$archived{$fp}[1]; | |
317 my $fsize = $$fs_files{$ft}[1]; | |
318 my $ftime = $$fs_files{$ft}[2]; | |
319 if ($asize != $fsize) { | |
320 logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!"); | |
321 $errcnt++; | |
322 } elsif ($atime lt $ftime) { | |
323 logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!"); | |
324 $errcnt++; | |
325 } else { | |
11 | 326 logger("WARNING", "archived file $ft still on disk"); |
327 $warncnt++; | |
0 | 328 } |
329 } else { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
330 logger("ERROR", "file on disk '$ft' is not in archive!"); |
0 | 331 $errcnt++; |
332 } | |
333 } | |
334 } | |
335 | |
336 | |
337 | |
338 ####################################################### | |
339 # main | |
340 # | |
341 | |
342 logger("INFO", "archivecheck $version"); | |
343 | |
344 # make shure the right user is running this program | |
345 my $user = getlogin; | |
19 | 346 if (not (($user eq "archive")||($user eq "root"))) { |
0 | 347 logger("ABORT", "you must be archive or root user to run this program!"); |
348 exit 1; | |
349 } | |
350 | |
351 # read index.meta file | |
352 my ($document, $rootnode) = read_xml($metafile); | |
353 | |
354 # check file and add archive date | |
355 my $files_to_archive = read_resource_meta($rootnode); | |
356 | |
357 # check for .archived file | |
358 if (-f "$docdir/.archived") { | |
359 logger("INFO", ".archived file exists."); | |
360 } else { | |
361 logger("WARNING", "no .archived file!"); | |
362 $warncnt++; | |
363 } | |
364 | |
365 # check archive | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
366 my %archived_files = (); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
367 my $archcnt = 0; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
368 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
369 # TSM needs two different paths because of historical mount points :-( |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
370 my $docdir1 = "/mpiwg/archive/data/"; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
371 $archcnt += run_query($docdir1, \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
372 my $docdir2 = "/mpiwg/archive/"; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
373 $archcnt += run_query($docdir2, \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
374 } else { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
375 $archcnt += run_query("$docdir/", \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
376 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
377 logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files."); |
0 | 378 |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
379 my $num_arch_files = (scalar keys %archived_files); |
0 | 380 if ($num_arch_files == 0) { |
381 logger("ABORT", "no archive of this directory!!"); | |
382 exit 1; | |
383 } | |
384 logger("INFO", "$num_arch_files files archived"); | |
385 | |
386 # check list of archived files | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
387 check_files($files_to_archive, \%archived_files); |
0 | 388 |
389 # read files from filesystem | |
390 my %fsfiles; | |
391 my %fsdirs; | |
392 my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs); | |
393 | |
394 logger("INFO", "$num_fs_files files still on disk!"); | |
395 if ($num_fs_files > 0) { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
396 compare_files(\%fsfiles, \%archived_files); |
0 | 397 } |
398 | |
399 logger("INFO", "$warncnt warnings"); | |
400 logger("INFO", "$errcnt errors"); | |
401 if ($errcnt == 0) { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
402 logger("DONE", "" . (scalar keys %archived_files) . " archived files OK"); |
11 | 403 exit 0; |
0 | 404 } else { |
405 logger("ABORT", "there were $errcnt errors!!"); | |
406 exit 1; | |
407 } |