Mercurial > hg > foxridge-archiver
annotate archivecheck.pl @ 60:5bee75ca9eb3 default tip
added old makemeta-quantum.pl that was not in CVS.
author | casties |
---|---|
date | Thu, 16 Mar 2017 18:29:58 +0100 |
parents | 2208ed7370cb |
children |
rev | line source |
---|---|
57 | 1 #!/usr/bin/perl -w |
0 | 2 |
3 use strict; | |
4 | |
5 use XML::LibXML; | |
6 | |
7 # MPIWG libraries | |
8 use lib '/usr/local/mpiwg/archive'; | |
9 use MPIWGStor; | |
10 | |
11 # make output unbuffered | |
12 $|=1; | |
13 | |
14 ####################################################### | |
15 # internal parameters | |
16 # | |
17 | |
18 # program version | |
57 | 19 my $version = "0.4.5 (13.7.2009 ROC)"; |
0 | 20 |
21 # read command line parameters | |
22 my $args = parseargs; | |
23 | |
24 # debug level | |
25 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; | |
26 | |
27 # XML namespace (not really implemented!) | |
28 my $namespace = ""; | |
29 | |
30 # archive name (archive-path element, usually == $docdir) | |
31 my $archname; | |
32 | |
33 | |
34 ####################################################### | |
35 # external programs | |
36 # | |
37 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
38 if (! -x $archprog) { | |
39 logger("ABORT", "TSM client program '$archprog' missing!!"); | |
40 exit 1; | |
41 } | |
42 # my $checkprog = "/usr/local/mpiwg/archive/metacheck"; | |
43 # if (! -x $checkprog) { | |
44 # logge("ABORT", "meta data checking program '$checkprog' missing!!"); | |
45 # exit 1; | |
46 # } | |
47 # log file for archiver | |
48 my $log_file = "/var/tmp/archivecheck.log"; | |
49 if (! open LOG, ">>$log_file") { | |
50 logger("ABORT", "unable to write log file '$log_file'!!"); | |
51 exit 1; | |
52 } | |
53 | |
54 ####################################################### | |
55 # check parameters that were passed to the program | |
56 # | |
57 my $docdir = $$args{'path'}; | |
58 if (! $docdir) { | |
59 print "ABORT: no document directory given!\n"; | |
60 exit 1; | |
61 } | |
62 # strip trailing slashes | |
63 $docdir =~ s/\/$//; | |
64 if (! -d $docdir) { | |
65 print "ABORT: document directory \'$docdir\' doesn't exist!\n"; | |
66 exit 1; | |
67 } | |
68 | |
69 my $metafile = "$docdir/index.meta"; | |
70 if (! -f $metafile) { | |
71 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | |
72 exit 1; | |
73 } | |
74 | |
75 ####################################################### | |
76 # internal variables | |
77 # | |
78 | |
79 # number of errors | |
80 my $errcnt = 0; | |
81 # number of warnings | |
82 my $warncnt = 0; | |
83 | |
84 ####################################################### | |
85 # subroutines | |
86 # | |
87 | |
88 | |
89 # | |
90 # $files = read_resource_meta($rootnode) | |
91 # | |
92 # checks general resource meta information and reads the list of files | |
93 # | |
94 sub read_resource_meta { | |
95 my ($rootnode) = @_; | |
96 my %files; | |
97 # | |
98 # archive path | |
99 # | |
100 # get archive-path | |
101 $archname = sstrip($rootnode->findvalue('child::archive-path')); | |
102 if (! $archname) { | |
103 logger("ABORT", "archive-name element missing!!"); | |
104 exit 1; | |
105 } | |
106 | |
107 # | |
108 # files | |
109 # | |
110 my @filenodes = $rootnode->findnodes('child::file'); | |
111 foreach my $fn (@filenodes) { | |
112 my $name = sstrip($fn->findvalue('child::name')); | |
113 my $path = sstrip($fn->findvalue('child::path')); | |
114 logger("DEBUG", "FILE: ($path)$name"); | |
115 my $f = ($path) ? "$path/$name" : "$name"; | |
116 $files{$f} = [$name]; | |
117 } | |
118 | |
119 # | |
120 # dirs | |
121 # | |
122 my @dirnodes = $rootnode->findnodes('child::dir'); | |
123 foreach my $fn (@dirnodes) { | |
124 my $name = sstrip($fn->findvalue('child::name')); | |
125 my $path = sstrip($fn->findvalue('child::path')); | |
126 logger("DEBUG", "DIR: ($path)$name"); | |
26 | 127 my $f = "$name"; |
128 if (($path)&&($path ne '.')) { | |
129 $f = "$path/$name"; | |
130 } | |
0 | 131 $files{$f} = [$name]; |
132 } | |
133 | |
134 # | |
135 # archive-storage-date | |
136 # | |
137 my $archdate = $rootnode->find('child::archive-storage-date'); | |
138 if ($archdate) { | |
139 logger("INFO", "archive storage date: $archdate"); | |
140 } else { | |
141 logger("ERROR", "archive storage date missing!"); | |
142 $errcnt++; | |
143 } | |
144 return \%files; | |
145 } | |
146 | |
147 | |
148 # | |
149 # fs_read_files($realdir, $docdir, \%files, \%dirs) | |
150 # | |
151 # reads all files and directories below $realdir and puts the | |
152 # files in %files and directories in %dirs | |
153 # $docdir is only for recursion, it should be empty when called | |
154 # from outside | |
155 # | |
156 sub fs_read_files { | |
157 my ($directory, $docdir, $files, $dirs) = @_; | |
158 my $cnt = 0; | |
159 | |
160 if (! opendir DIR, $directory) { | |
161 return 0; | |
162 } | |
163 my @dirfiles = readdir DIR; | |
164 foreach my $fn (@dirfiles) { | |
165 # ignore names starting with a dot | |
166 next if ($fn =~ /^\./); | |
167 # ignore other silly files | |
168 next if ($junk_files{$fn}); | |
169 | |
170 $cnt++; | |
171 my $f = "$directory/$fn"; | |
172 my $docf = ($docdir) ? "$docdir/$fn" : $fn; | |
173 #print "fs_file: \"$f\"\n"; | |
174 if (-f $f) { | |
175 #print " is file\n"; | |
176 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, | |
177 $atime,$mtime,$ctime,$blksize,$blocks) | |
178 = stat(_); | |
179 $$files{$docf} = [$fn, $size, stime($mtime)]; | |
180 #logger("TEST", "fn $fn, size $size, mtime $mtime"); | |
181 } elsif (-d _) { | |
182 #print " is dir\n"; | |
183 $$dirs{$docf} = $fn; | |
184 # recurse into directory | |
185 $cnt += fs_read_files($f, $docf, $files, $dirs); | |
186 } | |
187 } | |
188 return $cnt; | |
189 } | |
190 | |
191 | |
192 # | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
193 # $archcnt = run_query($dirquery, \%files) |
0 | 194 # |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
195 # runs the archiver program on $dirquery and adds to the hash of archived files |
0 | 196 # |
197 # Sample output: | |
198 # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839 | |
199 # | |
200 sub run_query { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
201 my ($dirquery, $files) = @_; |
0 | 202 print LOG "START checkarchive $version ", scalar localtime, "\n"; |
203 my $archcmd = $archprog; | |
204 $archcmd .= " query archive -subdir=yes"; | |
205 $archcmd .= " -description='$archname'"; | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
206 $archcmd .= " '$dirquery'"; |
0 | 207 |
50 | 208 logger('INFO', "querying TSM server for $dirquery, please wait..."); |
0 | 209 my $archcnt = 0; |
210 print LOG "CMD: $archcmd\n"; | |
211 if (open ARCH, "$archcmd 2>&1 |") { | |
212 while (<ARCH>) { | |
213 chomp; | |
214 print LOG "ARCH: $_\n"; | |
215 if (/ | |
216 \s*([\d,]+) # size | |
217 \s+(\w+) # unit of size | |
218 \s+([\d\/]+) # date mm\/dd\/yy | |
219 \s+([\d:]+) # time | |
220 \s+(\S+) # file name | |
221 \s+(\w+) # expiry | |
222 \s+(\S+) # archive label | |
223 /x) { | |
224 my $size = $1; | |
225 my $sunit = $2; | |
226 my $date = $3; | |
227 my $time = $4; | |
228 my $file = $5; | |
229 my $exp = $6; | |
230 my $label = $7; | |
231 $size =~ s/,//g; | |
232 $date = ymd_date($date); | |
233 logger("DEBUG", " QUERY: file '$file'"); | |
234 $archcnt++; | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
235 if ($$files{$file}) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
236 logger("DEBUG", "file $file seems to be archived multiple times: $time $date"); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
237 #$warncnt++; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
238 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
239 if (length $file <= length $docdir) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
240 logger("DEBUG", "not below document dir: $file"); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
241 next; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
242 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
243 $$files{$file} = [$size, "$date $time"]; |
0 | 244 } |
245 } | |
246 } else { | |
247 logger("ABORT", "unable to start archive command '$archcmd'!!"); | |
248 exit 1; | |
249 } | |
250 | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
251 return $archcnt; |
0 | 252 } |
253 | |
254 | |
255 # | |
256 # check_files(\%files_to_archive, \%archived_files) | |
257 # | |
258 # compares the list of archived and to be archived files | |
259 # | |
260 sub check_files { | |
261 my ($to_archive, $archived) = @_; | |
262 | |
263 my $nt = scalar keys %$to_archive; | |
264 my $na = scalar keys %$archived; | |
265 | |
266 foreach my $ft (sort keys %$to_archive) { | |
267 my $fp = "$docdir/$ft"; | |
268 #logger("DEBUG", " fp: $fp"); | |
269 if ($$archived{$fp}) { | |
270 logger("DEBUG", "$ft archived OK"); | |
271 $$archived{$fp}->[2] = "OK"; | |
272 } else { | |
273 logger("ERROR", "file entry '$ft' missing from archive!"); | |
274 $errcnt++; | |
275 } | |
276 } | |
277 | |
278 foreach my $fa (sort keys %$archived) { | |
279 if (! $$archived{$fa}->[2]) { | |
280 my ($fn, $fp) = split_file_path($fa); | |
281 if ($index_files{$fn}) { | |
282 logger("DEBUG", "$fa ignored"); | |
283 $na--; | |
284 } else { | |
285 logger("WARNING", "$fa archived but not in list!"); | |
286 $warncnt++; | |
287 } | |
288 } | |
289 } | |
290 | |
291 if ($nt > $na) { | |
292 logger("WARNING", "less files were archived ($na vs. $nt)!"); | |
293 $warncnt++; | |
294 } elsif ($na > $nt) { | |
295 logger("WARNING", "more files were archived ($na vs. $nt)!"); | |
296 $warncnt++; | |
297 } | |
298 | |
299 } | |
300 | |
301 # | |
302 # compare_files(\%files_on_disk, \%archived_files) | |
303 # | |
304 # compares the list of archived files and files on disk | |
305 # | |
306 sub compare_files { | |
307 my ($fs_files, $archived) = @_; | |
308 | |
309 foreach my $ft (sort keys %$fs_files) { | |
310 next if ($index_files{$ft}); | |
311 my $fp = "$docdir/$ft"; | |
312 #logger("DEBUG", " fp: $fp"); | |
313 if ($$archived{$fp}) { | |
314 next if ($index_files{$ft}); | |
315 | |
316 my $asize = $$archived{$fp}[0]; | |
317 my $atime = $$archived{$fp}[1]; | |
318 my $fsize = $$fs_files{$ft}[1]; | |
319 my $ftime = $$fs_files{$ft}[2]; | |
320 if ($asize != $fsize) { | |
321 logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!"); | |
322 $errcnt++; | |
323 } elsif ($atime lt $ftime) { | |
324 logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!"); | |
325 $errcnt++; | |
326 } else { | |
11 | 327 logger("WARNING", "archived file $ft still on disk"); |
328 $warncnt++; | |
0 | 329 } |
330 } else { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
331 logger("ERROR", "file on disk '$ft' is not in archive!"); |
0 | 332 $errcnt++; |
333 } | |
334 } | |
335 } | |
336 | |
337 | |
338 | |
339 ####################################################### | |
340 # main | |
341 # | |
342 | |
343 logger("INFO", "archivecheck $version"); | |
344 | |
345 # make shure the right user is running this program | |
52 | 346 my $user = getlogin || getpwuid($<) ; |
19 | 347 if (not (($user eq "archive")||($user eq "root"))) { |
0 | 348 logger("ABORT", "you must be archive or root user to run this program!"); |
349 exit 1; | |
350 } | |
351 | |
352 # read index.meta file | |
353 my ($document, $rootnode) = read_xml($metafile); | |
354 | |
355 # check file and add archive date | |
356 my $files_to_archive = read_resource_meta($rootnode); | |
357 | |
358 # check for .archived file | |
359 if (-f "$docdir/.archived") { | |
360 logger("INFO", ".archived file exists."); | |
361 } else { | |
362 logger("WARNING", "no .archived file!"); | |
363 $warncnt++; | |
364 } | |
365 | |
366 # check archive | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
367 my %archived_files = (); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
368 my $archcnt = 0; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
369 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
370 # TSM needs two different paths because of historical mount points :-( |
57 | 371 # ...doesn't work anymore since old filespace was renamed (13.7.2009) |
372 # my $docdir1 = "/mpiwg/archivedataold/"; | |
373 # $archcnt += run_query($docdir1, \%archived_files); | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
374 my $docdir2 = "/mpiwg/archive/"; |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
375 $archcnt += run_query($docdir2, \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
376 } else { |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
377 $archcnt += run_query("$docdir/", \%archived_files); |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
378 } |
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
379 logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files."); |
0 | 380 |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
381 my $num_arch_files = (scalar keys %archived_files); |
0 | 382 if ($num_arch_files == 0) { |
383 logger("ABORT", "no archive of this directory!!"); | |
384 exit 1; | |
385 } | |
386 logger("INFO", "$num_arch_files files archived"); | |
387 | |
388 # check list of archived files | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
389 check_files($files_to_archive, \%archived_files); |
0 | 390 |
391 # read files from filesystem | |
392 my %fsfiles; | |
393 my %fsdirs; | |
394 my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs); | |
395 | |
396 logger("INFO", "$num_fs_files files still on disk!"); | |
397 if ($num_fs_files > 0) { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
398 compare_files(\%fsfiles, \%archived_files); |
0 | 399 } |
400 | |
401 logger("INFO", "$warncnt warnings"); | |
402 logger("INFO", "$errcnt errors"); | |
403 if ($errcnt == 0) { | |
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
404 logger("DONE", "" . (scalar keys %archived_files) . " archived files OK"); |
11 | 405 exit 0; |
0 | 406 } else { |
407 logger("ABORT", "there were $errcnt errors!!"); | |
408 exit 1; | |
409 } |