Mercurial > hg > foxridge-archiver
annotate archivecheck.pl @ 19:320c4b93bf39
*** empty log message ***
| author | casties |
|---|---|
| date | Tue, 20 Sep 2005 19:28:26 +0200 |
| parents | b19df18aa19a |
| children | 24d9dd63ae93 |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/local/bin/perl -w |
| 2 | |
| 3 use strict; | |
| 4 | |
| 5 use XML::LibXML; | |
| 6 | |
| 7 # MPIWG libraries | |
| 8 use lib '/usr/local/mpiwg/archive'; | |
| 9 use MPIWGStor; | |
| 10 | |
| 11 # make output unbuffered | |
| 12 $|=1; | |
| 13 | |
| 14 ####################################################### | |
| 15 # internal parameters | |
| 16 # | |
| 17 | |
| 18 # program version | |
| 11 | 19 my $version = "0.4.1 (15.2.2005)"; |
| 0 | 20 |
| 21 # read command line parameters | |
| 22 my $args = parseargs; | |
| 23 | |
| 24 # debug level | |
| 25 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; | |
| 26 | |
| 27 # XML namespace (not really implemented!) | |
| 28 my $namespace = ""; | |
| 29 | |
| 30 # archive name (archive-path element, usually == $docdir) | |
| 31 my $archname; | |
| 32 # archive storage date | |
| 33 my $archdate; | |
| 34 | |
| 35 | |
| 36 ####################################################### | |
| 37 # external programs | |
| 38 # | |
| 39 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
| 40 if (! -x $archprog) { | |
| 41 logger("ABORT", "TSM client program '$archprog' missing!!"); | |
| 42 exit 1; | |
| 43 } | |
| 44 # my $checkprog = "/usr/local/mpiwg/archive/metacheck"; | |
| 45 # if (! -x $checkprog) { | |
| 46 # logge("ABORT", "meta data checking program '$checkprog' missing!!"); | |
| 47 # exit 1; | |
| 48 # } | |
| 49 # log file for archiver | |
| 50 my $log_file = "/var/tmp/archivecheck.log"; | |
| 51 if (! open LOG, ">>$log_file") { | |
| 52 logger("ABORT", "unable to write log file '$log_file'!!"); | |
| 53 exit 1; | |
| 54 } | |
| 55 | |
| 56 ####################################################### | |
| 57 # check parameters that were passed to the program | |
| 58 # | |
| 59 my $docdir = $$args{'path'}; | |
| 60 if (! $docdir) { | |
| 61 print "ABORT: no document directory given!\n"; | |
| 62 exit 1; | |
| 63 } | |
| 64 # strip trailing slashes | |
| 65 $docdir =~ s/\/$//; | |
| 66 if (! -d $docdir) { | |
| 67 print "ABORT: document directory \'$docdir\' doesn't exist!\n"; | |
| 68 exit 1; | |
| 69 } | |
| 70 | |
| 71 my $metafile = "$docdir/index.meta"; | |
| 72 if (! -f $metafile) { | |
| 73 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | |
| 74 exit 1; | |
| 75 } | |
| 76 | |
| 77 ####################################################### | |
| 78 # internal variables | |
| 79 # | |
| 80 | |
| 81 # number of errors | |
| 82 my $errcnt = 0; | |
| 83 # number of warnings | |
| 84 my $warncnt = 0; | |
| 85 | |
| 86 ####################################################### | |
| 87 # subroutines | |
| 88 # | |
| 89 | |
| 90 | |
| 91 # | |
| 92 # $files = read_resource_meta($rootnode) | |
| 93 # | |
| 94 # checks general resource meta information and reads the list of files | |
| 95 # | |
| 96 sub read_resource_meta { | |
| 97 my ($rootnode) = @_; | |
| 98 my %files; | |
| 99 # | |
| 100 # archive path | |
| 101 # | |
| 102 # get archive-path | |
| 103 $archname = sstrip($rootnode->findvalue('child::archive-path')); | |
| 104 if (! $archname) { | |
| 105 logger("ABORT", "archive-name element missing!!"); | |
| 106 exit 1; | |
| 107 } | |
| 108 | |
| 109 # | |
| 110 # files | |
| 111 # | |
| 112 my @filenodes = $rootnode->findnodes('child::file'); | |
| 113 foreach my $fn (@filenodes) { | |
| 114 my $name = sstrip($fn->findvalue('child::name')); | |
| 115 my $path = sstrip($fn->findvalue('child::path')); | |
| 116 logger("DEBUG", "FILE: ($path)$name"); | |
| 117 my $f = ($path) ? "$path/$name" : "$name"; | |
| 118 $files{$f} = [$name]; | |
| 119 } | |
| 120 | |
| 121 # | |
| 122 # dirs | |
| 123 # | |
| 124 my @dirnodes = $rootnode->findnodes('child::dir'); | |
| 125 foreach my $fn (@dirnodes) { | |
| 126 my $name = sstrip($fn->findvalue('child::name')); | |
| 127 my $path = sstrip($fn->findvalue('child::path')); | |
| 128 logger("DEBUG", "DIR: ($path)$name"); | |
| 129 my $f = ($path) ? "$path/$name" : "$name"; | |
| 130 $files{$f} = [$name]; | |
| 131 } | |
| 132 | |
| 133 # | |
| 134 # archive-storage-date | |
| 135 # | |
| 136 my $archdate = $rootnode->find('child::archive-storage-date'); | |
| 137 if ($archdate) { | |
| 138 logger("INFO", "archive storage date: $archdate"); | |
| 139 } else { | |
| 140 logger("ERROR", "archive storage date missing!"); | |
| 141 $errcnt++; | |
| 142 } | |
| 143 return \%files; | |
| 144 } | |
| 145 | |
| 146 | |
| 147 # | |
| 148 # fs_read_files($realdir, $docdir, \%files, \%dirs) | |
| 149 # | |
| 150 # reads all files and directories below $realdir and puts the | |
| 151 # files in %files and directories in %dirs | |
| 152 # $docdir is only for recursion, it should be empty when called | |
| 153 # from outside | |
| 154 # | |
| 155 sub fs_read_files { | |
| 156 my ($directory, $docdir, $files, $dirs) = @_; | |
| 157 my $cnt = 0; | |
| 158 | |
| 159 if (! opendir DIR, $directory) { | |
| 160 return 0; | |
| 161 } | |
| 162 my @dirfiles = readdir DIR; | |
| 163 foreach my $fn (@dirfiles) { | |
| 164 # ignore names starting with a dot | |
| 165 next if ($fn =~ /^\./); | |
| 166 # ignore other silly files | |
| 167 next if ($junk_files{$fn}); | |
| 168 | |
| 169 $cnt++; | |
| 170 my $f = "$directory/$fn"; | |
| 171 my $docf = ($docdir) ? "$docdir/$fn" : $fn; | |
| 172 #print "fs_file: \"$f\"\n"; | |
| 173 if (-f $f) { | |
| 174 #print " is file\n"; | |
| 175 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, | |
| 176 $atime,$mtime,$ctime,$blksize,$blocks) | |
| 177 = stat(_); | |
| 178 $$files{$docf} = [$fn, $size, stime($mtime)]; | |
| 179 #logger("TEST", "fn $fn, size $size, mtime $mtime"); | |
| 180 } elsif (-d _) { | |
| 181 #print " is dir\n"; | |
| 182 $$dirs{$docf} = $fn; | |
| 183 # recurse into directory | |
| 184 $cnt += fs_read_files($f, $docf, $files, $dirs); | |
| 185 } | |
| 186 } | |
| 187 return $cnt; | |
| 188 } | |
| 189 | |
| 190 | |
| 191 # | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
192 # $archcnt = run_query($dirquery, \%files) |
| 0 | 193 # |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
194 # runs the archiver program on $dirquery and adds to the hash of archived files |
| 0 | 195 # |
| 196 # Sample output: | |
| 197 # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839 | |
| 198 # | |
| 199 sub run_query { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
200 my ($dirquery, $files) = @_; |
| 0 | 201 print LOG "START checkarchive $version ", scalar localtime, "\n"; |
| 202 my $archcmd = $archprog; | |
| 203 $archcmd .= " query archive -subdir=yes"; | |
| 204 $archcmd .= " -description='$archname'"; | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
205 $archcmd .= " '$dirquery'"; |
| 0 | 206 |
| 207 my $archcnt = 0; | |
| 208 print LOG "CMD: $archcmd\n"; | |
| 209 if (open ARCH, "$archcmd 2>&1 |") { | |
| 210 while (<ARCH>) { | |
| 211 chomp; | |
| 212 print LOG "ARCH: $_\n"; | |
| 213 if (/ | |
| 214 \s*([\d,]+) # size | |
| 215 \s+(\w+) # unit of size | |
| 216 \s+([\d\/]+) # date mm\/dd\/yy | |
| 217 \s+([\d:]+) # time | |
| 218 \s+(\S+) # file name | |
| 219 \s+(\w+) # expiry | |
| 220 \s+(\S+) # archive label | |
| 221 /x) { | |
| 222 my $size = $1; | |
| 223 my $sunit = $2; | |
| 224 my $date = $3; | |
| 225 my $time = $4; | |
| 226 my $file = $5; | |
| 227 my $exp = $6; | |
| 228 my $label = $7; | |
| 229 $size =~ s/,//g; | |
| 230 $date = ymd_date($date); | |
| 231 logger("DEBUG", " QUERY: file '$file'"); | |
| 232 $archcnt++; | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
233 if ($$files{$file}) { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
234 logger("DEBUG", "file $file seems to be archived multiple times: $time $date"); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
235 #$warncnt++; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
236 } |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
237 if (length $file <= length $docdir) { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
238 logger("DEBUG", "not below document dir: $file"); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
239 next; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
240 } |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
241 $$files{$file} = [$size, "$date $time"]; |
| 0 | 242 } |
| 243 } | |
| 244 } else { | |
| 245 logger("ABORT", "unable to start archive command '$archcmd'!!"); | |
| 246 exit 1; | |
| 247 } | |
| 248 | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
249 return $archcnt; |
| 0 | 250 } |
| 251 | |
| 252 | |
| 253 # | |
| 254 # check_files(\%files_to_archive, \%archived_files) | |
| 255 # | |
| 256 # compares the list of archived and to be archived files | |
| 257 # | |
| 258 sub check_files { | |
| 259 my ($to_archive, $archived) = @_; | |
| 260 | |
| 261 my $nt = scalar keys %$to_archive; | |
| 262 my $na = scalar keys %$archived; | |
| 263 | |
| 264 foreach my $ft (sort keys %$to_archive) { | |
| 265 my $fp = "$docdir/$ft"; | |
| 266 #logger("DEBUG", " fp: $fp"); | |
| 267 if ($$archived{$fp}) { | |
| 268 logger("DEBUG", "$ft archived OK"); | |
| 269 $$archived{$fp}->[2] = "OK"; | |
| 270 } else { | |
| 271 logger("ERROR", "file entry '$ft' missing from archive!"); | |
| 272 $errcnt++; | |
| 273 } | |
| 274 } | |
| 275 | |
| 276 foreach my $fa (sort keys %$archived) { | |
| 277 if (! $$archived{$fa}->[2]) { | |
| 278 my ($fn, $fp) = split_file_path($fa); | |
| 279 if ($index_files{$fn}) { | |
| 280 logger("DEBUG", "$fa ignored"); | |
| 281 $na--; | |
| 282 } else { | |
| 283 logger("WARNING", "$fa archived but not in list!"); | |
| 284 $warncnt++; | |
| 285 } | |
| 286 } | |
| 287 } | |
| 288 | |
| 289 if ($nt > $na) { | |
| 290 logger("WARNING", "less files were archived ($na vs. $nt)!"); | |
| 291 $warncnt++; | |
| 292 } elsif ($na > $nt) { | |
| 293 logger("WARNING", "more files were archived ($na vs. $nt)!"); | |
| 294 $warncnt++; | |
| 295 } | |
| 296 | |
| 297 } | |
| 298 | |
| 299 # | |
| 300 # compare_files(\%files_on_disk, \%archived_files) | |
| 301 # | |
| 302 # compares the list of archived files and files on disk | |
| 303 # | |
| 304 sub compare_files { | |
| 305 my ($fs_files, $archived) = @_; | |
| 306 | |
| 307 foreach my $ft (sort keys %$fs_files) { | |
| 308 next if ($index_files{$ft}); | |
| 309 my $fp = "$docdir/$ft"; | |
| 310 #logger("DEBUG", " fp: $fp"); | |
| 311 if ($$archived{$fp}) { | |
| 312 next if ($index_files{$ft}); | |
| 313 | |
| 314 my $asize = $$archived{$fp}[0]; | |
| 315 my $atime = $$archived{$fp}[1]; | |
| 316 my $fsize = $$fs_files{$ft}[1]; | |
| 317 my $ftime = $$fs_files{$ft}[2]; | |
| 318 if ($asize != $fsize) { | |
| 319 logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!"); | |
| 320 $errcnt++; | |
| 321 } elsif ($atime lt $ftime) { | |
| 322 logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!"); | |
| 323 $errcnt++; | |
| 324 } else { | |
| 11 | 325 logger("WARNING", "archived file $ft still on disk"); |
| 326 $warncnt++; | |
| 0 | 327 } |
| 328 } else { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
329 logger("ERROR", "file on disk '$ft' is not in archive!"); |
| 0 | 330 $errcnt++; |
| 331 } | |
| 332 } | |
| 333 } | |
| 334 | |
| 335 | |
| 336 | |
| 337 ####################################################### | |
| 338 # main | |
| 339 # | |
| 340 | |
| 341 logger("INFO", "archivecheck $version"); | |
| 342 | |
| 343 # make shure the right user is running this program | |
| 344 my $user = getlogin; | |
| 19 | 345 if (not (($user eq "archive")||($user eq "root"))) { |
| 0 | 346 logger("ABORT", "you must be archive or root user to run this program!"); |
| 347 exit 1; | |
| 348 } | |
| 349 | |
| 350 # read index.meta file | |
| 351 my ($document, $rootnode) = read_xml($metafile); | |
| 352 | |
| 353 # check file and add archive date | |
| 354 my $files_to_archive = read_resource_meta($rootnode); | |
| 355 | |
| 356 # check for .archived file | |
| 357 if (-f "$docdir/.archived") { | |
| 358 logger("INFO", ".archived file exists."); | |
| 359 } else { | |
| 360 logger("WARNING", "no .archived file!"); | |
| 361 $warncnt++; | |
| 362 } | |
| 363 | |
| 364 # check archive | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
365 my %archived_files = (); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
366 my $archcnt = 0; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
367 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
368 # TSM needs two different paths because of historical mount points :-( |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
369 my $docdir1 = "/mpiwg/archive/data/"; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
370 $archcnt += run_query($docdir1, \%archived_files); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
371 my $docdir2 = "/mpiwg/archive/"; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
372 $archcnt += run_query($docdir2, \%archived_files); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
373 } else { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
374 $archcnt += run_query("$docdir/", \%archived_files); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
375 } |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
376 logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files."); |
| 0 | 377 |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
378 my $num_arch_files = (scalar keys %archived_files); |
| 0 | 379 if ($num_arch_files == 0) { |
| 380 logger("ABORT", "no archive of this directory!!"); | |
| 381 exit 1; | |
| 382 } | |
| 383 logger("INFO", "$num_arch_files files archived"); | |
| 384 | |
| 385 # check list of archived files | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
386 check_files($files_to_archive, \%archived_files); |
| 0 | 387 |
| 388 # read files from filesystem | |
| 389 my %fsfiles; | |
| 390 my %fsdirs; | |
| 391 my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs); | |
| 392 | |
| 393 logger("INFO", "$num_fs_files files still on disk!"); | |
| 394 if ($num_fs_files > 0) { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
395 compare_files(\%fsfiles, \%archived_files); |
| 0 | 396 } |
| 397 | |
| 398 logger("INFO", "$warncnt warnings"); | |
| 399 logger("INFO", "$errcnt errors"); | |
| 400 if ($errcnt == 0) { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
401 logger("DONE", "" . (scalar keys %archived_files) . " archived files OK"); |
| 11 | 402 exit 0; |
| 0 | 403 } else { |
| 404 logger("ABORT", "there were $errcnt errors!!"); | |
| 405 exit 1; | |
| 406 } |
