Mercurial > hg > foxridge-archiver
annotate archivecheck.pl @ 60:5bee75ca9eb3 default tip
added old makemeta-quantum.pl that was not in CVS.
| author | casties |
|---|---|
| date | Thu, 16 Mar 2017 18:29:58 +0100 |
| parents | 2208ed7370cb |
| children |
| rev | line source |
|---|---|
| 57 | 1 #!/usr/bin/perl -w |
| 0 | 2 |
| 3 use strict; | |
| 4 | |
| 5 use XML::LibXML; | |
| 6 | |
| 7 # MPIWG libraries | |
| 8 use lib '/usr/local/mpiwg/archive'; | |
| 9 use MPIWGStor; | |
| 10 | |
| 11 # make output unbuffered | |
| 12 $|=1; | |
| 13 | |
| 14 ####################################################### | |
| 15 # internal parameters | |
| 16 # | |
| 17 | |
| 18 # program version | |
| 57 | 19 my $version = "0.4.5 (13.7.2009 ROC)"; |
| 0 | 20 |
| 21 # read command line parameters | |
| 22 my $args = parseargs; | |
| 23 | |
| 24 # debug level | |
| 25 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; | |
| 26 | |
| 27 # XML namespace (not really implemented!) | |
| 28 my $namespace = ""; | |
| 29 | |
| 30 # archive name (archive-path element, usually == $docdir) | |
| 31 my $archname; | |
| 32 | |
| 33 | |
| 34 ####################################################### | |
| 35 # external programs | |
| 36 # | |
| 37 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
| 38 if (! -x $archprog) { | |
| 39 logger("ABORT", "TSM client program '$archprog' missing!!"); | |
| 40 exit 1; | |
| 41 } | |
| 42 # my $checkprog = "/usr/local/mpiwg/archive/metacheck"; | |
| 43 # if (! -x $checkprog) { | |
| 44 # logge("ABORT", "meta data checking program '$checkprog' missing!!"); | |
| 45 # exit 1; | |
| 46 # } | |
| 47 # log file for archiver | |
| 48 my $log_file = "/var/tmp/archivecheck.log"; | |
| 49 if (! open LOG, ">>$log_file") { | |
| 50 logger("ABORT", "unable to write log file '$log_file'!!"); | |
| 51 exit 1; | |
| 52 } | |
| 53 | |
| 54 ####################################################### | |
| 55 # check parameters that were passed to the program | |
| 56 # | |
| 57 my $docdir = $$args{'path'}; | |
| 58 if (! $docdir) { | |
| 59 print "ABORT: no document directory given!\n"; | |
| 60 exit 1; | |
| 61 } | |
| 62 # strip trailing slashes | |
| 63 $docdir =~ s/\/$//; | |
| 64 if (! -d $docdir) { | |
| 65 print "ABORT: document directory \'$docdir\' doesn't exist!\n"; | |
| 66 exit 1; | |
| 67 } | |
| 68 | |
| 69 my $metafile = "$docdir/index.meta"; | |
| 70 if (! -f $metafile) { | |
| 71 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | |
| 72 exit 1; | |
| 73 } | |
| 74 | |
| 75 ####################################################### | |
| 76 # internal variables | |
| 77 # | |
| 78 | |
| 79 # number of errors | |
| 80 my $errcnt = 0; | |
| 81 # number of warnings | |
| 82 my $warncnt = 0; | |
| 83 | |
| 84 ####################################################### | |
| 85 # subroutines | |
| 86 # | |
| 87 | |
| 88 | |
| 89 # | |
| 90 # $files = read_resource_meta($rootnode) | |
| 91 # | |
| 92 # checks general resource meta information and reads the list of files | |
| 93 # | |
| 94 sub read_resource_meta { | |
| 95 my ($rootnode) = @_; | |
| 96 my %files; | |
| 97 # | |
| 98 # archive path | |
| 99 # | |
| 100 # get archive-path | |
| 101 $archname = sstrip($rootnode->findvalue('child::archive-path')); | |
| 102 if (! $archname) { | |
| 103 logger("ABORT", "archive-name element missing!!"); | |
| 104 exit 1; | |
| 105 } | |
| 106 | |
| 107 # | |
| 108 # files | |
| 109 # | |
| 110 my @filenodes = $rootnode->findnodes('child::file'); | |
| 111 foreach my $fn (@filenodes) { | |
| 112 my $name = sstrip($fn->findvalue('child::name')); | |
| 113 my $path = sstrip($fn->findvalue('child::path')); | |
| 114 logger("DEBUG", "FILE: ($path)$name"); | |
| 115 my $f = ($path) ? "$path/$name" : "$name"; | |
| 116 $files{$f} = [$name]; | |
| 117 } | |
| 118 | |
| 119 # | |
| 120 # dirs | |
| 121 # | |
| 122 my @dirnodes = $rootnode->findnodes('child::dir'); | |
| 123 foreach my $fn (@dirnodes) { | |
| 124 my $name = sstrip($fn->findvalue('child::name')); | |
| 125 my $path = sstrip($fn->findvalue('child::path')); | |
| 126 logger("DEBUG", "DIR: ($path)$name"); | |
| 26 | 127 my $f = "$name"; |
| 128 if (($path)&&($path ne '.')) { | |
| 129 $f = "$path/$name"; | |
| 130 } | |
| 0 | 131 $files{$f} = [$name]; |
| 132 } | |
| 133 | |
| 134 # | |
| 135 # archive-storage-date | |
| 136 # | |
| 137 my $archdate = $rootnode->find('child::archive-storage-date'); | |
| 138 if ($archdate) { | |
| 139 logger("INFO", "archive storage date: $archdate"); | |
| 140 } else { | |
| 141 logger("ERROR", "archive storage date missing!"); | |
| 142 $errcnt++; | |
| 143 } | |
| 144 return \%files; | |
| 145 } | |
| 146 | |
| 147 | |
| 148 # | |
| 149 # fs_read_files($realdir, $docdir, \%files, \%dirs) | |
| 150 # | |
| 151 # reads all files and directories below $realdir and puts the | |
| 152 # files in %files and directories in %dirs | |
| 153 # $docdir is only for recursion, it should be empty when called | |
| 154 # from outside | |
| 155 # | |
| 156 sub fs_read_files { | |
| 157 my ($directory, $docdir, $files, $dirs) = @_; | |
| 158 my $cnt = 0; | |
| 159 | |
| 160 if (! opendir DIR, $directory) { | |
| 161 return 0; | |
| 162 } | |
| 163 my @dirfiles = readdir DIR; | |
| 164 foreach my $fn (@dirfiles) { | |
| 165 # ignore names starting with a dot | |
| 166 next if ($fn =~ /^\./); | |
| 167 # ignore other silly files | |
| 168 next if ($junk_files{$fn}); | |
| 169 | |
| 170 $cnt++; | |
| 171 my $f = "$directory/$fn"; | |
| 172 my $docf = ($docdir) ? "$docdir/$fn" : $fn; | |
| 173 #print "fs_file: \"$f\"\n"; | |
| 174 if (-f $f) { | |
| 175 #print " is file\n"; | |
| 176 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, | |
| 177 $atime,$mtime,$ctime,$blksize,$blocks) | |
| 178 = stat(_); | |
| 179 $$files{$docf} = [$fn, $size, stime($mtime)]; | |
| 180 #logger("TEST", "fn $fn, size $size, mtime $mtime"); | |
| 181 } elsif (-d _) { | |
| 182 #print " is dir\n"; | |
| 183 $$dirs{$docf} = $fn; | |
| 184 # recurse into directory | |
| 185 $cnt += fs_read_files($f, $docf, $files, $dirs); | |
| 186 } | |
| 187 } | |
| 188 return $cnt; | |
| 189 } | |
| 190 | |
| 191 | |
| 192 # | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
193 # $archcnt = run_query($dirquery, \%files) |
| 0 | 194 # |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
195 # runs the archiver program on $dirquery and adds to the hash of archived files |
| 0 | 196 # |
| 197 # Sample output: | |
| 198 # 20,345 B 08/06/03 17:17:02 /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839/index.meta Never /mpiwg/archive/data/proyectohumboldt/webb_histo_fr_01_1839 | |
| 199 # | |
| 200 sub run_query { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
201 my ($dirquery, $files) = @_; |
| 0 | 202 print LOG "START checkarchive $version ", scalar localtime, "\n"; |
| 203 my $archcmd = $archprog; | |
| 204 $archcmd .= " query archive -subdir=yes"; | |
| 205 $archcmd .= " -description='$archname'"; | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
206 $archcmd .= " '$dirquery'"; |
| 0 | 207 |
| 50 | 208 logger('INFO', "querying TSM server for $dirquery, please wait..."); |
| 0 | 209 my $archcnt = 0; |
| 210 print LOG "CMD: $archcmd\n"; | |
| 211 if (open ARCH, "$archcmd 2>&1 |") { | |
| 212 while (<ARCH>) { | |
| 213 chomp; | |
| 214 print LOG "ARCH: $_\n"; | |
| 215 if (/ | |
| 216 \s*([\d,]+) # size | |
| 217 \s+(\w+) # unit of size | |
| 218 \s+([\d\/]+) # date mm\/dd\/yy | |
| 219 \s+([\d:]+) # time | |
| 220 \s+(\S+) # file name | |
| 221 \s+(\w+) # expiry | |
| 222 \s+(\S+) # archive label | |
| 223 /x) { | |
| 224 my $size = $1; | |
| 225 my $sunit = $2; | |
| 226 my $date = $3; | |
| 227 my $time = $4; | |
| 228 my $file = $5; | |
| 229 my $exp = $6; | |
| 230 my $label = $7; | |
| 231 $size =~ s/,//g; | |
| 232 $date = ymd_date($date); | |
| 233 logger("DEBUG", " QUERY: file '$file'"); | |
| 234 $archcnt++; | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
235 if ($$files{$file}) { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
236 logger("DEBUG", "file $file seems to be archived multiple times: $time $date"); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
237 #$warncnt++; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
238 } |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
239 if (length $file <= length $docdir) { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
240 logger("DEBUG", "not below document dir: $file"); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
241 next; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
242 } |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
243 $$files{$file} = [$size, "$date $time"]; |
| 0 | 244 } |
| 245 } | |
| 246 } else { | |
| 247 logger("ABORT", "unable to start archive command '$archcmd'!!"); | |
| 248 exit 1; | |
| 249 } | |
| 250 | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
251 return $archcnt; |
| 0 | 252 } |
| 253 | |
| 254 | |
| 255 # | |
| 256 # check_files(\%files_to_archive, \%archived_files) | |
| 257 # | |
| 258 # compares the list of archived and to be archived files | |
| 259 # | |
| 260 sub check_files { | |
| 261 my ($to_archive, $archived) = @_; | |
| 262 | |
| 263 my $nt = scalar keys %$to_archive; | |
| 264 my $na = scalar keys %$archived; | |
| 265 | |
| 266 foreach my $ft (sort keys %$to_archive) { | |
| 267 my $fp = "$docdir/$ft"; | |
| 268 #logger("DEBUG", " fp: $fp"); | |
| 269 if ($$archived{$fp}) { | |
| 270 logger("DEBUG", "$ft archived OK"); | |
| 271 $$archived{$fp}->[2] = "OK"; | |
| 272 } else { | |
| 273 logger("ERROR", "file entry '$ft' missing from archive!"); | |
| 274 $errcnt++; | |
| 275 } | |
| 276 } | |
| 277 | |
| 278 foreach my $fa (sort keys %$archived) { | |
| 279 if (! $$archived{$fa}->[2]) { | |
| 280 my ($fn, $fp) = split_file_path($fa); | |
| 281 if ($index_files{$fn}) { | |
| 282 logger("DEBUG", "$fa ignored"); | |
| 283 $na--; | |
| 284 } else { | |
| 285 logger("WARNING", "$fa archived but not in list!"); | |
| 286 $warncnt++; | |
| 287 } | |
| 288 } | |
| 289 } | |
| 290 | |
| 291 if ($nt > $na) { | |
| 292 logger("WARNING", "less files were archived ($na vs. $nt)!"); | |
| 293 $warncnt++; | |
| 294 } elsif ($na > $nt) { | |
| 295 logger("WARNING", "more files were archived ($na vs. $nt)!"); | |
| 296 $warncnt++; | |
| 297 } | |
| 298 | |
| 299 } | |
| 300 | |
| 301 # | |
| 302 # compare_files(\%files_on_disk, \%archived_files) | |
| 303 # | |
| 304 # compares the list of archived files and files on disk | |
| 305 # | |
| 306 sub compare_files { | |
| 307 my ($fs_files, $archived) = @_; | |
| 308 | |
| 309 foreach my $ft (sort keys %$fs_files) { | |
| 310 next if ($index_files{$ft}); | |
| 311 my $fp = "$docdir/$ft"; | |
| 312 #logger("DEBUG", " fp: $fp"); | |
| 313 if ($$archived{$fp}) { | |
| 314 next if ($index_files{$ft}); | |
| 315 | |
| 316 my $asize = $$archived{$fp}[0]; | |
| 317 my $atime = $$archived{$fp}[1]; | |
| 318 my $fsize = $$fs_files{$ft}[1]; | |
| 319 my $ftime = $$fs_files{$ft}[2]; | |
| 320 if ($asize != $fsize) { | |
| 321 logger("ERROR", "archived $ft ($asize) and file on disk ($fsize) have different size!"); | |
| 322 $errcnt++; | |
| 323 } elsif ($atime lt $ftime) { | |
| 324 logger("ERROR", "archived $ft ($atime) is older than file on disk ($ftime)!"); | |
| 325 $errcnt++; | |
| 326 } else { | |
| 11 | 327 logger("WARNING", "archived file $ft still on disk"); |
| 328 $warncnt++; | |
| 0 | 329 } |
| 330 } else { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
331 logger("ERROR", "file on disk '$ft' is not in archive!"); |
| 0 | 332 $errcnt++; |
| 333 } | |
| 334 } | |
| 335 } | |
| 336 | |
| 337 | |
| 338 | |
| 339 ####################################################### | |
| 340 # main | |
| 341 # | |
| 342 | |
| 343 logger("INFO", "archivecheck $version"); | |
| 344 | |
| 345 # make shure the right user is running this program | |
| 52 | 346 my $user = getlogin || getpwuid($<) ; |
| 19 | 347 if (not (($user eq "archive")||($user eq "root"))) { |
| 0 | 348 logger("ABORT", "you must be archive or root user to run this program!"); |
| 349 exit 1; | |
| 350 } | |
| 351 | |
| 352 # read index.meta file | |
| 353 my ($document, $rootnode) = read_xml($metafile); | |
| 354 | |
| 355 # check file and add archive date | |
| 356 my $files_to_archive = read_resource_meta($rootnode); | |
| 357 | |
| 358 # check for .archived file | |
| 359 if (-f "$docdir/.archived") { | |
| 360 logger("INFO", ".archived file exists."); | |
| 361 } else { | |
| 362 logger("WARNING", "no .archived file!"); | |
| 363 $warncnt++; | |
| 364 } | |
| 365 | |
| 366 # check archive | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
367 my %archived_files = (); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
368 my $archcnt = 0; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
369 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
370 # TSM needs two different paths because of historical mount points :-( |
| 57 | 371 # ...doesn't work anymore since old filespace was renamed (13.7.2009) |
| 372 # my $docdir1 = "/mpiwg/archivedataold/"; | |
| 373 # $archcnt += run_query($docdir1, \%archived_files); | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
374 my $docdir2 = "/mpiwg/archive/"; |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
375 $archcnt += run_query($docdir2, \%archived_files); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
376 } else { |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
377 $archcnt += run_query("$docdir/", \%archived_files); |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
378 } |
|
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
379 logger("INFO", "$archcnt archives of " . (scalar keys %archived_files) . " files."); |
| 0 | 380 |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
381 my $num_arch_files = (scalar keys %archived_files); |
| 0 | 382 if ($num_arch_files == 0) { |
| 383 logger("ABORT", "no archive of this directory!!"); | |
| 384 exit 1; | |
| 385 } | |
| 386 logger("INFO", "$num_arch_files files archived"); | |
| 387 | |
| 388 # check list of archived files | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
389 check_files($files_to_archive, \%archived_files); |
| 0 | 390 |
| 391 # read files from filesystem | |
| 392 my %fsfiles; | |
| 393 my %fsdirs; | |
| 394 my $num_fs_files = fs_read_files($docdir, "", \%fsfiles, \%fsdirs); | |
| 395 | |
| 396 logger("INFO", "$num_fs_files files still on disk!"); | |
| 397 if ($num_fs_files > 0) { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
398 compare_files(\%fsfiles, \%archived_files); |
| 0 | 399 } |
| 400 | |
| 401 logger("INFO", "$warncnt warnings"); | |
| 402 logger("INFO", "$errcnt errors"); | |
| 403 if ($errcnt == 0) { | |
|
9
9c61f624d802
fixed dsmc to work with different mount points (try a few...)
casties
parents:
0
diff
changeset
|
404 logger("DONE", "" . (scalar keys %archived_files) . " archived files OK"); |
| 11 | 405 exit 0; |
| 0 | 406 } else { |
| 407 logger("ABORT", "there were $errcnt errors!!"); | |
| 408 exit 1; | |
| 409 } |
