Mercurial > hg > foxridge-archiver
annotate metacheck.pl @ 30:398ef4b8f072
added -online-base option to makemeta-lib
author | casties |
---|---|
date | Mon, 12 Jun 2006 19:01:39 +0200 |
parents | 1dd183b95c61 |
children | 2208ed7370cb |
rev | line source |
---|---|
0 | 1 #!/usr/local/bin/perl -w |
2 | |
3 use strict; | |
4 use XML::LibXML; | |
5 | |
6 use lib '/usr/local/mpiwg/archive'; | |
7 use MPIWGStor; | |
8 | |
9 # make output unbuffered | |
10 $|=1; | |
11 | |
12 ####################################################### | |
13 # internal parameters | |
14 # | |
15 | |
16 # program version | |
18 | 17 my $version = "0.6.0 (20.9.2005)"; |
18 my $help = | |
19 "use: metacheck [options] docdir | |
20 options: | |
21 -debug show debugging info | |
22 -dry-run simulate, dont'do anything | |
23 -checkonly leave existing index file untouched | |
24 -add-files add file tags for missing files | |
25 -replace rewrite index file to match current files | |
26 "; | |
27 logger("INFO", "metacheck $version"); | |
0 | 28 |
29 # read command line parameters | |
30 my $args = MPIWGStor::parseargs; | |
18 | 31 if (! scalar(%$args)) { |
32 print $help, "\n"; | |
33 exit 1; | |
34 } | |
0 | 35 |
36 # debug level | |
37 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0; | |
38 | |
18 | 39 # simulate action only |
40 my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0; | |
41 logger('DEBUG', "dry-run: $dry_run"); | |
42 | |
0 | 43 # check only or fix index file also |
18 | 44 my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1; |
45 | |
0 | 46 # add file tags for missing files |
47 my $fix_files = ! $check_only; | |
48 # add tags for file size and date | |
49 my $fix_fs_meta = 1; | |
50 # add dir tags for missing directories | |
51 my $fix_dirs = ! $check_only; | |
52 # rewrite XML file (necessary for fix_files and fix_dirs) | |
53 my $fix_xml = ! $check_only; | |
18 | 54 # rewrite complete index file |
55 my $do_rewrite = 0; | |
56 | |
57 # add file tags for missing files | |
58 if (exists $$args{'add-files'}) { | |
59 $check_only = 0; | |
60 $fix_files = 1; | |
61 $fix_dirs = 1; | |
62 $fix_xml = 1; | |
63 $do_rewrite = 0; | |
64 logger('DEBUG', "add-files: true"); | |
65 } | |
66 | |
67 # completely rewrite index file | |
68 if (exists $$args{'replace'}) { | |
69 $check_only = 0; | |
70 $fix_files = 1; | |
71 $fix_dirs = 1; | |
72 $fix_xml = 1; | |
73 $do_rewrite = 1; | |
74 logger('DEBUG', "replace: true"); | |
75 } | |
76 logger('DEBUG', "checkonly: $check_only"); | |
77 | |
78 | |
0 | 79 my $xml_changed = 0; |
80 # XML namespace (not really implemented!) | |
81 my $namespace = ""; | |
82 | |
83 | |
84 ####################################################### | |
85 # check parameters that were passed to the program | |
86 # | |
87 my $docdir = $$args{'path'}; | |
88 if (! $docdir) { | |
89 logger("ABORT", "no document directory given!"); | |
90 exit 1; | |
91 } | |
1 | 92 # strip double slashes |
93 $docdir =~ s/\/\//\//; | |
0 | 94 # strip trailing slashes |
95 $docdir =~ s/\/$//; | |
96 if (! -d $docdir) { | |
97 logger("ABORT", "document directory \'$docdir\' doesn't exist!"); | |
98 exit 1; | |
99 } | |
100 | |
101 my $metafile = "$docdir/index.meta"; | |
102 if (! -f $metafile) { | |
103 logger("ABORT", "metadata index file \'$metafile\' doesn't exist!"); | |
104 exit 1; | |
105 } | |
106 | |
107 ####################################################### | |
108 # internal variables | |
109 # | |
110 | |
111 # all files in the document directory tree | |
112 my %files; | |
113 # all directories in the document directory tree | |
114 my %dirs; | |
115 # number of errors | |
116 my $errcnt = 0; | |
117 # number of warnings | |
118 my $warncnt = 0; | |
119 | |
120 ####################################################### | |
121 # subroutines | |
122 # | |
123 | |
124 # | |
125 # fs_read_files($realdir, $docdir, \%files, \%dirs) | |
126 # | |
127 # reads all files and directories below $realdir and puts the | |
128 # files in %files and directories in %dirs | |
129 # $docdir is only for recursion, it should be empty when called | |
130 # from outside | |
131 # | |
132 sub fs_read_files { | |
133 my ($directory, $docdir, $files, $dirs) = @_; | |
134 my $cnt = 0; | |
135 | |
136 if (! opendir DIR, $directory) { | |
137 return 0; | |
138 } | |
139 my @dirfiles = readdir DIR; | |
140 foreach my $fn (@dirfiles) { | |
141 # ignore names starting with a dot | |
142 next if ($fn =~ /^\./); | |
143 # ignore other silly files | |
144 next if ($junk_files{$fn}); | |
145 | |
146 $cnt++; | |
147 my $f = "$directory/$fn"; | |
148 my $docf = ($docdir) ? "$docdir/$fn" : $fn; | |
149 #logger("fs_file: \"$f\""); | |
150 if (-f $f) { | |
151 #logger(" is file"); | |
152 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size, | |
153 $atime,$mtime,$ctime,$blksize,$blocks) | |
154 = stat(_); | |
155 $$files{$docf} = [$fn, $size, $mtime]; | |
156 } elsif (-d _) { | |
157 #logger(" is dir"); | |
158 $$dirs{$docf} = $fn; | |
159 # recurse into directory | |
160 $cnt += fs_read_files($f, $docf, $files, $dirs); | |
161 } | |
162 } | |
163 return $cnt; | |
164 } | |
165 | |
166 | |
167 # | |
168 # check_dirs($rootnode, \%dirs) | |
169 # | |
170 # reads all dir elements under $rootnode and compares with the directory list | |
171 # in %dirs | |
172 # | |
173 sub check_dirs { | |
174 my ($root, $fsdirs) = @_; | |
175 | |
176 # | |
177 # iterate all dir tags | |
178 # | |
179 my @dirnodes = $root->findnodes('child::dir'); | |
180 my %okdirs; | |
181 foreach my $dirnode (@dirnodes) { | |
182 my $dirname = sstrip($dirnode->find('child::name')); | |
183 my $dirpath = sstrip($dirnode->find('child::path')); | |
184 my $description = sstrip($dirnode->find('child::description')); | |
185 # name must be valid | |
186 if (! valid_dir_name($dirname)) { | |
24
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
187 if ($do_rewrite) { |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
188 logger("WARNING", "directory name ($dirpath) $dirname in index file invalid!"); |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
189 $warncnt++; |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
190 } else { |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
191 logger("ERROR", "directory name ($dirpath) $dirname invalid!"); |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
192 $errcnt++; |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
193 } |
0 | 194 } |
195 # description can be present | |
196 if (! $description) { | |
197 logger("WARNING", "description for directory $dirname (in $dirpath/) missing!"); | |
198 $warncnt++; | |
199 } | |
200 # check with dirs on filesystem | |
201 my $fn; | |
18 | 202 if ($dirpath && ($dirpath ne '.')) { |
0 | 203 $fn = "$dirpath/$dirname"; |
204 } else { | |
205 $fn = "$dirname"; | |
206 } | |
18 | 207 #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\""); |
0 | 208 if ($$fsdirs{$fn}) { |
209 #logger(" OK ($$fsdirs{$fn})"); | |
210 $okdirs{$fn} = $dirname; | |
211 } else { | |
18 | 212 if ($do_rewrite) { |
213 # remove dir tag | |
214 logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!"); | |
215 $dirnode->unbindNode(); | |
216 $warncnt++; | |
217 } else { | |
218 logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!"); | |
219 $errcnt++; | |
220 } | |
0 | 221 } |
222 } | |
223 #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), ""); | |
224 if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) { | |
225 # number of dir tags and dirs don't match | |
226 # iterate through all dirs | |
227 foreach my $f (sort keys %$fsdirs) { | |
228 # was this dir missing? | |
229 if (! $okdirs{$f}) { | |
18 | 230 my ($name, $path) = split_file_path($f, 1); |
0 | 231 # name must be valid |
232 if (! valid_dir_name($name)) { | |
233 $path = "." unless ($path); | |
234 logger("ERROR", "directory name $name (in $path/) invalid!"); | |
235 $errcnt++; | |
236 next; | |
237 } | |
238 if ($fix_dirs) { | |
239 # add missing dir tag | |
240 my $dir_node = $root->addNewChild($namespace, "dir"); | |
241 $xml_changed++; | |
242 # add name | |
243 my $name_node = $dir_node->addNewChild($namespace, "name"); | |
244 $name_node->appendTextNode($name); | |
245 # add path | |
246 if ($path) { | |
247 my $path_node = $dir_node->addNewChild($namespace, "path"); | |
248 $path_node->appendTextNode($path); | |
249 } | |
250 logger("INFO", "directory $f to be added to index file!"); | |
251 } else { | |
252 logger("ERROR", "directory $f missing in index file!"); | |
253 $errcnt++; | |
254 } | |
255 } | |
256 } | |
257 } | |
258 } | |
259 | |
260 | |
261 # | |
262 # check_files($rootnode, \%files) | |
263 # | |
264 # reads all file elements under $rootnode and compares with the file list | |
265 # in %files | |
266 # | |
267 sub check_files { | |
268 my ($root, $fsfiles) = @_; | |
269 | |
270 # | |
271 # iterate all file tags | |
272 # | |
273 my @filenodes = $root->findnodes('child::file'); | |
274 my %okfiles; | |
275 foreach my $filenode (@filenodes) { | |
276 my $filename = sstrip($filenode->find('child::name')); | |
277 my $filepath = sstrip($filenode->find('child::path')); | |
278 my $filesize = sstrip($filenode->find('child::size')); | |
279 my $filedate = sstrip($filenode->find('child::date')); | |
280 # name must be valid | |
281 if (! valid_file_name($filename)) { | |
24
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
282 if ($do_rewrite) { |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
283 logger("WARNING", "file name ($filepath)$filename in index file invalid!"); |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
284 $warncnt++; |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
285 } else { |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
286 logger("ERROR", "file name ($filepath)$filename invalid!"); |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
287 $errcnt++; |
1dd183b95c61
-replace deals with invalid filenames in index files now
casties
parents:
18
diff
changeset
|
288 } |
0 | 289 } |
290 my $fn = ($filepath) ? "$filepath/$filename" : "$filename"; | |
291 #logger("file: \"$filename\", \"$filepath\""); | |
292 if ($$fsfiles{$fn}) { | |
293 #logger(" OK ($$fsfiles{$fn})"); | |
294 $okfiles{$fn} = $filename; | |
295 # check file size and date | |
296 if ($filesize) { | |
297 if ($filesize != $$fsfiles{$fn}->[1]) { | |
298 logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]"); | |
299 $warncnt++; | |
300 } | |
301 } | |
302 # file date | |
303 if ($filedate) { | |
304 if ($filedate ne stime($$fsfiles{$fn}->[2])) { | |
305 logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), ""); | |
306 $warncnt++; | |
307 } | |
308 } | |
309 # update file size and date | |
310 if ($fix_fs_meta) { | |
311 # delete size and date | |
312 foreach my $n ($filenode->findnodes('child::size')) { | |
313 $filenode->removeChild($n); | |
314 } | |
315 foreach my $n ($filenode->findnodes('child::date')) { | |
316 $filenode->removeChild($n); | |
317 } | |
318 # add new size and date | |
319 my $node = $filenode->addNewChild($namespace, "size"); | |
320 $node->appendTextNode($$fsfiles{$fn}->[1]); | |
321 $node = $filenode->addNewChild($namespace, "date"); | |
322 $node->appendTextNode(stime($$fsfiles{$fn}->[2])); | |
323 $xml_changed++; | |
324 } | |
325 } else { | |
18 | 326 if ($do_rewrite) { |
327 # remove file tag | |
328 logger("WARNING", "file $filename (in $filepath/) no longer on disk!"); | |
329 $filenode->unbindNode(); | |
330 $warncnt++; | |
331 } else { | |
332 logger("ERROR", "file $filename (in $filepath/) missing on disk!"); | |
333 $errcnt++; | |
334 } | |
0 | 335 } |
336 } | |
337 #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), ""); | |
338 if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) { | |
339 # number of file tags and files don't match | |
340 # iterate through all files | |
341 foreach my $f (sort keys %$fsfiles) { | |
18 | 342 my ($name, $path) = split_file_path($f, 1); |
0 | 343 # was this file missing? |
344 if (! $okfiles{$f}) { | |
345 # is an ignoreable file? | |
346 if ($index_files{$name}) { | |
347 next; | |
348 } | |
349 # name must be valid | |
350 if (! valid_dir_name($name)) { | |
351 $path = "." unless ($path); | |
352 logger("ERROR", "file name $name (in $path/) invalid!"); | |
353 $errcnt++; | |
354 next; | |
355 } | |
356 if ($fix_files) { | |
357 # add missing file tag | |
358 my $file_node = $root->addNewChild($namespace, "file"); | |
359 $xml_changed++; | |
360 # add name | |
361 my $name_node = $file_node->addNewChild($namespace, "name"); | |
362 $name_node->appendTextNode($name); | |
363 # add path | |
364 if ($path) { | |
365 my $path_node = $file_node->addNewChild($namespace, "path"); | |
366 $path_node->appendTextNode($path); | |
367 } | |
368 # add size | |
369 my $size_node = $file_node->addNewChild($namespace, "size"); | |
370 $size_node->appendTextNode($$fsfiles{$f}->[1]); | |
371 # add date | |
372 my $date_node = $file_node->addNewChild($namespace, "date"); | |
373 $date_node->appendTextNode(stime($$fsfiles{$f}->[2])); | |
374 logger("INFO", "file $f to be added to index file!"); | |
375 } else { | |
376 logger("ERROR", "file $f missing in index file!"); | |
377 $errcnt++; | |
378 } | |
379 } | |
380 } | |
381 } | |
382 } | |
383 | |
384 # | |
385 # check_resource_meta($rootnode) | |
386 # | |
387 # checks general resource meta information | |
388 # | |
389 sub check_resource_meta { | |
390 my ($rootnode) = @_; | |
391 | |
392 # | |
393 # description | |
394 # | |
395 my $description = $rootnode->findvalue('child::description'); | |
396 if (! $description) { | |
397 logger("ERROR", "resource description element missing!"); | |
398 $errcnt++; | |
399 } | |
400 # | |
401 # name | |
402 # | |
403 my $name = sstrip($rootnode->findvalue('child::name')); | |
404 if ($name) { | |
405 my ($dirname, $dirpath) = split_file_path($docdir); | |
406 if ($dirname ne $name) { | |
407 logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!"); | |
408 $errcnt++; | |
409 } | |
410 } else { | |
411 logger("ERROR", "resource name element missing!"); | |
412 $errcnt++; | |
413 } | |
414 # | |
415 # archive path | |
416 # | |
417 my $realpath; | |
418 # get real path | |
419 if ($docdir =~ /^\//) { | |
420 # docdir is absolute | |
421 $realpath = $docdir; | |
422 } else { | |
423 # docdir is relative -- try with the shell | |
424 if (open PWDCMD, "cd $docdir ; pwd|") { | |
425 $realpath = <PWDCMD>; | |
426 chomp $realpath; | |
427 } | |
428 } | |
429 if (! $realpath) { | |
430 logger("ERROR", "unable to check real archive path!"); | |
431 $errcnt++; | |
432 return; | |
433 } | |
434 # get archive-path | |
435 my $archnode = ($rootnode->find('child::archive-path'))->get_node(1); | |
436 if ($archnode) { | |
437 my $arch = sstrip($archnode->textContent); | |
438 if ($arch ne $realpath) { | |
439 logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!"); | |
440 $warncnt++; | |
441 # correct archive-path | |
442 $archnode->removeChildNodes; | |
443 $archnode->appendTextNode($realpath); | |
444 $xml_changed++; | |
445 } | |
446 } else { | |
447 # add archive-path | |
448 $archnode = $rootnode->addNewChild($namespace, "archive-path"); | |
449 $archnode->appendTextNode($realpath); | |
450 $xml_changed++; | |
451 } | |
452 | |
453 } | |
454 | |
455 | |
456 | |
457 ####################################################### | |
458 # main | |
459 # | |
460 | |
461 my ($document, $rootnode) = read_xml($metafile); | |
462 | |
463 check_resource_meta($rootnode); | |
464 | |
465 my $fnum = fs_read_files($docdir, "", \%files, \%dirs); | |
466 logger("INFO", "$fnum files on FS"); | |
18 | 467 #foreach (keys %dirs) {logger('DEBUG', " dir ($_): $dirs{$_}");} |
0 | 468 |
469 check_files($rootnode, \%files); | |
470 check_dirs($rootnode, \%dirs); | |
471 | |
472 logger("INFO", "$warncnt warnings"); | |
473 logger("INFO", "$errcnt errors"); | |
474 if ($errcnt > 0) { | |
18 | 475 logger("ABORT", "there were $errcnt errors!"); |
0 | 476 exit 1; |
477 } else { | |
478 if ($fix_xml) { | |
18 | 479 if ($dry_run) { |
480 logger('INFO', "would write $metafile"); | |
481 logger('DEBUG', $document->toString(1)); | |
482 } else { | |
483 write_xml($document, $metafile); | |
484 } | |
0 | 485 } |
486 logger("DONE", "index file checked successfully!"); | |
487 } |