1: #!/usr/bin/perl -w
2:
3: use strict;
4: use XML::LibXML;
5:
6: use lib '/usr/local/mpiwg/archive';
7: use MPIWGStor;
8:
9: # make output unbuffered
10: $|=1;
11:
12: #######################################################
13: # internal parameters
14: #
15:
16: # program version
17: my $version = "0.6.0 (20.9.2005)";
18: my $help =
19: "use: metacheck [options] docdir
20: options:
21: -debug show debugging info
22: -dry-run simulate, dont'do anything
23: -checkonly leave existing index file untouched
24: -add-files add file tags for missing files
25: -replace rewrite index file to match current files
26: ";
27: logger("INFO", "metacheck $version");
28:
29: # read command line parameters
30: my $args = MPIWGStor::parseargs;
31: if (! scalar(%$args)) {
32: print $help, "\n";
33: exit 1;
34: }
35:
36: # debug level
37: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
38:
39: # simulate action only
40: my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
41: logger('DEBUG', "dry-run: $dry_run");
42:
43: # check only or fix index file also
44: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
45:
46: # add file tags for missing files
47: my $fix_files = ! $check_only;
48: # add tags for file size and date
49: my $fix_fs_meta = 1;
50: # add dir tags for missing directories
51: my $fix_dirs = ! $check_only;
52: # rewrite XML file (necessary for fix_files and fix_dirs)
53: my $fix_xml = ! $check_only;
54: # rewrite complete index file
55: my $do_rewrite = 0;
56:
57: # add file tags for missing files
58: if (exists $$args{'add-files'}) {
59: $check_only = 0;
60: $fix_files = 1;
61: $fix_dirs = 1;
62: $fix_xml = 1;
63: $do_rewrite = 0;
64: logger('DEBUG', "add-files: true");
65: }
66:
67: # completely rewrite index file
68: if (exists $$args{'replace'}) {
69: $check_only = 0;
70: $fix_files = 1;
71: $fix_dirs = 1;
72: $fix_xml = 1;
73: $do_rewrite = 1;
74: logger('DEBUG', "replace: true");
75: }
76: logger('DEBUG', "checkonly: $check_only");
77:
78:
79: my $xml_changed = 0;
80: # XML namespace (not really implemented!)
81: my $namespace = "";
82:
83:
84: #######################################################
85: # check parameters that were passed to the program
86: #
87: my $docdir = $$args{'path'};
88: if (! $docdir) {
89: logger("ABORT", "no document directory given!");
90: exit 1;
91: }
92: # strip double slashes
93: $docdir =~ s/\/\//\//;
94: # strip trailing slashes
95: $docdir =~ s/\/$//;
96: if (! -d $docdir) {
97: logger("ABORT", "document directory \'$docdir\' doesn't exist!");
98: exit 1;
99: }
100:
101: my $metafile = "$docdir/index.meta";
102: if (! -f $metafile) {
103: logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
104: exit 1;
105: }
106:
107: #######################################################
108: # internal variables
109: #
110:
111: # all files in the document directory tree
112: my %files;
113: # all directories in the document directory tree
114: my %dirs;
115: # number of errors
116: my $errcnt = 0;
117: # number of warnings
118: my $warncnt = 0;
119:
120: #######################################################
121: # subroutines
122: #
123:
124: #
125: # fs_read_files($realdir, $docdir, \%files, \%dirs)
126: #
127: # reads all files and directories below $realdir and puts the
128: # files in %files and directories in %dirs
129: # $docdir is only for recursion, it should be empty when called
130: # from outside
131: #
132: sub fs_read_files {
133: my ($directory, $docdir, $files, $dirs) = @_;
134: my $cnt = 0;
135:
136: if (! opendir DIR, $directory) {
137: return 0;
138: }
139: my @dirfiles = readdir DIR;
140: foreach my $fn (@dirfiles) {
141: # ignore names starting with a dot
142: next if ($fn =~ /^\./);
143: # ignore other silly files
144: next if ($junk_files{$fn});
145:
146: $cnt++;
147: my $f = "$directory/$fn";
148: my $docf = ($docdir) ? "$docdir/$fn" : $fn;
149: #logger("fs_file: \"$f\"");
150: if (-f $f) {
151: #logger(" is file");
152: my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
153: $atime,$mtime,$ctime,$blksize,$blocks)
154: = stat(_);
155: $$files{$docf} = [$fn, $size, $mtime];
156: } elsif (-d _) {
157: #logger(" is dir");
158: $$dirs{$docf} = $fn;
159: # recurse into directory
160: $cnt += fs_read_files($f, $docf, $files, $dirs);
161: }
162: }
163: return $cnt;
164: }
165:
166:
167: #
168: # check_dirs($rootnode, \%dirs)
169: #
170: # reads all dir elements under $rootnode and compares with the directory list
171: # in %dirs
172: #
173: sub check_dirs {
174: my ($root, $fsdirs) = @_;
175:
176: #
177: # iterate all dir tags
178: #
179: my @dirnodes = $root->findnodes('child::dir');
180: my %okdirs;
181: foreach my $dirnode (@dirnodes) {
182: my $dirname = sstrip($dirnode->find('child::name'));
183: my $dirpath = sstrip($dirnode->find('child::path'));
184: my $description = sstrip($dirnode->find('child::description'));
185: # name must be valid
186: if (! valid_dir_name($dirname)) {
187: if ($do_rewrite) {
188: logger("WARNING", "directory name ($dirpath) $dirname in index file invalid!");
189: $warncnt++;
190: } else {
191: logger("ERROR", "directory name ($dirpath) $dirname invalid!");
192: $errcnt++;
193: }
194: }
195: # description can be present
196: if (! $description) {
197: logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
198: $warncnt++;
199: }
200: # check with dirs on filesystem
201: my $fn;
202: if ($dirpath && ($dirpath ne '.')) {
203: $fn = "$dirpath/$dirname";
204: } else {
205: $fn = "$dirname";
206: }
207: #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
208: if ($$fsdirs{$fn}) {
209: #logger(" OK ($$fsdirs{$fn})");
210: $okdirs{$fn} = $dirname;
211: } else {
212: if ($do_rewrite) {
213: # remove dir tag
214: logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
215: $dirnode->unbindNode();
216: $warncnt++;
217: } else {
218: logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
219: $errcnt++;
220: }
221: }
222: }
223: #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
224: if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
225: # number of dir tags and dirs don't match
226: # iterate through all dirs
227: foreach my $f (sort keys %$fsdirs) {
228: # was this dir missing?
229: if (! $okdirs{$f}) {
230: my ($name, $path) = split_file_path($f, 1);
231: # name must be valid
232: if (! valid_dir_name($name)) {
233: $path = "." unless ($path);
234: logger("ERROR", "directory name $name (in $path/) invalid!");
235: $errcnt++;
236: next;
237: }
238: if ($fix_dirs) {
239: # add missing dir tag
240: my $dir_node = $root->addNewChild($namespace, "dir");
241: $xml_changed++;
242: # add name
243: my $name_node = $dir_node->addNewChild($namespace, "name");
244: $name_node->appendTextNode($name);
245: # add path
246: if ($path) {
247: my $path_node = $dir_node->addNewChild($namespace, "path");
248: $path_node->appendTextNode($path);
249: }
250: logger("INFO", "directory $f to be added to index file!");
251: } else {
252: logger("ERROR", "directory $f missing in index file!");
253: $errcnt++;
254: }
255: }
256: }
257: }
258: }
259:
260:
261: #
262: # check_files($rootnode, \%files)
263: #
264: # reads all file elements under $rootnode and compares with the file list
265: # in %files
266: #
267: sub check_files {
268: my ($root, $fsfiles) = @_;
269:
270: #
271: # iterate all file tags
272: #
273: my @filenodes = $root->findnodes('child::file');
274: my %okfiles;
275: foreach my $filenode (@filenodes) {
276: my $filename = sstrip($filenode->find('child::name'));
277: my $filepath = sstrip($filenode->find('child::path'));
278: my $filesize = sstrip($filenode->find('child::size'));
279: my $filedate = sstrip($filenode->find('child::date'));
280: # name must be valid
281: if (! valid_file_name($filename)) {
282: if ($do_rewrite) {
283: logger("WARNING", "file name ($filepath)$filename in index file invalid!");
284: $warncnt++;
285: } else {
286: logger("ERROR", "file name ($filepath)$filename invalid!");
287: $errcnt++;
288: }
289: }
290: my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
291: #logger("file: \"$filename\", \"$filepath\"");
292: if ($$fsfiles{$fn}) {
293: #logger(" OK ($$fsfiles{$fn})");
294: $okfiles{$fn} = $filename;
295: # check file size and date
296: if ($filesize) {
297: if ($filesize != $$fsfiles{$fn}->[1]) {
298: logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
299: $warncnt++;
300: }
301: }
302: # file date
303: if ($filedate) {
304: if ($filedate ne stime($$fsfiles{$fn}->[2])) {
305: logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
306: $warncnt++;
307: }
308: }
309: # update file size and date
310: if ($fix_fs_meta) {
311: # delete size and date
312: foreach my $n ($filenode->findnodes('child::size')) {
313: $filenode->removeChild($n);
314: }
315: foreach my $n ($filenode->findnodes('child::date')) {
316: $filenode->removeChild($n);
317: }
318: # add new size and date
319: my $node = $filenode->addNewChild($namespace, "size");
320: $node->appendTextNode($$fsfiles{$fn}->[1]);
321: $node = $filenode->addNewChild($namespace, "date");
322: $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
323: $xml_changed++;
324: }
325: } else {
326: if ($do_rewrite) {
327: # remove file tag
328: logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
329: $filenode->unbindNode();
330: $warncnt++;
331: } else {
332: logger("ERROR", "file $filename (in $filepath/) missing on disk!");
333: $errcnt++;
334: }
335: }
336: }
337: #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
338: if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
339: # number of file tags and files don't match
340: # iterate through all files
341: foreach my $f (sort keys %$fsfiles) {
342: my ($name, $path) = split_file_path($f, 1);
343: # was this file missing?
344: if (! $okfiles{$f}) {
345: # is an ignoreable file?
346: if ($index_files{$name}) {
347: next;
348: }
349: # name must be valid
350: if (! valid_dir_name($name)) {
351: $path = "." unless ($path);
352: logger("ERROR", "file name $name (in $path/) invalid!");
353: $errcnt++;
354: next;
355: }
356: if ($fix_files) {
357: # add missing file tag
358: my $file_node = $root->addNewChild($namespace, "file");
359: $xml_changed++;
360: # add name
361: my $name_node = $file_node->addNewChild($namespace, "name");
362: $name_node->appendTextNode($name);
363: # add path
364: if ($path) {
365: my $path_node = $file_node->addNewChild($namespace, "path");
366: $path_node->appendTextNode($path);
367: }
368: # add size
369: my $size_node = $file_node->addNewChild($namespace, "size");
370: $size_node->appendTextNode($$fsfiles{$f}->[1]);
371: # add date
372: my $date_node = $file_node->addNewChild($namespace, "date");
373: $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
374: logger("INFO", "file $f to be added to index file!");
375: } else {
376: logger("ERROR", "file $f missing in index file!");
377: $errcnt++;
378: }
379: }
380: }
381: }
382: }
383:
384: #
385: # check_resource_meta($rootnode)
386: #
387: # checks general resource meta information
388: #
389: sub check_resource_meta {
390: my ($rootnode) = @_;
391:
392: #
393: # description
394: #
395: my $description = $rootnode->findvalue('child::description');
396: if (! $description) {
397: logger("ERROR", "resource description element missing!");
398: $errcnt++;
399: }
400: #
401: # name
402: #
403: my $name = sstrip($rootnode->findvalue('child::name'));
404: if ($name) {
405: my ($dirname, $dirpath) = split_file_path($docdir);
406: if ($dirname ne $name) {
407: logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
408: $errcnt++;
409: }
410: } else {
411: logger("ERROR", "resource name element missing!");
412: $errcnt++;
413: }
414: #
415: # archive path
416: #
417: my $realpath;
418: # get real path
419: if ($docdir =~ /^\//) {
420: # docdir is absolute
421: $realpath = $docdir;
422: } else {
423: # docdir is relative -- try with the shell
424: if (open PWDCMD, "cd $docdir ; pwd|") {
425: $realpath = <PWDCMD>;
426: chomp $realpath;
427: }
428: }
429: if (! $realpath) {
430: logger("ERROR", "unable to check real archive path!");
431: $errcnt++;
432: return;
433: }
434: # get archive-path
435: my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
436: if ($archnode) {
437: my $arch = sstrip($archnode->textContent);
438: if ($arch ne $realpath) {
439: logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
440: $warncnt++;
441: # correct archive-path
442: $archnode->removeChildNodes;
443: $archnode->appendTextNode($realpath);
444: $xml_changed++;
445: }
446: } else {
447: # add archive-path
448: $archnode = $rootnode->addNewChild($namespace, "archive-path");
449: $archnode->appendTextNode($realpath);
450: $xml_changed++;
451: }
452:
453: }
454:
455:
456:
457: #######################################################
458: # main
459: #
460:
461: my ($document, $rootnode) = read_xml($metafile);
462:
463: check_resource_meta($rootnode);
464:
465: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
466: logger("INFO", "$fnum files on FS");
467: #foreach (keys %dirs) {logger('DEBUG', " dir ($_): $dirs{$_}");}
468:
469: check_files($rootnode, \%files);
470: check_dirs($rootnode, \%dirs);
471:
472: logger("INFO", "$warncnt warnings");
473: logger("INFO", "$errcnt errors");
474: if ($errcnt > 0) {
475: logger("ABORT", "there were $errcnt errors!");
476: exit 1;
477: } else {
478: if ($fix_xml) {
479: if ($dry_run) {
480: logger('INFO', "would write $metafile");
481: logger('DEBUG', $document->toString(1));
482: } else {
483: write_xml($document, $metafile);
484: }
485: }
486: logger("DONE", "index file checked successfully!");
487: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>