1: #!/usr/local/bin/perl -w
2:
3: use strict;
4: use XML::LibXML;
5:
6: use lib '/usr/local/mpiwg/archive';
7: use MPIWGStor;
8:
9: # make output unbuffered
10: $|=1;
11:
12: #######################################################
13: # internal parameters
14: #
15:
16: # program version
17: my $version = "0.6.0 (20.9.2005)";
18: my $help =
19: "use: metacheck [options] docdir
20: options:
21: -debug show debugging info
22: -dry-run simulate, dont'do anything
23: -checkonly leave existing index file untouched
24: -add-files add file tags for missing files
25: -replace rewrite index file to match current files
26: ";
27: logger("INFO", "metacheck $version");
28:
29: # read command line parameters
30: my $args = MPIWGStor::parseargs;
31: if (! scalar(%$args)) {
32: print $help, "\n";
33: exit 1;
34: }
35:
36: # debug level
37: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
38:
39: # simulate action only
40: my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
41: logger('DEBUG', "dry-run: $dry_run");
42:
43: # check only or fix index file also
44: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
45:
46: # add file tags for missing files
47: my $fix_files = ! $check_only;
48: # add tags for file size and date
49: my $fix_fs_meta = 1;
50: # add dir tags for missing directories
51: my $fix_dirs = ! $check_only;
52: # rewrite XML file (necessary for fix_files and fix_dirs)
53: my $fix_xml = ! $check_only;
54: # rewrite complete index file
55: my $do_rewrite = 0;
56:
57: # add file tags for missing files
58: if (exists $$args{'add-files'}) {
59: $check_only = 0;
60: $fix_files = 1;
61: $fix_dirs = 1;
62: $fix_xml = 1;
63: $do_rewrite = 0;
64: logger('DEBUG', "add-files: true");
65: }
66:
67: # completely rewrite index file
68: if (exists $$args{'replace'}) {
69: $check_only = 0;
70: $fix_files = 1;
71: $fix_dirs = 1;
72: $fix_xml = 1;
73: $do_rewrite = 1;
74: logger('DEBUG', "replace: true");
75: }
76: logger('DEBUG', "checkonly: $check_only");
77:
78:
79: my $xml_changed = 0;
80: # XML namespace (not really implemented!)
81: my $namespace = "";
82:
83:
84: #######################################################
85: # check parameters that were passed to the program
86: #
87: my $docdir = $$args{'path'};
88: if (! $docdir) {
89: logger("ABORT", "no document directory given!");
90: exit 1;
91: }
92: # strip double slashes
93: $docdir =~ s/\/\//\//;
94: # strip trailing slashes
95: $docdir =~ s/\/$//;
96: if (! -d $docdir) {
97: logger("ABORT", "document directory \'$docdir\' doesn't exist!");
98: exit 1;
99: }
100:
101: my $metafile = "$docdir/index.meta";
102: if (! -f $metafile) {
103: logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
104: exit 1;
105: }
106:
107: #######################################################
108: # internal variables
109: #
110:
111: # all files in the document directory tree
112: my %files;
113: # all directories in the document directory tree
114: my %dirs;
115: # number of errors
116: my $errcnt = 0;
117: # number of warnings
118: my $warncnt = 0;
119:
120: #######################################################
121: # subroutines
122: #
123:
124: #
125: # fs_read_files($realdir, $docdir, \%files, \%dirs)
126: #
127: # reads all files and directories below $realdir and puts the
128: # files in %files and directories in %dirs
129: # $docdir is only for recursion, it should be empty when called
130: # from outside
131: #
132: sub fs_read_files {
133: my ($directory, $docdir, $files, $dirs) = @_;
134: my $cnt = 0;
135:
136: if (! opendir DIR, $directory) {
137: return 0;
138: }
139: my @dirfiles = readdir DIR;
140: foreach my $fn (@dirfiles) {
141: # ignore names starting with a dot
142: next if ($fn =~ /^\./);
143: # ignore other silly files
144: next if ($junk_files{$fn});
145:
146: $cnt++;
147: my $f = "$directory/$fn";
148: my $docf = ($docdir) ? "$docdir/$fn" : $fn;
149: #logger("fs_file: \"$f\"");
150: if (-f $f) {
151: #logger(" is file");
152: my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
153: $atime,$mtime,$ctime,$blksize,$blocks)
154: = stat(_);
155: $$files{$docf} = [$fn, $size, $mtime];
156: } elsif (-d _) {
157: #logger(" is dir");
158: $$dirs{$docf} = $fn;
159: # recurse into directory
160: $cnt += fs_read_files($f, $docf, $files, $dirs);
161: }
162: }
163: return $cnt;
164: }
165:
166:
167: #
168: # check_dirs($rootnode, \%dirs)
169: #
170: # reads all dir elements under $rootnode and compares with the directory list
171: # in %dirs
172: #
173: sub check_dirs {
174: my ($root, $fsdirs) = @_;
175:
176: #
177: # iterate all dir tags
178: #
179: my @dirnodes = $root->findnodes('child::dir');
180: my %okdirs;
181: foreach my $dirnode (@dirnodes) {
182: my $dirname = sstrip($dirnode->find('child::name'));
183: my $dirpath = sstrip($dirnode->find('child::path'));
184: my $description = sstrip($dirnode->find('child::description'));
185: # name must be valid
186: if (! valid_dir_name($dirname)) {
187: logger("ERROR", "directory name ($dirpath) $dirname invalid!");
188: $errcnt++;
189: }
190: # description can be present
191: if (! $description) {
192: logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
193: $warncnt++;
194: }
195: # check with dirs on filesystem
196: my $fn;
197: if ($dirpath && ($dirpath ne '.')) {
198: $fn = "$dirpath/$dirname";
199: } else {
200: $fn = "$dirname";
201: }
202: #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
203: if ($$fsdirs{$fn}) {
204: #logger(" OK ($$fsdirs{$fn})");
205: $okdirs{$fn} = $dirname;
206: } else {
207: if ($do_rewrite) {
208: # remove dir tag
209: logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
210: $dirnode->unbindNode();
211: $warncnt++;
212: } else {
213: logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
214: $errcnt++;
215: }
216: }
217: }
218: #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
219: if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
220: # number of dir tags and dirs don't match
221: # iterate through all dirs
222: foreach my $f (sort keys %$fsdirs) {
223: # was this dir missing?
224: if (! $okdirs{$f}) {
225: my ($name, $path) = split_file_path($f, 1);
226: # name must be valid
227: if (! valid_dir_name($name)) {
228: $path = "." unless ($path);
229: logger("ERROR", "directory name $name (in $path/) invalid!");
230: $errcnt++;
231: next;
232: }
233: if ($fix_dirs) {
234: # add missing dir tag
235: my $dir_node = $root->addNewChild($namespace, "dir");
236: $xml_changed++;
237: # add name
238: my $name_node = $dir_node->addNewChild($namespace, "name");
239: $name_node->appendTextNode($name);
240: # add path
241: if ($path) {
242: my $path_node = $dir_node->addNewChild($namespace, "path");
243: $path_node->appendTextNode($path);
244: }
245: logger("INFO", "directory $f to be added to index file!");
246: } else {
247: logger("ERROR", "directory $f missing in index file!");
248: $errcnt++;
249: }
250: }
251: }
252: }
253: }
254:
255:
256: #
257: # check_files($rootnode, \%files)
258: #
259: # reads all file elements under $rootnode and compares with the file list
260: # in %files
261: #
262: sub check_files {
263: my ($root, $fsfiles) = @_;
264:
265: #
266: # iterate all file tags
267: #
268: my @filenodes = $root->findnodes('child::file');
269: my %okfiles;
270: foreach my $filenode (@filenodes) {
271: my $filename = sstrip($filenode->find('child::name'));
272: my $filepath = sstrip($filenode->find('child::path'));
273: my $filesize = sstrip($filenode->find('child::size'));
274: my $filedate = sstrip($filenode->find('child::date'));
275: # name must be valid
276: if (! valid_file_name($filename)) {
277: logger("ERROR", "file name ($filepath)$filename invalid!");
278: $errcnt++;
279: }
280: my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
281: #logger("file: \"$filename\", \"$filepath\"");
282: if ($$fsfiles{$fn}) {
283: #logger(" OK ($$fsfiles{$fn})");
284: $okfiles{$fn} = $filename;
285: # check file size and date
286: if ($filesize) {
287: if ($filesize != $$fsfiles{$fn}->[1]) {
288: logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
289: $warncnt++;
290: }
291: }
292: # file date
293: if ($filedate) {
294: if ($filedate ne stime($$fsfiles{$fn}->[2])) {
295: logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
296: $warncnt++;
297: }
298: }
299: # update file size and date
300: if ($fix_fs_meta) {
301: # delete size and date
302: foreach my $n ($filenode->findnodes('child::size')) {
303: $filenode->removeChild($n);
304: }
305: foreach my $n ($filenode->findnodes('child::date')) {
306: $filenode->removeChild($n);
307: }
308: # add new size and date
309: my $node = $filenode->addNewChild($namespace, "size");
310: $node->appendTextNode($$fsfiles{$fn}->[1]);
311: $node = $filenode->addNewChild($namespace, "date");
312: $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
313: $xml_changed++;
314: }
315: } else {
316: if ($do_rewrite) {
317: # remove file tag
318: logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
319: $filenode->unbindNode();
320: $warncnt++;
321: } else {
322: logger("ERROR", "file $filename (in $filepath/) missing on disk!");
323: $errcnt++;
324: }
325: }
326: }
327: #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
328: if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
329: # number of file tags and files don't match
330: # iterate through all files
331: foreach my $f (sort keys %$fsfiles) {
332: my ($name, $path) = split_file_path($f, 1);
333: # was this file missing?
334: if (! $okfiles{$f}) {
335: # is an ignoreable file?
336: if ($index_files{$name}) {
337: next;
338: }
339: # name must be valid
340: if (! valid_dir_name($name)) {
341: $path = "." unless ($path);
342: logger("ERROR", "file name $name (in $path/) invalid!");
343: $errcnt++;
344: next;
345: }
346: if ($fix_files) {
347: # add missing file tag
348: my $file_node = $root->addNewChild($namespace, "file");
349: $xml_changed++;
350: # add name
351: my $name_node = $file_node->addNewChild($namespace, "name");
352: $name_node->appendTextNode($name);
353: # add path
354: if ($path) {
355: my $path_node = $file_node->addNewChild($namespace, "path");
356: $path_node->appendTextNode($path);
357: }
358: # add size
359: my $size_node = $file_node->addNewChild($namespace, "size");
360: $size_node->appendTextNode($$fsfiles{$f}->[1]);
361: # add date
362: my $date_node = $file_node->addNewChild($namespace, "date");
363: $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
364: logger("INFO", "file $f to be added to index file!");
365: } else {
366: logger("ERROR", "file $f missing in index file!");
367: $errcnt++;
368: }
369: }
370: }
371: }
372: }
373:
374: #
375: # check_resource_meta($rootnode)
376: #
377: # checks general resource meta information
378: #
379: sub check_resource_meta {
380: my ($rootnode) = @_;
381:
382: #
383: # description
384: #
385: my $description = $rootnode->findvalue('child::description');
386: if (! $description) {
387: logger("ERROR", "resource description element missing!");
388: $errcnt++;
389: }
390: #
391: # name
392: #
393: my $name = sstrip($rootnode->findvalue('child::name'));
394: if ($name) {
395: my ($dirname, $dirpath) = split_file_path($docdir);
396: if ($dirname ne $name) {
397: logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
398: $errcnt++;
399: }
400: } else {
401: logger("ERROR", "resource name element missing!");
402: $errcnt++;
403: }
404: #
405: # archive path
406: #
407: my $realpath;
408: # get real path
409: if ($docdir =~ /^\//) {
410: # docdir is absolute
411: $realpath = $docdir;
412: } else {
413: # docdir is relative -- try with the shell
414: if (open PWDCMD, "cd $docdir ; pwd|") {
415: $realpath = <PWDCMD>;
416: chomp $realpath;
417: }
418: }
419: if (! $realpath) {
420: logger("ERROR", "unable to check real archive path!");
421: $errcnt++;
422: return;
423: }
424: # get archive-path
425: my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
426: if ($archnode) {
427: my $arch = sstrip($archnode->textContent);
428: if ($arch ne $realpath) {
429: logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
430: $warncnt++;
431: # correct archive-path
432: $archnode->removeChildNodes;
433: $archnode->appendTextNode($realpath);
434: $xml_changed++;
435: }
436: } else {
437: # add archive-path
438: $archnode = $rootnode->addNewChild($namespace, "archive-path");
439: $archnode->appendTextNode($realpath);
440: $xml_changed++;
441: }
442:
443: }
444:
445:
446:
447: #######################################################
448: # main
449: #
450:
451: my ($document, $rootnode) = read_xml($metafile);
452:
453: check_resource_meta($rootnode);
454:
455: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
456: logger("INFO", "$fnum files on FS");
457: #foreach (keys %dirs) {logger('DEBUG', " dir ($_): $dirs{$_}");}
458:
459: check_files($rootnode, \%files);
460: check_dirs($rootnode, \%dirs);
461:
462: logger("INFO", "$warncnt warnings");
463: logger("INFO", "$errcnt errors");
464: if ($errcnt > 0) {
465: logger("ABORT", "there were $errcnt errors!");
466: exit 1;
467: } else {
468: if ($fix_xml) {
469: if ($dry_run) {
470: logger('INFO', "would write $metafile");
471: logger('DEBUG', $document->toString(1));
472: } else {
473: write_xml($document, $metafile);
474: }
475: }
476: logger("DONE", "index file checked successfully!");
477: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>