1: #!/usr/local/bin/perl -w
2:
3: use strict;
4: use XML::LibXML;
5:
6: use lib '/usr/local/mpiwg/archive';
7: use MPIWGStor;
8:
9: # make output unbuffered
10: $|=1;
11:
12: #######################################################
13: # internal parameters
14: #
15:
16: # program version
17: my $version = "0.5.3 (8.7.2004)";
18:
19: # read command line parameters
20: my $args = MPIWGStor::parseargs;
21:
22: # debug level
23: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
24:
25: # check only or fix index file also
26: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 0;
27: # add file tags for missing files
28: my $fix_files = ! $check_only;
29: # add tags for file size and date
30: my $fix_fs_meta = 1;
31: # add dir tags for missing directories
32: my $fix_dirs = ! $check_only;
33: # rewrite XML file (necessary for fix_files and fix_dirs)
34: my $fix_xml = ! $check_only;
35: my $xml_changed = 0;
36: # XML namespace (not really implemented!)
37: my $namespace = "";
38:
39:
40: #######################################################
41: # check parameters that were passed to the program
42: #
43: my $docdir = $$args{'path'};
44: if (! $docdir) {
45: logger("ABORT", "no document directory given!");
46: exit 1;
47: }
48: # strip double slashes
49: $docdir =~ s/\/\//\//;
50: # strip trailing slashes
51: $docdir =~ s/\/$//;
52: if (! -d $docdir) {
53: logger("ABORT", "document directory \'$docdir\' doesn't exist!");
54: exit 1;
55: }
56:
57: my $metafile = "$docdir/index.meta";
58: if (! -f $metafile) {
59: logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
60: exit 1;
61: }
62:
63: #######################################################
64: # internal variables
65: #
66:
67: # all files in the document directory tree
68: my %files;
69: # all directories in the document directory tree
70: my %dirs;
71: # number of errors
72: my $errcnt = 0;
73: # number of warnings
74: my $warncnt = 0;
75:
76: #######################################################
77: # subroutines
78: #
79:
80: #
81: # fs_read_files($realdir, $docdir, \%files, \%dirs)
82: #
83: # reads all files and directories below $realdir and puts the
84: # files in %files and directories in %dirs
85: # $docdir is only for recursion, it should be empty when called
86: # from outside
87: #
88: sub fs_read_files {
89: my ($directory, $docdir, $files, $dirs) = @_;
90: my $cnt = 0;
91:
92: if (! opendir DIR, $directory) {
93: return 0;
94: }
95: my @dirfiles = readdir DIR;
96: foreach my $fn (@dirfiles) {
97: # ignore names starting with a dot
98: next if ($fn =~ /^\./);
99: # ignore other silly files
100: next if ($junk_files{$fn});
101:
102: $cnt++;
103: my $f = "$directory/$fn";
104: my $docf = ($docdir) ? "$docdir/$fn" : $fn;
105: #logger("fs_file: \"$f\"");
106: if (-f $f) {
107: #logger(" is file");
108: my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
109: $atime,$mtime,$ctime,$blksize,$blocks)
110: = stat(_);
111: $$files{$docf} = [$fn, $size, $mtime];
112: } elsif (-d _) {
113: #logger(" is dir");
114: $$dirs{$docf} = $fn;
115: # recurse into directory
116: $cnt += fs_read_files($f, $docf, $files, $dirs);
117: }
118: }
119: return $cnt;
120: }
121:
122:
123: #
124: # check_dirs($rootnode, \%dirs)
125: #
126: # reads all dir elements under $rootnode and compares with the directory list
127: # in %dirs
128: #
129: sub check_dirs {
130: my ($root, $fsdirs) = @_;
131:
132: #
133: # iterate all dir tags
134: #
135: my @dirnodes = $root->findnodes('child::dir');
136: my %okdirs;
137: foreach my $dirnode (@dirnodes) {
138: my $dirname = sstrip($dirnode->find('child::name'));
139: my $dirpath = sstrip($dirnode->find('child::path'));
140: my $description = sstrip($dirnode->find('child::description'));
141: # name must be valid
142: if (! valid_dir_name($dirname)) {
143: logger("ERROR", "directory name ($dirpath) $dirname invalid!");
144: $errcnt++;
145: }
146: # description can be present
147: if (! $description) {
148: logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
149: $warncnt++;
150: }
151: # check with dirs on filesystem
152: my $fn;
153: if ($dirpath) {
154: $fn = "$dirpath/$dirname";
155: } else {
156: $fn = "$dirname";
157: }
158: #logger("dir: \"$dirname\", \"$dirpath\"");
159: if ($$fsdirs{$fn}) {
160: #logger(" OK ($$fsdirs{$fn})");
161: $okdirs{$fn} = $dirname;
162: } else {
163: logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
164: $errcnt++;
165: }
166: }
167: #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
168: if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
169: # number of dir tags and dirs don't match
170: # iterate through all dirs
171: foreach my $f (sort keys %$fsdirs) {
172: # was this dir missing?
173: if (! $okdirs{$f}) {
174: my ($name, $path) = split_file_path($f);
175: # name must be valid
176: if (! valid_dir_name($name)) {
177: $path = "." unless ($path);
178: logger("ERROR", "directory name $name (in $path/) invalid!");
179: $errcnt++;
180: next;
181: }
182: if ($fix_dirs) {
183: # add missing dir tag
184: my $dir_node = $root->addNewChild($namespace, "dir");
185: $xml_changed++;
186: # add name
187: my $name_node = $dir_node->addNewChild($namespace, "name");
188: $name_node->appendTextNode($name);
189: # add path
190: if ($path) {
191: my $path_node = $dir_node->addNewChild($namespace, "path");
192: $path_node->appendTextNode($path);
193: }
194: logger("INFO", "directory $f to be added to index file!");
195: } else {
196: logger("ERROR", "directory $f missing in index file!");
197: $errcnt++;
198: }
199: }
200: }
201: }
202: }
203:
204:
205: #
206: # check_files($rootnode, \%files)
207: #
208: # reads all file elements under $rootnode and compares with the file list
209: # in %files
210: #
211: sub check_files {
212: my ($root, $fsfiles) = @_;
213:
214: #
215: # iterate all file tags
216: #
217: my @filenodes = $root->findnodes('child::file');
218: my %okfiles;
219: foreach my $filenode (@filenodes) {
220: my $filename = sstrip($filenode->find('child::name'));
221: my $filepath = sstrip($filenode->find('child::path'));
222: my $filesize = sstrip($filenode->find('child::size'));
223: my $filedate = sstrip($filenode->find('child::date'));
224: # name must be valid
225: if (! valid_file_name($filename)) {
226: logger("ERROR", "file name ($filepath)$filename invalid!");
227: $errcnt++;
228: }
229: my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
230: #logger("file: \"$filename\", \"$filepath\"");
231: if ($$fsfiles{$fn}) {
232: #logger(" OK ($$fsfiles{$fn})");
233: $okfiles{$fn} = $filename;
234: # check file size and date
235: if ($filesize) {
236: if ($filesize != $$fsfiles{$fn}->[1]) {
237: logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
238: $warncnt++;
239: }
240: }
241: # file date
242: if ($filedate) {
243: if ($filedate ne stime($$fsfiles{$fn}->[2])) {
244: logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
245: $warncnt++;
246: }
247: }
248: # update file size and date
249: if ($fix_fs_meta) {
250: # delete size and date
251: foreach my $n ($filenode->findnodes('child::size')) {
252: $filenode->removeChild($n);
253: }
254: foreach my $n ($filenode->findnodes('child::date')) {
255: $filenode->removeChild($n);
256: }
257: # add new size and date
258: my $node = $filenode->addNewChild($namespace, "size");
259: $node->appendTextNode($$fsfiles{$fn}->[1]);
260: $node = $filenode->addNewChild($namespace, "date");
261: $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
262: $xml_changed++;
263: }
264: } else {
265: logger("ERROR", "file $filename (in $filepath/) missing on disk!");
266: $errcnt++;
267: }
268: }
269: #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
270: if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
271: # number of file tags and files don't match
272: # iterate through all files
273: foreach my $f (sort keys %$fsfiles) {
274: my ($name, $path) = split_file_path($f);
275: # was this file missing?
276: if (! $okfiles{$f}) {
277: # is an ignoreable file?
278: if ($index_files{$name}) {
279: next;
280: }
281: # name must be valid
282: if (! valid_dir_name($name)) {
283: $path = "." unless ($path);
284: logger("ERROR", "file name $name (in $path/) invalid!");
285: $errcnt++;
286: next;
287: }
288: if ($fix_files) {
289: # add missing file tag
290: my $file_node = $root->addNewChild($namespace, "file");
291: $xml_changed++;
292: # add name
293: my $name_node = $file_node->addNewChild($namespace, "name");
294: $name_node->appendTextNode($name);
295: # add path
296: if ($path) {
297: my $path_node = $file_node->addNewChild($namespace, "path");
298: $path_node->appendTextNode($path);
299: }
300: # add size
301: my $size_node = $file_node->addNewChild($namespace, "size");
302: $size_node->appendTextNode($$fsfiles{$f}->[1]);
303: # add date
304: my $date_node = $file_node->addNewChild($namespace, "date");
305: $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
306: logger("INFO", "file $f to be added to index file!");
307: } else {
308: logger("ERROR", "file $f missing in index file!");
309: $errcnt++;
310: }
311: }
312: }
313: }
314: }
315:
316: #
317: # check_resource_meta($rootnode)
318: #
319: # checks general resource meta information
320: #
321: sub check_resource_meta {
322: my ($rootnode) = @_;
323:
324: #
325: # description
326: #
327: my $description = $rootnode->findvalue('child::description');
328: if (! $description) {
329: logger("ERROR", "resource description element missing!");
330: $errcnt++;
331: }
332: #
333: # name
334: #
335: my $name = sstrip($rootnode->findvalue('child::name'));
336: if ($name) {
337: my ($dirname, $dirpath) = split_file_path($docdir);
338: if ($dirname ne $name) {
339: logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
340: $errcnt++;
341: }
342: } else {
343: logger("ERROR", "resource name element missing!");
344: $errcnt++;
345: }
346: #
347: # archive path
348: #
349: my $realpath;
350: # get real path
351: if ($docdir =~ /^\//) {
352: # docdir is absolute
353: $realpath = $docdir;
354: } else {
355: # docdir is relative -- try with the shell
356: if (open PWDCMD, "cd $docdir ; pwd|") {
357: $realpath = <PWDCMD>;
358: chomp $realpath;
359: }
360: }
361: if (! $realpath) {
362: logger("ERROR", "unable to check real archive path!");
363: $errcnt++;
364: return;
365: }
366: # get archive-path
367: my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
368: if ($archnode) {
369: my $arch = sstrip($archnode->textContent);
370: if ($arch ne $realpath) {
371: logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
372: $warncnt++;
373: # correct archive-path
374: $archnode->removeChildNodes;
375: $archnode->appendTextNode($realpath);
376: $xml_changed++;
377: }
378: } else {
379: # add archive-path
380: $archnode = $rootnode->addNewChild($namespace, "archive-path");
381: $archnode->appendTextNode($realpath);
382: $xml_changed++;
383: }
384:
385: }
386:
387:
388:
389: #######################################################
390: # main
391: #
392:
393: logger("INFO", "metacheck $version");
394:
395: my ($document, $rootnode) = read_xml($metafile);
396:
397: check_resource_meta($rootnode);
398:
399: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
400: logger("INFO", "$fnum files on FS");
401: #foreach (keys %files) {logger(" file ($_): $files{$_}");}
402:
403: check_files($rootnode, \%files);
404: check_dirs($rootnode, \%dirs);
405:
406: logger("INFO", "$warncnt warnings");
407: logger("INFO", "$errcnt errors");
408: if ($errcnt > 0) {
409: logger("ABORT", "there were errors!");
410: exit 1;
411: } else {
412: if ($fix_xml) {
413: write_xml($document, $metafile);
414: }
415: logger("DONE", "index file checked successfully!");
416: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>