Annotation of foxridge-archiver/metacheck.pl, revision 1.4
1.1 casties 1: #!/usr/local/bin/perl -w
2:
3: use strict;
4: use XML::LibXML;
5:
6: use lib '/usr/local/mpiwg/archive';
7: use MPIWGStor;
8:
9: # make output unbuffered
10: $|=1;
11:
12: #######################################################
13: # internal parameters
14: #
15:
16: # program version
1.4 ! casties 17: my $version = "0.6.0 (20.9.2005)";
! 18: my $help =
! 19: "use: metacheck [options] docdir
! 20: options:
! 21: -debug show debugging info
! 22: -dry-run simulate, dont'do anything
! 23: -checkonly leave existing index file untouched
! 24: -add-files add file tags for missing files
! 25: -replace rewrite index file to match current files
! 26: ";
! 27: logger("INFO", "metacheck $version");
1.1 casties 28:
29: # read command line parameters
30: my $args = MPIWGStor::parseargs;
1.4 ! casties 31: if (! scalar(%$args)) {
! 32: print $help, "\n";
! 33: exit 1;
! 34: }
1.1 casties 35:
36: # debug level
37: $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
38:
1.4 ! casties 39: # simulate action only
! 40: my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
! 41: logger('DEBUG', "dry-run: $dry_run");
! 42:
1.1 casties 43: # check only or fix index file also
1.4 ! casties 44: my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
! 45:
1.1 casties 46: # add file tags for missing files
47: my $fix_files = ! $check_only;
48: # add tags for file size and date
49: my $fix_fs_meta = 1;
50: # add dir tags for missing directories
51: my $fix_dirs = ! $check_only;
52: # rewrite XML file (necessary for fix_files and fix_dirs)
53: my $fix_xml = ! $check_only;
1.4 ! casties 54: # rewrite complete index file
! 55: my $do_rewrite = 0;
! 56:
! 57: # add file tags for missing files
! 58: if (exists $$args{'add-files'}) {
! 59: $check_only = 0;
! 60: $fix_files = 1;
! 61: $fix_dirs = 1;
! 62: $fix_xml = 1;
! 63: $do_rewrite = 0;
! 64: logger('DEBUG', "add-files: true");
! 65: }
! 66:
! 67: # completely rewrite index file
! 68: if (exists $$args{'replace'}) {
! 69: $check_only = 0;
! 70: $fix_files = 1;
! 71: $fix_dirs = 1;
! 72: $fix_xml = 1;
! 73: $do_rewrite = 1;
! 74: logger('DEBUG', "replace: true");
! 75: }
! 76: logger('DEBUG', "checkonly: $check_only");
! 77:
! 78:
1.1 casties 79: my $xml_changed = 0;
80: # XML namespace (not really implemented!)
81: my $namespace = "";
82:
83:
84: #######################################################
85: # check parameters that were passed to the program
86: #
87: my $docdir = $$args{'path'};
88: if (! $docdir) {
89: logger("ABORT", "no document directory given!");
90: exit 1;
91: }
1.2 casties 92: # strip double slashes
93: $docdir =~ s/\/\//\//;
1.1 casties 94: # strip trailing slashes
95: $docdir =~ s/\/$//;
96: if (! -d $docdir) {
97: logger("ABORT", "document directory \'$docdir\' doesn't exist!");
98: exit 1;
99: }
100:
101: my $metafile = "$docdir/index.meta";
102: if (! -f $metafile) {
103: logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
104: exit 1;
105: }
106:
107: #######################################################
108: # internal variables
109: #
110:
111: # all files in the document directory tree
112: my %files;
113: # all directories in the document directory tree
114: my %dirs;
115: # number of errors
116: my $errcnt = 0;
117: # number of warnings
118: my $warncnt = 0;
119:
120: #######################################################
121: # subroutines
122: #
123:
124: #
125: # fs_read_files($realdir, $docdir, \%files, \%dirs)
126: #
127: # reads all files and directories below $realdir and puts the
128: # files in %files and directories in %dirs
129: # $docdir is only for recursion, it should be empty when called
130: # from outside
131: #
132: sub fs_read_files {
133: my ($directory, $docdir, $files, $dirs) = @_;
134: my $cnt = 0;
135:
136: if (! opendir DIR, $directory) {
137: return 0;
138: }
139: my @dirfiles = readdir DIR;
140: foreach my $fn (@dirfiles) {
141: # ignore names starting with a dot
142: next if ($fn =~ /^\./);
143: # ignore other silly files
144: next if ($junk_files{$fn});
145:
146: $cnt++;
147: my $f = "$directory/$fn";
148: my $docf = ($docdir) ? "$docdir/$fn" : $fn;
149: #logger("fs_file: \"$f\"");
150: if (-f $f) {
151: #logger(" is file");
152: my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
153: $atime,$mtime,$ctime,$blksize,$blocks)
154: = stat(_);
155: $$files{$docf} = [$fn, $size, $mtime];
156: } elsif (-d _) {
157: #logger(" is dir");
158: $$dirs{$docf} = $fn;
159: # recurse into directory
160: $cnt += fs_read_files($f, $docf, $files, $dirs);
161: }
162: }
163: return $cnt;
164: }
165:
166:
167: #
168: # check_dirs($rootnode, \%dirs)
169: #
170: # reads all dir elements under $rootnode and compares with the directory list
171: # in %dirs
172: #
173: sub check_dirs {
174: my ($root, $fsdirs) = @_;
175:
176: #
177: # iterate all dir tags
178: #
179: my @dirnodes = $root->findnodes('child::dir');
180: my %okdirs;
181: foreach my $dirnode (@dirnodes) {
182: my $dirname = sstrip($dirnode->find('child::name'));
183: my $dirpath = sstrip($dirnode->find('child::path'));
184: my $description = sstrip($dirnode->find('child::description'));
185: # name must be valid
186: if (! valid_dir_name($dirname)) {
187: logger("ERROR", "directory name ($dirpath) $dirname invalid!");
188: $errcnt++;
189: }
190: # description can be present
191: if (! $description) {
192: logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
193: $warncnt++;
194: }
195: # check with dirs on filesystem
196: my $fn;
1.4 ! casties 197: if ($dirpath && ($dirpath ne '.')) {
1.1 casties 198: $fn = "$dirpath/$dirname";
199: } else {
200: $fn = "$dirname";
201: }
1.4 ! casties 202: #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
1.1 casties 203: if ($$fsdirs{$fn}) {
204: #logger(" OK ($$fsdirs{$fn})");
205: $okdirs{$fn} = $dirname;
206: } else {
1.4 ! casties 207: if ($do_rewrite) {
! 208: # remove dir tag
! 209: logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
! 210: $dirnode->unbindNode();
! 211: $warncnt++;
! 212: } else {
! 213: logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
! 214: $errcnt++;
! 215: }
1.1 casties 216: }
217: }
218: #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
219: if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
220: # number of dir tags and dirs don't match
221: # iterate through all dirs
222: foreach my $f (sort keys %$fsdirs) {
223: # was this dir missing?
224: if (! $okdirs{$f}) {
1.4 ! casties 225: my ($name, $path) = split_file_path($f, 1);
1.1 casties 226: # name must be valid
227: if (! valid_dir_name($name)) {
228: $path = "." unless ($path);
229: logger("ERROR", "directory name $name (in $path/) invalid!");
230: $errcnt++;
231: next;
232: }
233: if ($fix_dirs) {
234: # add missing dir tag
235: my $dir_node = $root->addNewChild($namespace, "dir");
236: $xml_changed++;
237: # add name
238: my $name_node = $dir_node->addNewChild($namespace, "name");
239: $name_node->appendTextNode($name);
240: # add path
241: if ($path) {
242: my $path_node = $dir_node->addNewChild($namespace, "path");
243: $path_node->appendTextNode($path);
244: }
245: logger("INFO", "directory $f to be added to index file!");
246: } else {
247: logger("ERROR", "directory $f missing in index file!");
248: $errcnt++;
249: }
250: }
251: }
252: }
253: }
254:
255:
256: #
257: # check_files($rootnode, \%files)
258: #
259: # reads all file elements under $rootnode and compares with the file list
260: # in %files
261: #
262: sub check_files {
263: my ($root, $fsfiles) = @_;
264:
265: #
266: # iterate all file tags
267: #
268: my @filenodes = $root->findnodes('child::file');
269: my %okfiles;
270: foreach my $filenode (@filenodes) {
271: my $filename = sstrip($filenode->find('child::name'));
272: my $filepath = sstrip($filenode->find('child::path'));
273: my $filesize = sstrip($filenode->find('child::size'));
274: my $filedate = sstrip($filenode->find('child::date'));
275: # name must be valid
276: if (! valid_file_name($filename)) {
277: logger("ERROR", "file name ($filepath)$filename invalid!");
278: $errcnt++;
279: }
280: my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
281: #logger("file: \"$filename\", \"$filepath\"");
282: if ($$fsfiles{$fn}) {
283: #logger(" OK ($$fsfiles{$fn})");
284: $okfiles{$fn} = $filename;
285: # check file size and date
286: if ($filesize) {
287: if ($filesize != $$fsfiles{$fn}->[1]) {
288: logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
289: $warncnt++;
290: }
291: }
292: # file date
293: if ($filedate) {
294: if ($filedate ne stime($$fsfiles{$fn}->[2])) {
295: logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
296: $warncnt++;
297: }
298: }
299: # update file size and date
300: if ($fix_fs_meta) {
301: # delete size and date
302: foreach my $n ($filenode->findnodes('child::size')) {
303: $filenode->removeChild($n);
304: }
305: foreach my $n ($filenode->findnodes('child::date')) {
306: $filenode->removeChild($n);
307: }
308: # add new size and date
309: my $node = $filenode->addNewChild($namespace, "size");
310: $node->appendTextNode($$fsfiles{$fn}->[1]);
311: $node = $filenode->addNewChild($namespace, "date");
312: $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
313: $xml_changed++;
314: }
315: } else {
1.4 ! casties 316: if ($do_rewrite) {
! 317: # remove file tag
! 318: logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
! 319: $filenode->unbindNode();
! 320: $warncnt++;
! 321: } else {
! 322: logger("ERROR", "file $filename (in $filepath/) missing on disk!");
! 323: $errcnt++;
! 324: }
1.1 casties 325: }
326: }
327: #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
328: if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
329: # number of file tags and files don't match
330: # iterate through all files
331: foreach my $f (sort keys %$fsfiles) {
1.4 ! casties 332: my ($name, $path) = split_file_path($f, 1);
1.1 casties 333: # was this file missing?
334: if (! $okfiles{$f}) {
335: # is an ignoreable file?
336: if ($index_files{$name}) {
337: next;
338: }
339: # name must be valid
340: if (! valid_dir_name($name)) {
341: $path = "." unless ($path);
342: logger("ERROR", "file name $name (in $path/) invalid!");
343: $errcnt++;
344: next;
345: }
346: if ($fix_files) {
347: # add missing file tag
348: my $file_node = $root->addNewChild($namespace, "file");
349: $xml_changed++;
350: # add name
351: my $name_node = $file_node->addNewChild($namespace, "name");
352: $name_node->appendTextNode($name);
353: # add path
354: if ($path) {
355: my $path_node = $file_node->addNewChild($namespace, "path");
356: $path_node->appendTextNode($path);
357: }
358: # add size
359: my $size_node = $file_node->addNewChild($namespace, "size");
360: $size_node->appendTextNode($$fsfiles{$f}->[1]);
361: # add date
362: my $date_node = $file_node->addNewChild($namespace, "date");
363: $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
364: logger("INFO", "file $f to be added to index file!");
365: } else {
366: logger("ERROR", "file $f missing in index file!");
367: $errcnt++;
368: }
369: }
370: }
371: }
372: }
373:
374: #
375: # check_resource_meta($rootnode)
376: #
377: # checks general resource meta information
378: #
379: sub check_resource_meta {
380: my ($rootnode) = @_;
381:
382: #
383: # description
384: #
385: my $description = $rootnode->findvalue('child::description');
386: if (! $description) {
387: logger("ERROR", "resource description element missing!");
388: $errcnt++;
389: }
390: #
391: # name
392: #
393: my $name = sstrip($rootnode->findvalue('child::name'));
394: if ($name) {
395: my ($dirname, $dirpath) = split_file_path($docdir);
396: if ($dirname ne $name) {
397: logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
398: $errcnt++;
399: }
400: } else {
401: logger("ERROR", "resource name element missing!");
402: $errcnt++;
403: }
404: #
405: # archive path
406: #
407: my $realpath;
408: # get real path
409: if ($docdir =~ /^\//) {
410: # docdir is absolute
411: $realpath = $docdir;
412: } else {
413: # docdir is relative -- try with the shell
414: if (open PWDCMD, "cd $docdir ; pwd|") {
415: $realpath = <PWDCMD>;
416: chomp $realpath;
417: }
418: }
419: if (! $realpath) {
420: logger("ERROR", "unable to check real archive path!");
421: $errcnt++;
422: return;
423: }
424: # get archive-path
425: my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
426: if ($archnode) {
427: my $arch = sstrip($archnode->textContent);
428: if ($arch ne $realpath) {
429: logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
430: $warncnt++;
431: # correct archive-path
432: $archnode->removeChildNodes;
433: $archnode->appendTextNode($realpath);
434: $xml_changed++;
435: }
436: } else {
437: # add archive-path
438: $archnode = $rootnode->addNewChild($namespace, "archive-path");
439: $archnode->appendTextNode($realpath);
440: $xml_changed++;
441: }
442:
443: }
444:
445:
446:
447: #######################################################
448: # main
449: #
450:
451: my ($document, $rootnode) = read_xml($metafile);
452:
453: check_resource_meta($rootnode);
454:
455: my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
456: logger("INFO", "$fnum files on FS");
1.4 ! casties 457: #foreach (keys %dirs) {logger('DEBUG', " dir ($_): $dirs{$_}");}
1.1 casties 458:
459: check_files($rootnode, \%files);
460: check_dirs($rootnode, \%dirs);
461:
462: logger("INFO", "$warncnt warnings");
463: logger("INFO", "$errcnt errors");
464: if ($errcnt > 0) {
1.4 ! casties 465: logger("ABORT", "there were $errcnt errors!");
1.1 casties 466: exit 1;
467: } else {
468: if ($fix_xml) {
1.4 ! casties 469: if ($dry_run) {
! 470: logger('INFO', "would write $metafile");
! 471: logger('DEBUG', $document->toString(1));
! 472: } else {
! 473: write_xml($document, $metafile);
! 474: }
1.1 casties 475: }
476: logger("DONE", "index file checked successfully!");
477: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>