annotate metacheck.pl @ 18:fdf4ceb36db1

fixed problem with dir names in metacheck new version of metacheck defaults to not change index file new version of archiver uses new version of metacheck
author casties
date Tue, 20 Sep 2005 19:24:57 +0200
parents b7259a1c85aa
children 1dd183b95c61
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
1 #!/usr/local/bin/perl -w
30497c6a3eca Initial revision
casties
parents:
diff changeset
2
30497c6a3eca Initial revision
casties
parents:
diff changeset
3 use strict;
30497c6a3eca Initial revision
casties
parents:
diff changeset
4 use XML::LibXML;
30497c6a3eca Initial revision
casties
parents:
diff changeset
5
30497c6a3eca Initial revision
casties
parents:
diff changeset
6 use lib '/usr/local/mpiwg/archive';
30497c6a3eca Initial revision
casties
parents:
diff changeset
7 use MPIWGStor;
30497c6a3eca Initial revision
casties
parents:
diff changeset
8
30497c6a3eca Initial revision
casties
parents:
diff changeset
9 # make output unbuffered
30497c6a3eca Initial revision
casties
parents:
diff changeset
10 $|=1;
30497c6a3eca Initial revision
casties
parents:
diff changeset
11
30497c6a3eca Initial revision
casties
parents:
diff changeset
12 #######################################################
30497c6a3eca Initial revision
casties
parents:
diff changeset
13 # internal parameters
30497c6a3eca Initial revision
casties
parents:
diff changeset
14 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
15
30497c6a3eca Initial revision
casties
parents:
diff changeset
16 # program version
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
17 my $version = "0.6.0 (20.9.2005)";
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
18 my $help =
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
19 "use: metacheck [options] docdir
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
20 options:
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
21 -debug show debugging info
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
22 -dry-run simulate, dont'do anything
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
23 -checkonly leave existing index file untouched
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
24 -add-files add file tags for missing files
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
25 -replace rewrite index file to match current files
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
26 ";
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
27 logger("INFO", "metacheck $version");
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
28
30497c6a3eca Initial revision
casties
parents:
diff changeset
29 # read command line parameters
30497c6a3eca Initial revision
casties
parents:
diff changeset
30 my $args = MPIWGStor::parseargs;
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
31 if (! scalar(%$args)) {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
32 print $help, "\n";
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
33 exit 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
34 }
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
35
30497c6a3eca Initial revision
casties
parents:
diff changeset
36 # debug level
30497c6a3eca Initial revision
casties
parents:
diff changeset
37 $debug = (exists $$args{'debug'}) ? $$args{'debug'} : 0;
30497c6a3eca Initial revision
casties
parents:
diff changeset
38
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
39 # simulate action only
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
40 my $dry_run = (exists $$args{'dry-run'}) ? $$args{'dry-run'} : 0;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
41 logger('DEBUG', "dry-run: $dry_run");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
42
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
43 # check only or fix index file also
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
44 my $check_only = (exists $$args{'checkonly'}) ? $$args{'checkonly'} : 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
45
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
46 # add file tags for missing files
30497c6a3eca Initial revision
casties
parents:
diff changeset
47 my $fix_files = ! $check_only;
30497c6a3eca Initial revision
casties
parents:
diff changeset
48 # add tags for file size and date
30497c6a3eca Initial revision
casties
parents:
diff changeset
49 my $fix_fs_meta = 1;
30497c6a3eca Initial revision
casties
parents:
diff changeset
50 # add dir tags for missing directories
30497c6a3eca Initial revision
casties
parents:
diff changeset
51 my $fix_dirs = ! $check_only;
30497c6a3eca Initial revision
casties
parents:
diff changeset
52 # rewrite XML file (necessary for fix_files and fix_dirs)
30497c6a3eca Initial revision
casties
parents:
diff changeset
53 my $fix_xml = ! $check_only;
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
54 # rewrite complete index file
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
55 my $do_rewrite = 0;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
56
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
57 # add file tags for missing files
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
58 if (exists $$args{'add-files'}) {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
59 $check_only = 0;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
60 $fix_files = 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
61 $fix_dirs = 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
62 $fix_xml = 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
63 $do_rewrite = 0;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
64 logger('DEBUG', "add-files: true");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
65 }
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
66
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
67 # completely rewrite index file
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
68 if (exists $$args{'replace'}) {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
69 $check_only = 0;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
70 $fix_files = 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
71 $fix_dirs = 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
72 $fix_xml = 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
73 $do_rewrite = 1;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
74 logger('DEBUG', "replace: true");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
75 }
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
76 logger('DEBUG', "checkonly: $check_only");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
77
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
78
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
79 my $xml_changed = 0;
30497c6a3eca Initial revision
casties
parents:
diff changeset
80 # XML namespace (not really implemented!)
30497c6a3eca Initial revision
casties
parents:
diff changeset
81 my $namespace = "";
30497c6a3eca Initial revision
casties
parents:
diff changeset
82
30497c6a3eca Initial revision
casties
parents:
diff changeset
83
30497c6a3eca Initial revision
casties
parents:
diff changeset
84 #######################################################
30497c6a3eca Initial revision
casties
parents:
diff changeset
85 # check parameters that were passed to the program
30497c6a3eca Initial revision
casties
parents:
diff changeset
86 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
87 my $docdir = $$args{'path'};
30497c6a3eca Initial revision
casties
parents:
diff changeset
88 if (! $docdir) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
89 logger("ABORT", "no document directory given!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
90 exit 1;
30497c6a3eca Initial revision
casties
parents:
diff changeset
91 }
1
9143a0c4c678 small fix for // in pathnames
casties
parents: 0
diff changeset
92 # strip double slashes
9143a0c4c678 small fix for // in pathnames
casties
parents: 0
diff changeset
93 $docdir =~ s/\/\//\//;
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
94 # strip trailing slashes
30497c6a3eca Initial revision
casties
parents:
diff changeset
95 $docdir =~ s/\/$//;
30497c6a3eca Initial revision
casties
parents:
diff changeset
96 if (! -d $docdir) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
97 logger("ABORT", "document directory \'$docdir\' doesn't exist!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
98 exit 1;
30497c6a3eca Initial revision
casties
parents:
diff changeset
99 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
100
30497c6a3eca Initial revision
casties
parents:
diff changeset
101 my $metafile = "$docdir/index.meta";
30497c6a3eca Initial revision
casties
parents:
diff changeset
102 if (! -f $metafile) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
103 logger("ABORT", "metadata index file \'$metafile\' doesn't exist!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
104 exit 1;
30497c6a3eca Initial revision
casties
parents:
diff changeset
105 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
106
30497c6a3eca Initial revision
casties
parents:
diff changeset
107 #######################################################
30497c6a3eca Initial revision
casties
parents:
diff changeset
108 # internal variables
30497c6a3eca Initial revision
casties
parents:
diff changeset
109 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
110
30497c6a3eca Initial revision
casties
parents:
diff changeset
111 # all files in the document directory tree
30497c6a3eca Initial revision
casties
parents:
diff changeset
112 my %files;
30497c6a3eca Initial revision
casties
parents:
diff changeset
113 # all directories in the document directory tree
30497c6a3eca Initial revision
casties
parents:
diff changeset
114 my %dirs;
30497c6a3eca Initial revision
casties
parents:
diff changeset
115 # number of errors
30497c6a3eca Initial revision
casties
parents:
diff changeset
116 my $errcnt = 0;
30497c6a3eca Initial revision
casties
parents:
diff changeset
117 # number of warnings
30497c6a3eca Initial revision
casties
parents:
diff changeset
118 my $warncnt = 0;
30497c6a3eca Initial revision
casties
parents:
diff changeset
119
30497c6a3eca Initial revision
casties
parents:
diff changeset
120 #######################################################
30497c6a3eca Initial revision
casties
parents:
diff changeset
121 # subroutines
30497c6a3eca Initial revision
casties
parents:
diff changeset
122 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
123
30497c6a3eca Initial revision
casties
parents:
diff changeset
124 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
125 # fs_read_files($realdir, $docdir, \%files, \%dirs)
30497c6a3eca Initial revision
casties
parents:
diff changeset
126 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
127 # reads all files and directories below $realdir and puts the
30497c6a3eca Initial revision
casties
parents:
diff changeset
128 # files in %files and directories in %dirs
30497c6a3eca Initial revision
casties
parents:
diff changeset
129 # $docdir is only for recursion, it should be empty when called
30497c6a3eca Initial revision
casties
parents:
diff changeset
130 # from outside
30497c6a3eca Initial revision
casties
parents:
diff changeset
131 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
132 sub fs_read_files {
30497c6a3eca Initial revision
casties
parents:
diff changeset
133 my ($directory, $docdir, $files, $dirs) = @_;
30497c6a3eca Initial revision
casties
parents:
diff changeset
134 my $cnt = 0;
30497c6a3eca Initial revision
casties
parents:
diff changeset
135
30497c6a3eca Initial revision
casties
parents:
diff changeset
136 if (! opendir DIR, $directory) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
137 return 0;
30497c6a3eca Initial revision
casties
parents:
diff changeset
138 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
139 my @dirfiles = readdir DIR;
30497c6a3eca Initial revision
casties
parents:
diff changeset
140 foreach my $fn (@dirfiles) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
141 # ignore names starting with a dot
30497c6a3eca Initial revision
casties
parents:
diff changeset
142 next if ($fn =~ /^\./);
30497c6a3eca Initial revision
casties
parents:
diff changeset
143 # ignore other silly files
30497c6a3eca Initial revision
casties
parents:
diff changeset
144 next if ($junk_files{$fn});
30497c6a3eca Initial revision
casties
parents:
diff changeset
145
30497c6a3eca Initial revision
casties
parents:
diff changeset
146 $cnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
147 my $f = "$directory/$fn";
30497c6a3eca Initial revision
casties
parents:
diff changeset
148 my $docf = ($docdir) ? "$docdir/$fn" : $fn;
30497c6a3eca Initial revision
casties
parents:
diff changeset
149 #logger("fs_file: \"$f\"");
30497c6a3eca Initial revision
casties
parents:
diff changeset
150 if (-f $f) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
151 #logger(" is file");
30497c6a3eca Initial revision
casties
parents:
diff changeset
152 my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,
30497c6a3eca Initial revision
casties
parents:
diff changeset
153 $atime,$mtime,$ctime,$blksize,$blocks)
30497c6a3eca Initial revision
casties
parents:
diff changeset
154 = stat(_);
30497c6a3eca Initial revision
casties
parents:
diff changeset
155 $$files{$docf} = [$fn, $size, $mtime];
30497c6a3eca Initial revision
casties
parents:
diff changeset
156 } elsif (-d _) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
157 #logger(" is dir");
30497c6a3eca Initial revision
casties
parents:
diff changeset
158 $$dirs{$docf} = $fn;
30497c6a3eca Initial revision
casties
parents:
diff changeset
159 # recurse into directory
30497c6a3eca Initial revision
casties
parents:
diff changeset
160 $cnt += fs_read_files($f, $docf, $files, $dirs);
30497c6a3eca Initial revision
casties
parents:
diff changeset
161 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
162 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
163 return $cnt;
30497c6a3eca Initial revision
casties
parents:
diff changeset
164 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
165
30497c6a3eca Initial revision
casties
parents:
diff changeset
166
30497c6a3eca Initial revision
casties
parents:
diff changeset
167 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
168 # check_dirs($rootnode, \%dirs)
30497c6a3eca Initial revision
casties
parents:
diff changeset
169 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
170 # reads all dir elements under $rootnode and compares with the directory list
30497c6a3eca Initial revision
casties
parents:
diff changeset
171 # in %dirs
30497c6a3eca Initial revision
casties
parents:
diff changeset
172 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
173 sub check_dirs {
30497c6a3eca Initial revision
casties
parents:
diff changeset
174 my ($root, $fsdirs) = @_;
30497c6a3eca Initial revision
casties
parents:
diff changeset
175
30497c6a3eca Initial revision
casties
parents:
diff changeset
176 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
177 # iterate all dir tags
30497c6a3eca Initial revision
casties
parents:
diff changeset
178 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
179 my @dirnodes = $root->findnodes('child::dir');
30497c6a3eca Initial revision
casties
parents:
diff changeset
180 my %okdirs;
30497c6a3eca Initial revision
casties
parents:
diff changeset
181 foreach my $dirnode (@dirnodes) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
182 my $dirname = sstrip($dirnode->find('child::name'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
183 my $dirpath = sstrip($dirnode->find('child::path'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
184 my $description = sstrip($dirnode->find('child::description'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
185 # name must be valid
30497c6a3eca Initial revision
casties
parents:
diff changeset
186 if (! valid_dir_name($dirname)) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
187 logger("ERROR", "directory name ($dirpath) $dirname invalid!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
188 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
189 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
190 # description can be present
30497c6a3eca Initial revision
casties
parents:
diff changeset
191 if (! $description) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
192 logger("WARNING", "description for directory $dirname (in $dirpath/) missing!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
193 $warncnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
194 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
195 # check with dirs on filesystem
30497c6a3eca Initial revision
casties
parents:
diff changeset
196 my $fn;
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
197 if ($dirpath && ($dirpath ne '.')) {
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
198 $fn = "$dirpath/$dirname";
30497c6a3eca Initial revision
casties
parents:
diff changeset
199 } else {
30497c6a3eca Initial revision
casties
parents:
diff changeset
200 $fn = "$dirname";
30497c6a3eca Initial revision
casties
parents:
diff changeset
201 }
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
202 #logger('DEBUG', "dir: \"$dirname\", \"$dirpath\", fn: \"$fn\"");
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
203 if ($$fsdirs{$fn}) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
204 #logger(" OK ($$fsdirs{$fn})");
30497c6a3eca Initial revision
casties
parents:
diff changeset
205 $okdirs{$fn} = $dirname;
30497c6a3eca Initial revision
casties
parents:
diff changeset
206 } else {
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
207 if ($do_rewrite) {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
208 # remove dir tag
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
209 logger("WARNING", "directory $dirname (in $dirpath/) no longer on disk!");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
210 $dirnode->unbindNode();
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
211 $warncnt++;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
212 } else {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
213 logger("ERROR", "directory $dirname (in $dirpath/) missing on disk!");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
214 $errcnt++;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
215 }
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
216 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
217 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
218 #logger("dirs: ", (scalar keys %$fsdirs), " vs ", (scalar keys %okdirs), "");
30497c6a3eca Initial revision
casties
parents:
diff changeset
219 if ((scalar keys %$fsdirs) != (scalar keys %okdirs)) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
220 # number of dir tags and dirs don't match
30497c6a3eca Initial revision
casties
parents:
diff changeset
221 # iterate through all dirs
30497c6a3eca Initial revision
casties
parents:
diff changeset
222 foreach my $f (sort keys %$fsdirs) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
223 # was this dir missing?
30497c6a3eca Initial revision
casties
parents:
diff changeset
224 if (! $okdirs{$f}) {
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
225 my ($name, $path) = split_file_path($f, 1);
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
226 # name must be valid
30497c6a3eca Initial revision
casties
parents:
diff changeset
227 if (! valid_dir_name($name)) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
228 $path = "." unless ($path);
30497c6a3eca Initial revision
casties
parents:
diff changeset
229 logger("ERROR", "directory name $name (in $path/) invalid!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
230 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
231 next;
30497c6a3eca Initial revision
casties
parents:
diff changeset
232 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
233 if ($fix_dirs) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
234 # add missing dir tag
30497c6a3eca Initial revision
casties
parents:
diff changeset
235 my $dir_node = $root->addNewChild($namespace, "dir");
30497c6a3eca Initial revision
casties
parents:
diff changeset
236 $xml_changed++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
237 # add name
30497c6a3eca Initial revision
casties
parents:
diff changeset
238 my $name_node = $dir_node->addNewChild($namespace, "name");
30497c6a3eca Initial revision
casties
parents:
diff changeset
239 $name_node->appendTextNode($name);
30497c6a3eca Initial revision
casties
parents:
diff changeset
240 # add path
30497c6a3eca Initial revision
casties
parents:
diff changeset
241 if ($path) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
242 my $path_node = $dir_node->addNewChild($namespace, "path");
30497c6a3eca Initial revision
casties
parents:
diff changeset
243 $path_node->appendTextNode($path);
30497c6a3eca Initial revision
casties
parents:
diff changeset
244 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
245 logger("INFO", "directory $f to be added to index file!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
246 } else {
30497c6a3eca Initial revision
casties
parents:
diff changeset
247 logger("ERROR", "directory $f missing in index file!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
248 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
249 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
250 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
251 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
252 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
253 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
254
30497c6a3eca Initial revision
casties
parents:
diff changeset
255
30497c6a3eca Initial revision
casties
parents:
diff changeset
256 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
257 # check_files($rootnode, \%files)
30497c6a3eca Initial revision
casties
parents:
diff changeset
258 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
259 # reads all file elements under $rootnode and compares with the file list
30497c6a3eca Initial revision
casties
parents:
diff changeset
260 # in %files
30497c6a3eca Initial revision
casties
parents:
diff changeset
261 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
262 sub check_files {
30497c6a3eca Initial revision
casties
parents:
diff changeset
263 my ($root, $fsfiles) = @_;
30497c6a3eca Initial revision
casties
parents:
diff changeset
264
30497c6a3eca Initial revision
casties
parents:
diff changeset
265 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
266 # iterate all file tags
30497c6a3eca Initial revision
casties
parents:
diff changeset
267 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
268 my @filenodes = $root->findnodes('child::file');
30497c6a3eca Initial revision
casties
parents:
diff changeset
269 my %okfiles;
30497c6a3eca Initial revision
casties
parents:
diff changeset
270 foreach my $filenode (@filenodes) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
271 my $filename = sstrip($filenode->find('child::name'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
272 my $filepath = sstrip($filenode->find('child::path'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
273 my $filesize = sstrip($filenode->find('child::size'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
274 my $filedate = sstrip($filenode->find('child::date'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
275 # name must be valid
30497c6a3eca Initial revision
casties
parents:
diff changeset
276 if (! valid_file_name($filename)) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
277 logger("ERROR", "file name ($filepath)$filename invalid!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
278 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
279 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
280 my $fn = ($filepath) ? "$filepath/$filename" : "$filename";
30497c6a3eca Initial revision
casties
parents:
diff changeset
281 #logger("file: \"$filename\", \"$filepath\"");
30497c6a3eca Initial revision
casties
parents:
diff changeset
282 if ($$fsfiles{$fn}) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
283 #logger(" OK ($$fsfiles{$fn})");
30497c6a3eca Initial revision
casties
parents:
diff changeset
284 $okfiles{$fn} = $filename;
30497c6a3eca Initial revision
casties
parents:
diff changeset
285 # check file size and date
30497c6a3eca Initial revision
casties
parents:
diff changeset
286 if ($filesize) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
287 if ($filesize != $$fsfiles{$fn}->[1]) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
288 logger("WARNING", "size of file $fn changed: $filesize to $$fsfiles{$fn}->[1]");
30497c6a3eca Initial revision
casties
parents:
diff changeset
289 $warncnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
290 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
291 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
292 # file date
30497c6a3eca Initial revision
casties
parents:
diff changeset
293 if ($filedate) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
294 if ($filedate ne stime($$fsfiles{$fn}->[2])) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
295 logger("WARNING", "date of file $fn changed: $filedate to ", stime($$fsfiles{$fn}->[2]), "");
30497c6a3eca Initial revision
casties
parents:
diff changeset
296 $warncnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
297 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
298 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
299 # update file size and date
30497c6a3eca Initial revision
casties
parents:
diff changeset
300 if ($fix_fs_meta) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
301 # delete size and date
30497c6a3eca Initial revision
casties
parents:
diff changeset
302 foreach my $n ($filenode->findnodes('child::size')) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
303 $filenode->removeChild($n);
30497c6a3eca Initial revision
casties
parents:
diff changeset
304 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
305 foreach my $n ($filenode->findnodes('child::date')) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
306 $filenode->removeChild($n);
30497c6a3eca Initial revision
casties
parents:
diff changeset
307 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
308 # add new size and date
30497c6a3eca Initial revision
casties
parents:
diff changeset
309 my $node = $filenode->addNewChild($namespace, "size");
30497c6a3eca Initial revision
casties
parents:
diff changeset
310 $node->appendTextNode($$fsfiles{$fn}->[1]);
30497c6a3eca Initial revision
casties
parents:
diff changeset
311 $node = $filenode->addNewChild($namespace, "date");
30497c6a3eca Initial revision
casties
parents:
diff changeset
312 $node->appendTextNode(stime($$fsfiles{$fn}->[2]));
30497c6a3eca Initial revision
casties
parents:
diff changeset
313 $xml_changed++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
314 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
315 } else {
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
316 if ($do_rewrite) {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
317 # remove file tag
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
318 logger("WARNING", "file $filename (in $filepath/) no longer on disk!");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
319 $filenode->unbindNode();
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
320 $warncnt++;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
321 } else {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
322 logger("ERROR", "file $filename (in $filepath/) missing on disk!");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
323 $errcnt++;
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
324 }
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
325 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
326 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
327 #logger("files: ", (scalar keys %$fsfiles), " vs ", (scalar keys %okfiles), "");
30497c6a3eca Initial revision
casties
parents:
diff changeset
328 if ((scalar keys %$fsfiles) != (scalar keys %okfiles)) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
329 # number of file tags and files don't match
30497c6a3eca Initial revision
casties
parents:
diff changeset
330 # iterate through all files
30497c6a3eca Initial revision
casties
parents:
diff changeset
331 foreach my $f (sort keys %$fsfiles) {
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
332 my ($name, $path) = split_file_path($f, 1);
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
333 # was this file missing?
30497c6a3eca Initial revision
casties
parents:
diff changeset
334 if (! $okfiles{$f}) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
335 # is an ignoreable file?
30497c6a3eca Initial revision
casties
parents:
diff changeset
336 if ($index_files{$name}) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
337 next;
30497c6a3eca Initial revision
casties
parents:
diff changeset
338 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
339 # name must be valid
30497c6a3eca Initial revision
casties
parents:
diff changeset
340 if (! valid_dir_name($name)) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
341 $path = "." unless ($path);
30497c6a3eca Initial revision
casties
parents:
diff changeset
342 logger("ERROR", "file name $name (in $path/) invalid!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
343 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
344 next;
30497c6a3eca Initial revision
casties
parents:
diff changeset
345 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
346 if ($fix_files) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
347 # add missing file tag
30497c6a3eca Initial revision
casties
parents:
diff changeset
348 my $file_node = $root->addNewChild($namespace, "file");
30497c6a3eca Initial revision
casties
parents:
diff changeset
349 $xml_changed++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
350 # add name
30497c6a3eca Initial revision
casties
parents:
diff changeset
351 my $name_node = $file_node->addNewChild($namespace, "name");
30497c6a3eca Initial revision
casties
parents:
diff changeset
352 $name_node->appendTextNode($name);
30497c6a3eca Initial revision
casties
parents:
diff changeset
353 # add path
30497c6a3eca Initial revision
casties
parents:
diff changeset
354 if ($path) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
355 my $path_node = $file_node->addNewChild($namespace, "path");
30497c6a3eca Initial revision
casties
parents:
diff changeset
356 $path_node->appendTextNode($path);
30497c6a3eca Initial revision
casties
parents:
diff changeset
357 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
358 # add size
30497c6a3eca Initial revision
casties
parents:
diff changeset
359 my $size_node = $file_node->addNewChild($namespace, "size");
30497c6a3eca Initial revision
casties
parents:
diff changeset
360 $size_node->appendTextNode($$fsfiles{$f}->[1]);
30497c6a3eca Initial revision
casties
parents:
diff changeset
361 # add date
30497c6a3eca Initial revision
casties
parents:
diff changeset
362 my $date_node = $file_node->addNewChild($namespace, "date");
30497c6a3eca Initial revision
casties
parents:
diff changeset
363 $date_node->appendTextNode(stime($$fsfiles{$f}->[2]));
30497c6a3eca Initial revision
casties
parents:
diff changeset
364 logger("INFO", "file $f to be added to index file!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
365 } else {
30497c6a3eca Initial revision
casties
parents:
diff changeset
366 logger("ERROR", "file $f missing in index file!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
367 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
368 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
369 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
370 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
371 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
372 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
373
30497c6a3eca Initial revision
casties
parents:
diff changeset
374 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
375 # check_resource_meta($rootnode)
30497c6a3eca Initial revision
casties
parents:
diff changeset
376 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
377 # checks general resource meta information
30497c6a3eca Initial revision
casties
parents:
diff changeset
378 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
379 sub check_resource_meta {
30497c6a3eca Initial revision
casties
parents:
diff changeset
380 my ($rootnode) = @_;
30497c6a3eca Initial revision
casties
parents:
diff changeset
381
30497c6a3eca Initial revision
casties
parents:
diff changeset
382 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
383 # description
30497c6a3eca Initial revision
casties
parents:
diff changeset
384 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
385 my $description = $rootnode->findvalue('child::description');
30497c6a3eca Initial revision
casties
parents:
diff changeset
386 if (! $description) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
387 logger("ERROR", "resource description element missing!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
388 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
389 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
390 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
391 # name
30497c6a3eca Initial revision
casties
parents:
diff changeset
392 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
393 my $name = sstrip($rootnode->findvalue('child::name'));
30497c6a3eca Initial revision
casties
parents:
diff changeset
394 if ($name) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
395 my ($dirname, $dirpath) = split_file_path($docdir);
30497c6a3eca Initial revision
casties
parents:
diff changeset
396 if ($dirname ne $name) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
397 logger("ERROR", "resource name element '$name' does not match directory name '$dirname'!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
398 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
399 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
400 } else {
30497c6a3eca Initial revision
casties
parents:
diff changeset
401 logger("ERROR", "resource name element missing!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
402 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
403 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
404 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
405 # archive path
30497c6a3eca Initial revision
casties
parents:
diff changeset
406 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
407 my $realpath;
30497c6a3eca Initial revision
casties
parents:
diff changeset
408 # get real path
30497c6a3eca Initial revision
casties
parents:
diff changeset
409 if ($docdir =~ /^\//) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
410 # docdir is absolute
30497c6a3eca Initial revision
casties
parents:
diff changeset
411 $realpath = $docdir;
30497c6a3eca Initial revision
casties
parents:
diff changeset
412 } else {
30497c6a3eca Initial revision
casties
parents:
diff changeset
413 # docdir is relative -- try with the shell
30497c6a3eca Initial revision
casties
parents:
diff changeset
414 if (open PWDCMD, "cd $docdir ; pwd|") {
30497c6a3eca Initial revision
casties
parents:
diff changeset
415 $realpath = <PWDCMD>;
30497c6a3eca Initial revision
casties
parents:
diff changeset
416 chomp $realpath;
30497c6a3eca Initial revision
casties
parents:
diff changeset
417 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
418 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
419 if (! $realpath) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
420 logger("ERROR", "unable to check real archive path!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
421 $errcnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
422 return;
30497c6a3eca Initial revision
casties
parents:
diff changeset
423 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
424 # get archive-path
30497c6a3eca Initial revision
casties
parents:
diff changeset
425 my $archnode = ($rootnode->find('child::archive-path'))->get_node(1);
30497c6a3eca Initial revision
casties
parents:
diff changeset
426 if ($archnode) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
427 my $arch = sstrip($archnode->textContent);
30497c6a3eca Initial revision
casties
parents:
diff changeset
428 if ($arch ne $realpath) {
30497c6a3eca Initial revision
casties
parents:
diff changeset
429 logger("WARNING", "incorrect archive-path '$arch' will be changed to '$realpath'!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
430 $warncnt++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
431 # correct archive-path
30497c6a3eca Initial revision
casties
parents:
diff changeset
432 $archnode->removeChildNodes;
30497c6a3eca Initial revision
casties
parents:
diff changeset
433 $archnode->appendTextNode($realpath);
30497c6a3eca Initial revision
casties
parents:
diff changeset
434 $xml_changed++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
435 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
436 } else {
30497c6a3eca Initial revision
casties
parents:
diff changeset
437 # add archive-path
30497c6a3eca Initial revision
casties
parents:
diff changeset
438 $archnode = $rootnode->addNewChild($namespace, "archive-path");
30497c6a3eca Initial revision
casties
parents:
diff changeset
439 $archnode->appendTextNode($realpath);
30497c6a3eca Initial revision
casties
parents:
diff changeset
440 $xml_changed++;
30497c6a3eca Initial revision
casties
parents:
diff changeset
441 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
442
30497c6a3eca Initial revision
casties
parents:
diff changeset
443 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
444
30497c6a3eca Initial revision
casties
parents:
diff changeset
445
30497c6a3eca Initial revision
casties
parents:
diff changeset
446
30497c6a3eca Initial revision
casties
parents:
diff changeset
447 #######################################################
30497c6a3eca Initial revision
casties
parents:
diff changeset
448 # main
30497c6a3eca Initial revision
casties
parents:
diff changeset
449 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
450
30497c6a3eca Initial revision
casties
parents:
diff changeset
451 my ($document, $rootnode) = read_xml($metafile);
30497c6a3eca Initial revision
casties
parents:
diff changeset
452
30497c6a3eca Initial revision
casties
parents:
diff changeset
453 check_resource_meta($rootnode);
30497c6a3eca Initial revision
casties
parents:
diff changeset
454
30497c6a3eca Initial revision
casties
parents:
diff changeset
455 my $fnum = fs_read_files($docdir, "", \%files, \%dirs);
30497c6a3eca Initial revision
casties
parents:
diff changeset
456 logger("INFO", "$fnum files on FS");
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
457 #foreach (keys %dirs) {logger('DEBUG', " dir ($_): $dirs{$_}");}
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
458
30497c6a3eca Initial revision
casties
parents:
diff changeset
459 check_files($rootnode, \%files);
30497c6a3eca Initial revision
casties
parents:
diff changeset
460 check_dirs($rootnode, \%dirs);
30497c6a3eca Initial revision
casties
parents:
diff changeset
461
30497c6a3eca Initial revision
casties
parents:
diff changeset
462 logger("INFO", "$warncnt warnings");
30497c6a3eca Initial revision
casties
parents:
diff changeset
463 logger("INFO", "$errcnt errors");
30497c6a3eca Initial revision
casties
parents:
diff changeset
464 if ($errcnt > 0) {
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
465 logger("ABORT", "there were $errcnt errors!");
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
466 exit 1;
30497c6a3eca Initial revision
casties
parents:
diff changeset
467 } else {
30497c6a3eca Initial revision
casties
parents:
diff changeset
468 if ($fix_xml) {
18
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
469 if ($dry_run) {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
470 logger('INFO', "would write $metafile");
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
471 logger('DEBUG', $document->toString(1));
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
472 } else {
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
473 write_xml($document, $metafile);
fdf4ceb36db1 fixed problem with dir names in metacheck
casties
parents: 2
diff changeset
474 }
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
475 }
30497c6a3eca Initial revision
casties
parents:
diff changeset
476 logger("DONE", "index file checked successfully!");
30497c6a3eca Initial revision
casties
parents:
diff changeset
477 }