comparison unarchiver.pl @ 0:30497c6a3eca

Initial revision
author casties
date Thu, 17 Jun 2004 17:58:42 +0200
parents
children c4e6fc065b6d
comparison
equal deleted inserted replaced
-1:000000000000 0:30497c6a3eca
1 #!/usr/local/bin/perl -w
2
3 use strict;
4
5 use XML::LibXML;
6
7 # MPIWG libraries
8 use lib '/usr/local/mpiwg/archive';
9 use MPIWGStor;
10
11 # make output unbuffered
12 $|=1;
13
14
15 #######################################################
16 # internal parameters
17 #
18
19 # program version
20 my $version = "0.1 (24.9.2003)";
21
22 # read command line parameters
23 my $args = parseargs;
24
25 # debug level
26 $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
27
28 # rewrite XML file (necessary for archive date!)
29 my $fix_xml = 1;
30 my $xml_changed = 0;
31 # XML namespace (not really implemented!)
32 my $namespace = "";
33
34 # archive name (archive-path element, usually == $docdir)
35 my $archname;
36 # archive storage date
37 my $archdate;
38
39 #######################################################
40 # external programs
41 #
42 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
43 if (! -x $archprog) {
44 logger("ABORT", "TSM client program '$archprog' missing!!");
45 exit 1;
46 }
47 my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
48 if (! -x $checkprog) {
49 logger("ABORT", "archive checking program '$checkprog' missing!!");
50 exit 1;
51 }
52 # log file for archiver
53 my $log_file = "/var/tmp/unarchiver.log";
54 if (! open LOG, ">>$log_file") {
55 logger("ABORT", "unable to write log file '$log_file'!!");
56 exit 1;
57 }
58
59 #######################################################
60 # check parameters that were passed to the program
61 #
62 my $docdir = $$args{'path'};
63 if (! $docdir) {
64 print "ABORT: no document directory given!\n";
65 exit 1;
66 }
67 # strip trailing slashes
68 $docdir =~ s/\/$//;
69 if (! -d $docdir) {
70 print "ABORT: document directory \'$docdir\' doesn't exist!\n";
71 exit 1;
72 }
73
74 my $metafile = "$docdir/index.meta";
75 if (! -f $metafile) {
76 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
77 exit 1;
78 }
79
80
81 #######################################################
82 # internal variables
83 #
84
85 # number of errors
86 my $errcnt = 0;
87 # number of warnings
88 my $warncnt = 0;
89
90 #######################################################
91 # subroutines
92 #
93
94
95 #
96 # $files = read_resource_meta($rootnode)
97 #
98 # checks general resource meta information and reads the list of files
99 #
100 sub read_resource_meta {
101 my ($rootnode) = @_;
102 my %files;
103 #
104 # archive path
105 #
106 # get archive-path
107 $archname = sstrip($rootnode->findvalue('child::archive-path'));
108 if (! $archname) {
109 logger("ABORT", "archive-name element missing!!");
110 exit 1;
111 }
112
113 #
114 # files
115 #
116 my @filenodes = $rootnode->findnodes('child::file');
117 foreach my $fn (@filenodes) {
118 my $name = sstrip($fn->findvalue('child::name'));
119 my $path = sstrip($fn->findvalue('child::path'));
120 logger("DEBUG", "FILE: ($path)$name");
121 my $f = ($path) ? "$path/$name" : "$name";
122 $files{$f} = [$name];
123 }
124
125 #
126 # dirs
127 #
128 my @dirnodes = $rootnode->findnodes('child::dir');
129 foreach my $fn (@dirnodes) {
130 my $name = sstrip($fn->findvalue('child::name'));
131 my $path = sstrip($fn->findvalue('child::path'));
132 logger("DEBUG", "DIR: ($path)$name");
133 my $f = ($path) ? "$path/$name" : "$name";
134 $files{$f} = [$name];
135 }
136
137 #
138 # archive-storage-date
139 #
140 my $archdate = $rootnode->find('child::archive-storage-date');
141 if ($archdate) {
142 logger("INFO", "archive storage date: $archdate");
143 } else {
144 logger("ERROR", "archive storage date missing!");
145 $errcnt++;
146 }
147
148 #
149 # archive-recall-date
150 #
151 my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
152 if ($recalldatenode) {
153 print "INFO: archive recall date exists!\n";
154 # delete old date
155 $recalldatenode->removeChildNodes;
156 } else {
157 # create new storage date node
158 $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
159 # move after archive-path
160 $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
161 }
162 $recalldatenode->appendTextNode(scalar localtime);
163 $xml_changed++;
164
165 return \%files;
166 }
167
168
169 #
170 # $%files = run_retrieve
171 #
172 # runs the retriever program on $docdir and returns a list of archived files
173 #
174 # Sample output:
175 # Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
176 #
177 sub run_retrieve {
178 my %files;
179 print LOG "START unarchive $version ", scalar localtime, "\n";
180 my $archcmd = $archprog;
181 $archcmd .= " retrieve -subdir=yes -replace=all";
182 $archcmd .= " -description='$archname'";
183 $archcmd .= " '$docdir/'";
184
185 my $archcnt = 0;
186 print LOG "CMD: $archcmd\n";
187 if (open ARCH, "$archcmd 2>&1 |") {
188 while (<ARCH>) {
189 chomp;
190 print LOG "ARCH: $_\n";
191 if (/
192 Retrieving
193 \s+([\d,]+) # size
194 \s+(\S+) # file name
195 \s+\[Done\]
196 /x) {
197 my $size = $1;
198 my $file = $2;
199 $size =~ s/,//g;
200 logger("DEBUG", " RETRIEVE: file '$file'");
201 $archcnt++;
202 if ($files{$file}) {
203 logger("WARNING", "file $file seems to be archived multiple times.");
204 $warncnt++;
205 }
206 $files{$file} = [$size];
207 }
208 }
209 logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
210 } else {
211 logger("ABORT", "unable to start archive command '$archcmd'!!");
212 exit 1;
213 }
214 return \%files;
215 }
216
217
218 #
219 # check_files(\%files_to_retrieve, \%retrieved_files)
220 #
221 # compares the list of archived and retrieved files
222 #
223 sub check_files {
224 my ($to_retrieve, $retrieved) = @_;
225
226 my $nt = scalar keys %$to_retrieve;
227 my $na = scalar keys %$retrieved;
228
229 foreach my $ft (sort keys %$to_retrieve) {
230 my $fp = "$docdir/$ft";
231 #logger("DEBUG", " fp: $fp");
232 if ($$retrieved{$fp}) {
233 logger("DEBUG", "$ft retrieved OK");
234 $$retrieved{$fp}->[1] = "OK";
235 } else {
236 logger("ERROR", "file entry '$ft' missing from archive!");
237 $errcnt++;
238 }
239 }
240
241 foreach my $fa (sort keys %$retrieved) {
242 if (! $$retrieved{$fa}->[1]) {
243 my ($fn, $fp) = split_file_path($fa);
244 if ($index_files{$fn}) {
245 logger("DEBUG", "$fa ignored");
246 $na--;
247 } else {
248 logger("WARNING", "$fa retrieved but not in list!");
249 $warncnt++;
250 }
251 }
252 }
253
254 if ($nt > $na) {
255 logger("WARNING", "less files were retrieved ($na vs. $nt)!");
256 $warncnt++;
257 } elsif ($na > $nt) {
258 logger("WARNING", "more files were retrieved ($na vs. $nt)!");
259 $warncnt++;
260 }
261
262 }
263
264
265
266 #######################################################
267 # main
268 #
269
270 logger("INFO", "unarchiver $version");
271
272 # make shure the right user is running this program
273 my $user = getlogin;
274 if (($user ne "archive")&&($user ne "root")) {
275 logger("ABORT", "you must be archive or root user to run this program!");
276 exit 1;
277 }
278
279 # use checkarchive first
280 if (system("$checkprog $docdir >/dev/null") == 0) {
281 logger("INFO", "archive '$docdir' check OK");
282 } else {
283 logger("ABORT", "archive '$docdir' check failed!!");
284 exit 1;
285 }
286
287 # read index.meta file
288 my ($document, $rootnode) = read_xml($metafile);
289
290 # check index file
291 my $archived_files = read_resource_meta($rootnode);
292 my $num_archived_files = scalar keys %$archived_files;
293
294 # check for .archived file
295 if (-f "$docdir/.archived") {
296 logger("INFO", ".archived file exists.");
297 } else {
298 logger("WARNING", "no .archived file!");
299 $warncnt++;
300 }
301
302 logger("INFO", "$num_archived_files files to retrieve.");
303
304 # retrieve
305 my $retrieved_files = run_retrieve;
306
307 my $num_arch_files = (scalar keys %$retrieved_files);
308 if ($num_arch_files == 0) {
309 logger("ABORT", "no files retrieved!!");
310 exit 1;
311 }
312 logger("INFO", "$num_arch_files files retrieved");
313
314 # check list of archived files
315 check_files($archived_files, $retrieved_files);
316
317 # rewrite index.meta file
318 write_xml($document, $metafile);
319
320 logger("INFO", "$warncnt warnings");
321 logger("INFO", "$errcnt errors");
322 if ($errcnt == 0) {
323 logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved");
324 } else {
325 logger("ABORT", "there were $errcnt errors!!");
326 exit 1;
327 }