Mercurial > hg > foxridge-archiver
annotate unarchiver.pl @ 55:7958ac21f9bf
fixed unarchiving of multiple versions by adding -ifnewer option to dsmc ret
author | casties |
---|---|
date | Tue, 11 Aug 2009 16:34:59 +0200 |
parents | b762b5af6e42 |
children | 2208ed7370cb |
rev | line source |
---|---|
0 | 1 #!/usr/local/bin/perl -w |
2 | |
3 use strict; | |
4 | |
5 use XML::LibXML; | |
7 | 6 use FileHandle; |
0 | 7 |
8 # MPIWG libraries | |
9 use lib '/usr/local/mpiwg/archive'; | |
10 use MPIWGStor; | |
11 | |
12 # make output unbuffered | |
13 $|=1; | |
14 | |
15 | |
16 ####################################################### | |
17 # internal parameters | |
18 # | |
19 | |
20 # program version | |
55
7958ac21f9bf
fixed unarchiving of multiple versions by adding -ifnewer option to dsmc ret
casties
parents:
52
diff
changeset
|
21 my $version = "0.3.4 (11.8.2009)"; |
0 | 22 |
23 # read command line parameters | |
24 my $args = parseargs; | |
25 | |
26 # debug level | |
27 $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0; | |
28 | |
29 # rewrite XML file (necessary for archive date!) | |
30 my $fix_xml = 1; | |
31 my $xml_changed = 0; | |
32 # XML namespace (not really implemented!) | |
33 my $namespace = ""; | |
34 | |
35 # archive name (archive-path element, usually == $docdir) | |
36 my $archname; | |
37 # archive storage date | |
38 my $archdate; | |
39 | |
40 ####################################################### | |
41 # external programs | |
42 # | |
43 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
44 if (! -x $archprog) { | |
45 logger("ABORT", "TSM client program '$archprog' missing!!"); | |
46 exit 1; | |
47 } | |
48 my $checkprog = "/usr/local/mpiwg/archive/archivecheck"; | |
49 if (! -x $checkprog) { | |
50 logger("ABORT", "archive checking program '$checkprog' missing!!"); | |
51 exit 1; | |
52 } | |
53 # log file for archiver | |
54 my $log_file = "/var/tmp/unarchiver.log"; | |
55 if (! open LOG, ">>$log_file") { | |
56 logger("ABORT", "unable to write log file '$log_file'!!"); | |
57 exit 1; | |
58 } | |
7 | 59 LOG->autoflush(1); |
0 | 60 |
61 ####################################################### | |
62 # check parameters that were passed to the program | |
63 # | |
64 my $docdir = $$args{'path'}; | |
65 if (! $docdir) { | |
66 print "ABORT: no document directory given!\n"; | |
67 exit 1; | |
68 } | |
69 # strip trailing slashes | |
70 $docdir =~ s/\/$//; | |
71 if (! -d $docdir) { | |
72 print "ABORT: document directory \'$docdir\' doesn't exist!\n"; | |
73 exit 1; | |
74 } | |
75 | |
76 my $metafile = "$docdir/index.meta"; | |
77 if (! -f $metafile) { | |
78 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | |
79 exit 1; | |
80 } | |
81 | |
82 ####################################################### | |
83 # internal variables | |
84 # | |
85 | |
86 # number of errors | |
87 my $errcnt = 0; | |
88 # number of warnings | |
89 my $warncnt = 0; | |
90 | |
91 ####################################################### | |
92 # subroutines | |
93 # | |
94 | |
10 | 95 # construct document's parent dir |
96 sub get_parent { | |
97 my ($dirname) = @_; | |
98 my $dirparent = $dirname; | |
99 $dirparent =~ s!/[^/]+$!!; | |
100 return $dirparent; | |
101 } | |
102 | |
0 | 103 |
104 # | |
105 # $files = read_resource_meta($rootnode) | |
106 # | |
107 # checks general resource meta information and reads the list of files | |
108 # | |
109 sub read_resource_meta { | |
110 my ($rootnode) = @_; | |
111 my %files; | |
112 # | |
113 # archive path | |
114 # | |
115 # get archive-path | |
116 $archname = sstrip($rootnode->findvalue('child::archive-path')); | |
117 if (! $archname) { | |
118 logger("ABORT", "archive-name element missing!!"); | |
119 exit 1; | |
120 } | |
121 | |
122 # | |
123 # files | |
124 # | |
125 my @filenodes = $rootnode->findnodes('child::file'); | |
126 foreach my $fn (@filenodes) { | |
127 my $name = sstrip($fn->findvalue('child::name')); | |
128 my $path = sstrip($fn->findvalue('child::path')); | |
129 logger("DEBUG", "FILE: ($path)$name"); | |
130 my $f = ($path) ? "$path/$name" : "$name"; | |
131 $files{$f} = [$name]; | |
132 } | |
133 | |
134 # | |
135 # dirs | |
136 # | |
137 my @dirnodes = $rootnode->findnodes('child::dir'); | |
138 foreach my $fn (@dirnodes) { | |
139 my $name = sstrip($fn->findvalue('child::name')); | |
140 my $path = sstrip($fn->findvalue('child::path')); | |
141 logger("DEBUG", "DIR: ($path)$name"); | |
142 my $f = ($path) ? "$path/$name" : "$name"; | |
143 $files{$f} = [$name]; | |
144 } | |
145 | |
146 # | |
147 # archive-storage-date | |
148 # | |
149 my $archdate = $rootnode->find('child::archive-storage-date'); | |
150 if ($archdate) { | |
151 logger("INFO", "archive storage date: $archdate"); | |
152 } else { | |
153 logger("ERROR", "archive storage date missing!"); | |
154 $errcnt++; | |
155 } | |
156 | |
157 # | |
158 # archive-recall-date | |
159 # | |
160 my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1); | |
161 if ($recalldatenode) { | |
162 print "INFO: archive recall date exists!\n"; | |
163 # delete old date | |
164 $recalldatenode->removeChildNodes; | |
165 } else { | |
166 # create new storage date node | |
167 $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date"); | |
168 # move after archive-path | |
169 $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1)); | |
170 } | |
171 $recalldatenode->appendTextNode(scalar localtime); | |
172 $xml_changed++; | |
173 | |
174 return \%files; | |
175 } | |
176 | |
177 | |
178 # | |
10 | 179 # $num_files = run_retrieve($docdir, $docmount, \%files) |
0 | 180 # |
10 | 181 # Runs the retriever program on $docdir and returns the number of unarchived files. |
182 # All filenames are put in %files. | |
183 # $docmount is the mount point of the doc partition in cases when the new mount point | |
184 # is different. | |
0 | 185 # |
186 # Sample output: | |
7 | 187 # (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] |
188 # Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done] | |
0 | 189 sub run_retrieve { |
10 | 190 my ($archdir, $archmount, $files) = @_; |
191 my $archparent; | |
192 if ($archmount eq $archdir) { | |
193 # no explicit mount point | |
194 $archparent = get_parent($archdir); | |
195 } else { | |
196 # destination dir is mount point | |
197 $archparent = $archmount; | |
198 } | |
20 | 199 logger("INFO", "looking for archives in $archmount..."); |
10 | 200 |
7 | 201 print LOG "START unarchive $version on ", scalar localtime, "\n"; |
0 | 202 my $archcmd = $archprog; |
55
7958ac21f9bf
fixed unarchiving of multiple versions by adding -ifnewer option to dsmc ret
casties
parents:
52
diff
changeset
|
203 $archcmd .= " retrieve -subdir=yes -replace=all -ifnewer"; |
10 | 204 $archcmd .= " -description='$archname'"; # archive name |
205 $archcmd .= " '$archmount/'"; # archive mount point | |
206 $archcmd .= " '$archparent/'"; # destination dir name | |
0 | 207 |
50 | 208 logger('INFO', "querying TSM server for $archmount, please wait..."); |
209 | |
0 | 210 my $archcnt = 0; |
10 | 211 my $numfiles = 0; |
0 | 212 print LOG "CMD: $archcmd\n"; |
213 if (open ARCH, "$archcmd 2>&1 |") { | |
214 while (<ARCH>) { | |
215 chomp; | |
216 print LOG "ARCH: $_\n"; | |
217 if (/ | |
218 Retrieving | |
219 \s+([\d,]+) # size | |
220 \s+(\S+) # file name | |
7 | 221 \s+--> |
222 \s+(\S+) # destination file name | |
0 | 223 \s+\[Done\] |
224 /x) { | |
225 my $size = $1; | |
226 my $file = $2; | |
227 $size =~ s/,//g; | |
228 logger("DEBUG", " RETRIEVE: file '$file'"); | |
229 $archcnt++; | |
10 | 230 if ($$files{$file}) { |
0 | 231 logger("WARNING", "file $file seems to be archived multiple times."); |
232 $warncnt++; | |
233 } | |
10 | 234 $$files{$file} = [$size]; |
0 | 235 } |
236 } | |
10 | 237 $numfiles = (scalar keys %$files); |
238 logger("INFO", "$archcnt archives of $numfiles files (in $archmount)."); | |
0 | 239 } else { |
240 logger("ABORT", "unable to start archive command '$archcmd'!!"); | |
241 exit 1; | |
242 } | |
10 | 243 return $numfiles; |
0 | 244 } |
245 | |
246 | |
247 # | |
248 # check_files(\%files_to_retrieve, \%retrieved_files) | |
249 # | |
250 # compares the list of archived and retrieved files | |
251 # | |
252 sub check_files { | |
253 my ($to_retrieve, $retrieved) = @_; | |
254 | |
255 my $nt = scalar keys %$to_retrieve; | |
256 my $na = scalar keys %$retrieved; | |
257 | |
258 foreach my $ft (sort keys %$to_retrieve) { | |
259 my $fp = "$docdir/$ft"; | |
260 #logger("DEBUG", " fp: $fp"); | |
261 if ($$retrieved{$fp}) { | |
262 logger("DEBUG", "$ft retrieved OK"); | |
263 $$retrieved{$fp}->[1] = "OK"; | |
264 } else { | |
265 logger("ERROR", "file entry '$ft' missing from archive!"); | |
266 $errcnt++; | |
267 } | |
268 } | |
269 | |
270 foreach my $fa (sort keys %$retrieved) { | |
271 if (! $$retrieved{$fa}->[1]) { | |
272 my ($fn, $fp) = split_file_path($fa); | |
273 if ($index_files{$fn}) { | |
274 logger("DEBUG", "$fa ignored"); | |
275 $na--; | |
276 } else { | |
277 logger("WARNING", "$fa retrieved but not in list!"); | |
278 $warncnt++; | |
279 } | |
280 } | |
281 } | |
282 | |
283 if ($nt > $na) { | |
284 logger("WARNING", "less files were retrieved ($na vs. $nt)!"); | |
285 $warncnt++; | |
286 } elsif ($na > $nt) { | |
287 logger("WARNING", "more files were retrieved ($na vs. $nt)!"); | |
288 $warncnt++; | |
289 } | |
290 | |
291 } | |
292 | |
293 | |
294 | |
295 ####################################################### | |
296 # main | |
297 # | |
298 | |
299 logger("INFO", "unarchiver $version"); | |
300 | |
301 # make shure the right user is running this program | |
52 | 302 my $user = getlogin || getpwuid($<); |
7 | 303 if (($user)&&($user ne "archive")&&($user ne "root")) { |
0 | 304 logger("ABORT", "you must be archive or root user to run this program!"); |
305 exit 1; | |
306 } | |
307 | |
308 # use checkarchive first | |
309 if (system("$checkprog $docdir >/dev/null") == 0) { | |
10 | 310 logger("INFO", "archive \"$docdir\" check OK"); |
0 | 311 } else { |
10 | 312 logger("ABORT", "archive \"$docdir\" check failed!!"); |
0 | 313 exit 1; |
314 } | |
315 | |
316 # read index.meta file | |
317 my ($document, $rootnode) = read_xml($metafile); | |
318 | |
319 # check index file | |
320 my $archived_files = read_resource_meta($rootnode); | |
321 my $num_archived_files = scalar keys %$archived_files; | |
322 | |
323 # check for .archived file | |
324 if (-f "$docdir/.archived") { | |
325 logger("INFO", ".archived file exists."); | |
326 } else { | |
327 logger("WARNING", "no .archived file!"); | |
328 $warncnt++; | |
329 } | |
330 | |
331 logger("INFO", "$num_archived_files files to retrieve."); | |
332 | |
10 | 333 # save current index.meta |
334 park_file($metafile); | |
335 | |
0 | 336 # retrieve |
10 | 337 my %retrieved_files = (); |
338 my $archcnt = 0; | |
0 | 339 |
10 | 340 if ($docdir =~ /\/mpiwg\/archive\/data\/(.*)/) { |
341 # TSM needs two different paths because of historical mount points :-( | |
342 # try the new one first | |
343 $archcnt = run_retrieve($docdir, "/mpiwg/archive", \%retrieved_files); | |
344 if ($archcnt == 0) { | |
345 # and then the old one | |
346 $archcnt = run_retrieve($docdir, "/mpiwg/archive/data", \%retrieved_files); | |
347 } | |
348 } else { | |
349 # otherwise we assume that it works | |
350 $archcnt += run_retrieve($docdir, $docdir, \%retrieved_files); | |
351 } | |
352 | |
353 # restore current index.meta | |
354 unpark_file($metafile); | |
355 | |
356 if ($archcnt == 0) { | |
0 | 357 logger("ABORT", "no files retrieved!!"); |
358 exit 1; | |
359 } | |
10 | 360 logger("INFO", "$archcnt files retrieved"); |
0 | 361 |
362 # check list of archived files | |
10 | 363 check_files($archived_files, \%retrieved_files); |
0 | 364 |
365 # rewrite index.meta file | |
366 write_xml($document, $metafile); | |
367 | |
368 logger("INFO", "$warncnt warnings"); | |
369 logger("INFO", "$errcnt errors"); | |
370 if ($errcnt == 0) { | |
10 | 371 logger("DONE", "$archcnt archived files retrieved"); |
0 | 372 } else { |
373 logger("ABORT", "there were $errcnt errors!!"); | |
374 exit 1; | |
375 } |