Mercurial > hg > foxridge-archiver
comparison unarchiver.pl @ 0:30497c6a3eca
Initial revision
author | casties |
---|---|
date | Thu, 17 Jun 2004 17:58:42 +0200 |
parents | |
children | c4e6fc065b6d |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:30497c6a3eca |
---|---|
1 #!/usr/local/bin/perl -w | |
2 | |
3 use strict; | |
4 | |
5 use XML::LibXML; | |
6 | |
7 # MPIWG libraries | |
8 use lib '/usr/local/mpiwg/archive'; | |
9 use MPIWGStor; | |
10 | |
11 # make output unbuffered | |
12 $|=1; | |
13 | |
14 | |
15 ####################################################### | |
16 # internal parameters | |
17 # | |
18 | |
19 # program version | |
20 my $version = "0.1 (24.9.2003)"; | |
21 | |
22 # read command line parameters | |
23 my $args = parseargs; | |
24 | |
25 # debug level | |
26 $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0; | |
27 | |
28 # rewrite XML file (necessary for archive date!) | |
29 my $fix_xml = 1; | |
30 my $xml_changed = 0; | |
31 # XML namespace (not really implemented!) | |
32 my $namespace = ""; | |
33 | |
34 # archive name (archive-path element, usually == $docdir) | |
35 my $archname; | |
36 # archive storage date | |
37 my $archdate; | |
38 | |
39 ####################################################### | |
40 # external programs | |
41 # | |
42 my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc"; | |
43 if (! -x $archprog) { | |
44 logger("ABORT", "TSM client program '$archprog' missing!!"); | |
45 exit 1; | |
46 } | |
47 my $checkprog = "/usr/local/mpiwg/archive/archivecheck"; | |
48 if (! -x $checkprog) { | |
49 logger("ABORT", "archive checking program '$checkprog' missing!!"); | |
50 exit 1; | |
51 } | |
52 # log file for archiver | |
53 my $log_file = "/var/tmp/unarchiver.log"; | |
54 if (! open LOG, ">>$log_file") { | |
55 logger("ABORT", "unable to write log file '$log_file'!!"); | |
56 exit 1; | |
57 } | |
58 | |
59 ####################################################### | |
60 # check parameters that were passed to the program | |
61 # | |
62 my $docdir = $$args{'path'}; | |
63 if (! $docdir) { | |
64 print "ABORT: no document directory given!\n"; | |
65 exit 1; | |
66 } | |
67 # strip trailing slashes | |
68 $docdir =~ s/\/$//; | |
69 if (! -d $docdir) { | |
70 print "ABORT: document directory \'$docdir\' doesn't exist!\n"; | |
71 exit 1; | |
72 } | |
73 | |
74 my $metafile = "$docdir/index.meta"; | |
75 if (! -f $metafile) { | |
76 print "ABORT: metadata index file \'$metafile\' doesn't exist!\n"; | |
77 exit 1; | |
78 } | |
79 | |
80 | |
81 ####################################################### | |
82 # internal variables | |
83 # | |
84 | |
85 # number of errors | |
86 my $errcnt = 0; | |
87 # number of warnings | |
88 my $warncnt = 0; | |
89 | |
90 ####################################################### | |
91 # subroutines | |
92 # | |
93 | |
94 | |
95 # | |
96 # $files = read_resource_meta($rootnode) | |
97 # | |
98 # checks general resource meta information and reads the list of files | |
99 # | |
100 sub read_resource_meta { | |
101 my ($rootnode) = @_; | |
102 my %files; | |
103 # | |
104 # archive path | |
105 # | |
106 # get archive-path | |
107 $archname = sstrip($rootnode->findvalue('child::archive-path')); | |
108 if (! $archname) { | |
109 logger("ABORT", "archive-name element missing!!"); | |
110 exit 1; | |
111 } | |
112 | |
113 # | |
114 # files | |
115 # | |
116 my @filenodes = $rootnode->findnodes('child::file'); | |
117 foreach my $fn (@filenodes) { | |
118 my $name = sstrip($fn->findvalue('child::name')); | |
119 my $path = sstrip($fn->findvalue('child::path')); | |
120 logger("DEBUG", "FILE: ($path)$name"); | |
121 my $f = ($path) ? "$path/$name" : "$name"; | |
122 $files{$f} = [$name]; | |
123 } | |
124 | |
125 # | |
126 # dirs | |
127 # | |
128 my @dirnodes = $rootnode->findnodes('child::dir'); | |
129 foreach my $fn (@dirnodes) { | |
130 my $name = sstrip($fn->findvalue('child::name')); | |
131 my $path = sstrip($fn->findvalue('child::path')); | |
132 logger("DEBUG", "DIR: ($path)$name"); | |
133 my $f = ($path) ? "$path/$name" : "$name"; | |
134 $files{$f} = [$name]; | |
135 } | |
136 | |
137 # | |
138 # archive-storage-date | |
139 # | |
140 my $archdate = $rootnode->find('child::archive-storage-date'); | |
141 if ($archdate) { | |
142 logger("INFO", "archive storage date: $archdate"); | |
143 } else { | |
144 logger("ERROR", "archive storage date missing!"); | |
145 $errcnt++; | |
146 } | |
147 | |
148 # | |
149 # archive-recall-date | |
150 # | |
151 my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1); | |
152 if ($recalldatenode) { | |
153 print "INFO: archive recall date exists!\n"; | |
154 # delete old date | |
155 $recalldatenode->removeChildNodes; | |
156 } else { | |
157 # create new storage date node | |
158 $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date"); | |
159 # move after archive-path | |
160 $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1)); | |
161 } | |
162 $recalldatenode->appendTextNode(scalar localtime); | |
163 $xml_changed++; | |
164 | |
165 return \%files; | |
166 } | |
167 | |
168 | |
169 # | |
170 # $%files = run_retrieve | |
171 # | |
172 # runs the retriever program on $docdir and returns a list of archived files | |
173 # | |
174 # Sample output: | |
175 # Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done] | |
176 # | |
177 sub run_retrieve { | |
178 my %files; | |
179 print LOG "START unarchive $version ", scalar localtime, "\n"; | |
180 my $archcmd = $archprog; | |
181 $archcmd .= " retrieve -subdir=yes -replace=all"; | |
182 $archcmd .= " -description='$archname'"; | |
183 $archcmd .= " '$docdir/'"; | |
184 | |
185 my $archcnt = 0; | |
186 print LOG "CMD: $archcmd\n"; | |
187 if (open ARCH, "$archcmd 2>&1 |") { | |
188 while (<ARCH>) { | |
189 chomp; | |
190 print LOG "ARCH: $_\n"; | |
191 if (/ | |
192 Retrieving | |
193 \s+([\d,]+) # size | |
194 \s+(\S+) # file name | |
195 \s+\[Done\] | |
196 /x) { | |
197 my $size = $1; | |
198 my $file = $2; | |
199 $size =~ s/,//g; | |
200 logger("DEBUG", " RETRIEVE: file '$file'"); | |
201 $archcnt++; | |
202 if ($files{$file}) { | |
203 logger("WARNING", "file $file seems to be archived multiple times."); | |
204 $warncnt++; | |
205 } | |
206 $files{$file} = [$size]; | |
207 } | |
208 } | |
209 logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files."); | |
210 } else { | |
211 logger("ABORT", "unable to start archive command '$archcmd'!!"); | |
212 exit 1; | |
213 } | |
214 return \%files; | |
215 } | |
216 | |
217 | |
218 # | |
219 # check_files(\%files_to_retrieve, \%retrieved_files) | |
220 # | |
221 # compares the list of archived and retrieved files | |
222 # | |
223 sub check_files { | |
224 my ($to_retrieve, $retrieved) = @_; | |
225 | |
226 my $nt = scalar keys %$to_retrieve; | |
227 my $na = scalar keys %$retrieved; | |
228 | |
229 foreach my $ft (sort keys %$to_retrieve) { | |
230 my $fp = "$docdir/$ft"; | |
231 #logger("DEBUG", " fp: $fp"); | |
232 if ($$retrieved{$fp}) { | |
233 logger("DEBUG", "$ft retrieved OK"); | |
234 $$retrieved{$fp}->[1] = "OK"; | |
235 } else { | |
236 logger("ERROR", "file entry '$ft' missing from archive!"); | |
237 $errcnt++; | |
238 } | |
239 } | |
240 | |
241 foreach my $fa (sort keys %$retrieved) { | |
242 if (! $$retrieved{$fa}->[1]) { | |
243 my ($fn, $fp) = split_file_path($fa); | |
244 if ($index_files{$fn}) { | |
245 logger("DEBUG", "$fa ignored"); | |
246 $na--; | |
247 } else { | |
248 logger("WARNING", "$fa retrieved but not in list!"); | |
249 $warncnt++; | |
250 } | |
251 } | |
252 } | |
253 | |
254 if ($nt > $na) { | |
255 logger("WARNING", "less files were retrieved ($na vs. $nt)!"); | |
256 $warncnt++; | |
257 } elsif ($na > $nt) { | |
258 logger("WARNING", "more files were retrieved ($na vs. $nt)!"); | |
259 $warncnt++; | |
260 } | |
261 | |
262 } | |
263 | |
264 | |
265 | |
266 ####################################################### | |
267 # main | |
268 # | |
269 | |
270 logger("INFO", "unarchiver $version"); | |
271 | |
272 # make shure the right user is running this program | |
273 my $user = getlogin; | |
274 if (($user ne "archive")&&($user ne "root")) { | |
275 logger("ABORT", "you must be archive or root user to run this program!"); | |
276 exit 1; | |
277 } | |
278 | |
279 # use checkarchive first | |
280 if (system("$checkprog $docdir >/dev/null") == 0) { | |
281 logger("INFO", "archive '$docdir' check OK"); | |
282 } else { | |
283 logger("ABORT", "archive '$docdir' check failed!!"); | |
284 exit 1; | |
285 } | |
286 | |
287 # read index.meta file | |
288 my ($document, $rootnode) = read_xml($metafile); | |
289 | |
290 # check index file | |
291 my $archived_files = read_resource_meta($rootnode); | |
292 my $num_archived_files = scalar keys %$archived_files; | |
293 | |
294 # check for .archived file | |
295 if (-f "$docdir/.archived") { | |
296 logger("INFO", ".archived file exists."); | |
297 } else { | |
298 logger("WARNING", "no .archived file!"); | |
299 $warncnt++; | |
300 } | |
301 | |
302 logger("INFO", "$num_archived_files files to retrieve."); | |
303 | |
304 # retrieve | |
305 my $retrieved_files = run_retrieve; | |
306 | |
307 my $num_arch_files = (scalar keys %$retrieved_files); | |
308 if ($num_arch_files == 0) { | |
309 logger("ABORT", "no files retrieved!!"); | |
310 exit 1; | |
311 } | |
312 logger("INFO", "$num_arch_files files retrieved"); | |
313 | |
314 # check list of archived files | |
315 check_files($archived_files, $retrieved_files); | |
316 | |
317 # rewrite index.meta file | |
318 write_xml($document, $metafile); | |
319 | |
320 logger("INFO", "$warncnt warnings"); | |
321 logger("INFO", "$errcnt errors"); | |
322 if ($errcnt == 0) { | |
323 logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved"); | |
324 } else { | |
325 logger("ABORT", "there were $errcnt errors!!"); | |
326 exit 1; | |
327 } |