Annotation of foxridge-archiver/unarchiver.pl, revision 1.2
1.1 casties 1: #!/usr/local/bin/perl -w
2:
3: use strict;
4:
5: use XML::LibXML;
1.2 ! casties 6: use FileHandle;
1.1 casties 7:
8: # MPIWG libraries
9: use lib '/usr/local/mpiwg/archive';
10: use MPIWGStor;
11:
12: # make output unbuffered
13: $|=1;
14:
15:
16: #######################################################
17: # internal parameters
18: #
19:
20: # program version
1.2 ! casties 21: my $version = "0.2 (23.9.2004)";
1.1 casties 22:
23: # read command line parameters
24: my $args = parseargs;
25:
26: # debug level
27: $debug = (exists $$args{'debug'}) ? ($$args{'debug'}) : 0;
28:
29: # rewrite XML file (necessary for archive date!)
30: my $fix_xml = 1;
31: my $xml_changed = 0;
32: # XML namespace (not really implemented!)
33: my $namespace = "";
34:
35: # archive name (archive-path element, usually == $docdir)
36: my $archname;
37: # archive storage date
38: my $archdate;
39:
40: #######################################################
41: # external programs
42: #
43: my $archprog = "/opt/tivoli/tsm/client/ba/bin/dsmc";
44: if (! -x $archprog) {
45: logger("ABORT", "TSM client program '$archprog' missing!!");
46: exit 1;
47: }
48: my $checkprog = "/usr/local/mpiwg/archive/archivecheck";
49: if (! -x $checkprog) {
50: logger("ABORT", "archive checking program '$checkprog' missing!!");
51: exit 1;
52: }
53: # log file for archiver
54: my $log_file = "/var/tmp/unarchiver.log";
55: if (! open LOG, ">>$log_file") {
56: logger("ABORT", "unable to write log file '$log_file'!!");
57: exit 1;
58: }
1.2 ! casties 59: LOG->autoflush(1);
1.1 casties 60:
61: #######################################################
62: # check parameters that were passed to the program
63: #
64: my $docdir = $$args{'path'};
65: if (! $docdir) {
66: print "ABORT: no document directory given!\n";
67: exit 1;
68: }
69: # strip trailing slashes
70: $docdir =~ s/\/$//;
71: if (! -d $docdir) {
72: print "ABORT: document directory \'$docdir\' doesn't exist!\n";
73: exit 1;
74: }
75:
76: my $metafile = "$docdir/index.meta";
77: if (! -f $metafile) {
78: print "ABORT: metadata index file \'$metafile\' doesn't exist!\n";
79: exit 1;
80: }
81:
1.2 ! casties 82: # construct document's parent dir
! 83: my $docparent = $docdir;
! 84: $docparent =~ s!/[^/]+$!!;
1.1 casties 85:
86: #######################################################
87: # internal variables
88: #
89:
90: # number of errors
91: my $errcnt = 0;
92: # number of warnings
93: my $warncnt = 0;
94:
95: #######################################################
96: # subroutines
97: #
98:
99:
100: #
101: # $files = read_resource_meta($rootnode)
102: #
103: # checks general resource meta information and reads the list of files
104: #
105: sub read_resource_meta {
106: my ($rootnode) = @_;
107: my %files;
108: #
109: # archive path
110: #
111: # get archive-path
112: $archname = sstrip($rootnode->findvalue('child::archive-path'));
113: if (! $archname) {
114: logger("ABORT", "archive-name element missing!!");
115: exit 1;
116: }
117:
118: #
119: # files
120: #
121: my @filenodes = $rootnode->findnodes('child::file');
122: foreach my $fn (@filenodes) {
123: my $name = sstrip($fn->findvalue('child::name'));
124: my $path = sstrip($fn->findvalue('child::path'));
125: logger("DEBUG", "FILE: ($path)$name");
126: my $f = ($path) ? "$path/$name" : "$name";
127: $files{$f} = [$name];
128: }
129:
130: #
131: # dirs
132: #
133: my @dirnodes = $rootnode->findnodes('child::dir');
134: foreach my $fn (@dirnodes) {
135: my $name = sstrip($fn->findvalue('child::name'));
136: my $path = sstrip($fn->findvalue('child::path'));
137: logger("DEBUG", "DIR: ($path)$name");
138: my $f = ($path) ? "$path/$name" : "$name";
139: $files{$f} = [$name];
140: }
141:
142: #
143: # archive-storage-date
144: #
145: my $archdate = $rootnode->find('child::archive-storage-date');
146: if ($archdate) {
147: logger("INFO", "archive storage date: $archdate");
148: } else {
149: logger("ERROR", "archive storage date missing!");
150: $errcnt++;
151: }
152:
153: #
154: # archive-recall-date
155: #
156: my $recalldatenode = ($rootnode->find('child::archive-recall-date'))->get_node(1);
157: if ($recalldatenode) {
158: print "INFO: archive recall date exists!\n";
159: # delete old date
160: $recalldatenode->removeChildNodes;
161: } else {
162: # create new storage date node
163: $recalldatenode = $rootnode->addNewChild($namespace, "archive-recall-date");
164: # move after archive-path
165: $rootnode->insertAfter($recalldatenode, ($rootnode->find('child::archive-storage-date'))->get_node(1));
166: }
167: $recalldatenode->appendTextNode(scalar localtime);
168: $xml_changed++;
169:
170: return \%files;
171: }
172:
173:
174: #
175: # $%files = run_retrieve
176: #
177: # runs the retriever program on $docdir and returns a list of archived files
178: #
179: # Sample output:
1.2 ! casties 180: # (old!) Retrieving 17,234 /mpiwg/archive/data/test/auto_titit_123/pageimg/essen-wind1.jpg [Done]
! 181: # Retrieving 42,406,326 /mpiwg/archive/data/library/B980G582/raw/00015.tif --> /mpiwg/archive/data/library/B980G582/raw/00015.tif [Done]
1.1 casties 182: sub run_retrieve {
183: my %files;
1.2 ! casties 184: print LOG "START unarchive $version on ", scalar localtime, "\n";
1.1 casties 185: my $archcmd = $archprog;
186: $archcmd .= " retrieve -subdir=yes -replace=all";
187: $archcmd .= " -description='$archname'";
1.2 ! casties 188: $archcmd .= " '$docdir/'"; # archive name
! 189: $archcmd .= " '$docparent/'"; # destination dir name
1.1 casties 190:
191: my $archcnt = 0;
192: print LOG "CMD: $archcmd\n";
193: if (open ARCH, "$archcmd 2>&1 |") {
194: while (<ARCH>) {
195: chomp;
196: print LOG "ARCH: $_\n";
197: if (/
198: Retrieving
199: \s+([\d,]+) # size
200: \s+(\S+) # file name
1.2 ! casties 201: \s+-->
! 202: \s+(\S+) # destination file name
1.1 casties 203: \s+\[Done\]
204: /x) {
205: my $size = $1;
206: my $file = $2;
207: $size =~ s/,//g;
208: logger("DEBUG", " RETRIEVE: file '$file'");
209: $archcnt++;
210: if ($files{$file}) {
211: logger("WARNING", "file $file seems to be archived multiple times.");
212: $warncnt++;
213: }
214: $files{$file} = [$size];
215: }
216: }
217: logger("INFO", "$archcnt archives of " . (scalar keys %files) . " files.");
218: } else {
219: logger("ABORT", "unable to start archive command '$archcmd'!!");
220: exit 1;
221: }
222: return \%files;
223: }
224:
225:
226: #
227: # check_files(\%files_to_retrieve, \%retrieved_files)
228: #
229: # compares the list of archived and retrieved files
230: #
231: sub check_files {
232: my ($to_retrieve, $retrieved) = @_;
233:
234: my $nt = scalar keys %$to_retrieve;
235: my $na = scalar keys %$retrieved;
236:
237: foreach my $ft (sort keys %$to_retrieve) {
238: my $fp = "$docdir/$ft";
239: #logger("DEBUG", " fp: $fp");
240: if ($$retrieved{$fp}) {
241: logger("DEBUG", "$ft retrieved OK");
242: $$retrieved{$fp}->[1] = "OK";
243: } else {
244: logger("ERROR", "file entry '$ft' missing from archive!");
245: $errcnt++;
246: }
247: }
248:
249: foreach my $fa (sort keys %$retrieved) {
250: if (! $$retrieved{$fa}->[1]) {
251: my ($fn, $fp) = split_file_path($fa);
252: if ($index_files{$fn}) {
253: logger("DEBUG", "$fa ignored");
254: $na--;
255: } else {
256: logger("WARNING", "$fa retrieved but not in list!");
257: $warncnt++;
258: }
259: }
260: }
261:
262: if ($nt > $na) {
263: logger("WARNING", "less files were retrieved ($na vs. $nt)!");
264: $warncnt++;
265: } elsif ($na > $nt) {
266: logger("WARNING", "more files were retrieved ($na vs. $nt)!");
267: $warncnt++;
268: }
269:
270: }
271:
272:
273:
274: #######################################################
275: # main
276: #
277:
278: logger("INFO", "unarchiver $version");
279:
280: # make shure the right user is running this program
281: my $user = getlogin;
1.2 ! casties 282: if (($user)&&($user ne "archive")&&($user ne "root")) {
1.1 casties 283: logger("ABORT", "you must be archive or root user to run this program!");
284: exit 1;
285: }
286:
287: # use checkarchive first
288: if (system("$checkprog $docdir >/dev/null") == 0) {
289: logger("INFO", "archive '$docdir' check OK");
290: } else {
291: logger("ABORT", "archive '$docdir' check failed!!");
292: exit 1;
293: }
294:
295: # read index.meta file
296: my ($document, $rootnode) = read_xml($metafile);
297:
298: # check index file
299: my $archived_files = read_resource_meta($rootnode);
300: my $num_archived_files = scalar keys %$archived_files;
301:
302: # check for .archived file
303: if (-f "$docdir/.archived") {
304: logger("INFO", ".archived file exists.");
305: } else {
306: logger("WARNING", "no .archived file!");
307: $warncnt++;
308: }
309:
310: logger("INFO", "$num_archived_files files to retrieve.");
311:
312: # retrieve
313: my $retrieved_files = run_retrieve;
314:
315: my $num_arch_files = (scalar keys %$retrieved_files);
316: if ($num_arch_files == 0) {
317: logger("ABORT", "no files retrieved!!");
318: exit 1;
319: }
320: logger("INFO", "$num_arch_files files retrieved");
321:
322: # check list of archived files
323: check_files($archived_files, $retrieved_files);
324:
325: # rewrite index.meta file
326: write_xml($document, $metafile);
327:
328: logger("INFO", "$warncnt warnings");
329: logger("INFO", "$errcnt errors");
330: if ($errcnt == 0) {
331: logger("DONE", "" . (scalar keys %$retrieved_files) . " archived files retrieved");
332: } else {
333: logger("ABORT", "there were $errcnt errors!!");
334: exit 1;
335: }
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>