annotate HarvestmetaHandler.pm @ 0:30497c6a3eca

Initial revision
author casties
date Thu, 17 Jun 2004 17:58:42 +0200
parents
children 1a51f94d5dbd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
1 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
2 # SAX handler for harvestmeta
30497c6a3eca Initial revision
casties
parents:
diff changeset
3 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
4
30497c6a3eca Initial revision
casties
parents:
diff changeset
5 package HarvestmetaHandler;
30497c6a3eca Initial revision
casties
parents:
diff changeset
6
30497c6a3eca Initial revision
casties
parents:
diff changeset
7 use strict;
30497c6a3eca Initial revision
casties
parents:
diff changeset
8
30497c6a3eca Initial revision
casties
parents:
diff changeset
9 use base qw(XML::SAX::Base);
30497c6a3eca Initial revision
casties
parents:
diff changeset
10
30497c6a3eca Initial revision
casties
parents:
diff changeset
11 use lib '/usr/local/mpiwg/archive';
30497c6a3eca Initial revision
casties
parents:
diff changeset
12 use MPIWGStor;
30497c6a3eca Initial revision
casties
parents:
diff changeset
13
30497c6a3eca Initial revision
casties
parents:
diff changeset
# Parser state shared by all SAX callbacks below. These are file-scoped
# lexicals, so the state is per process rather than per handler object.
# They are (re)initialised in start_document.

# element names from the document root down to the currently open element,
# and the records collected so far ([path, text, attribute string] each)
my (@currElemPath, @elements);

# name of the element opened last (empty once it was closed), the character
# data accumulated for it, and its attributes flattened into one string
my ($currElem, $currText, $currAttrib);
30497c6a3eca Initial revision
casties
parents:
diff changeset
19
30497c6a3eca Initial revision
casties
parents:
diff changeset
# Accessor for the harvested data.
#
# Returns the list of records collected since the last start_document
# event; every record is an array reference holding
# [element path, text content, attribute string].
# In scalar context the number of records is returned.
sub getData {
    return @elements;
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
23
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX event: start of a document.
#
# Resets the complete parser state, which also discards the records
# collected from any previously parsed document.
# The $doc event structure is accepted but not used.
sub start_document {
    my $self = shift;
    my $doc  = shift;
    #logger('DEBUG', "startdoc: $self, $doc");

    # forget everything about the element that was being read
    ($currElem, $currText, $currAttrib) = ("", "", "");

    # empty the element path stack and the list of collected records
    @currElemPath = ();
    @elements     = ();
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
34
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX event: start of an element.
#
# If the element opened before this one is still pending (i.e. it has child
# elements), it is recorded first with empty text and its attribute string.
# Then the new element becomes the current one: its name is pushed on the
# path stack and its attributes are flattened into a string made of
# 'name="value" ' pieces (note the trailing blank after each piece).
#
# Parameters:
#   $self - the handler object (unused)
#   $el   - element event hash; the keys read here are LocalName, Name
#           and Attributes (a hash of attribute hashes with LocalName,
#           Name and Value)
sub start_element {
    my ($self, $el) = @_;
    #logger('DEBUG', "startelem: $self, $el");

    # check if the last element needs to be finished
    if ($currElem) {
        my $elem = join "/", @currElemPath;
        push @elements, [$elem, "", $currAttrib];
    }

    # element name is either LocalName or Name
    my $name = $el->{'LocalName'} || $el->{'Name'};
    #logger('DEBUG', "  name: $name");

    # assemble attributes string
    # - a missing Attributes entry is treated as "no attributes" without
    #   modifying the caller's event hash
    # - the keys are sorted so the resulting string does not depend on
    #   Perl's randomized hash ordering
    # - a lexical loop variable is used; $a is the package variable
    #   reserved for sort blocks
    my $attrs = $el->{'Attributes'} || {};
    $currAttrib = "";
    foreach my $key (sort keys %$attrs) {
        my $attr = $attrs->{$key};
        my $n = $attr->{'LocalName'} || $attr->{'Name'};
        my $v = $attr->{'Value'};
        $currAttrib .= "$n=\"$v\" ";
    }

    # start element name
    push @currElemPath, $name;
    $currElem = $name;
    $currText = "";
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
61
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX event: end of an element.
#
# Logs an error if the closing name does not match the innermost open
# element, then records the element as [path, text, attribute string] if
# it has non-empty text after trimming surrounding whitespace. Finally the
# element is popped from the path stack and the per-element state is
# cleared.
#
# Parameters:
#   $self - the handler object (unused)
#   $el   - element event hash; the keys read here are LocalName and Name
sub end_element {
    my ($self, $el) = @_;
    #logger('DEBUG', "endelem: $self, $el");

    # check element name (an empty stack yields '' instead of undef)
    my $name = $el->{'LocalName'} || $el->{'Name'};
    my $lastag = @currElemPath ? $currElemPath[-1] : '';
    if ($lastag ne $name) {
        logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
    }

    # assemble element path
    my $elem = join "/", @currElemPath;

    # strip whitespace from element content
    $currText =~ s/^\s+//;
    $currText =~ s/\s+$//;

    # test the length, not the truth value: the content "0" is a false
    # string in Perl but still is data that has to be recorded
    if (length $currText) {
        # put pair in elements array
        push @elements, [$elem, $currText, $currAttrib];
        #logger('DEBUG', "  elem: $elem = $currText");
    }

    # end element name
    pop @currElemPath;
    $currElem = "";
    $currText = "";
    $currAttrib = "";
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
89
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX event: character data.
#
# Appends the 'Data' member of the event to the text collected for the
# current element. Data is accumulated here and evaluated in end_element.
sub characters {
    my $self  = shift;
    my $chunk = shift;
    #logger('DEBUG', "characters: $self, $chunk");

    # add to current content
    $currText .= $chunk->{'Data'};
    #logger('DEBUG', "  Text: $currText");
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
98
30497c6a3eca Initial revision
casties
parents:
diff changeset
99
30497c6a3eca Initial revision
casties
parents:
diff changeset
100 1;