# # SAX handler for harvestmeta # package HarvestmetaHandler; use strict; use base qw(XML::SAX::Base); use lib '/usr/local/mpiwg/archive'; use MPIWGStor; my $debugElem = 0; my $debugCont = 0; my @currElemPath; my $currElem; my $currText; my $currAttrib; my @elements; sub getData { return @elements; } sub start_document { my ($self, $doc) = @_; # process document start event logger('DEBUG', "startdoc: $self, $doc") if ($debugElem); @currElemPath = (); $currElem = ""; $currText = ""; $currAttrib =""; @elements = (); } sub start_element { my ($self, $el) = @_; # process element start event logger('DEBUG', "startelem: $self, $$el{'LocalName'}") if ($debugElem); # check if the last element needs to be finished if ($currElem) { my $elem = join "/", @currElemPath; push @elements, [$elem, "", $currAttrib]; } # element name is either LocalName or Name my $name = $$el{'LocalName'}; $name = $$el{'Name'} unless ($name); #logger('DEBUG', " name: $name"); # assemble attributes string $currAttrib =""; foreach my $attr (values %{$$el{'Attributes'}}) { my $key = $$attr{'LocalName'}; $key = $$attr{'Name'} unless ($key); my $val = $$attr{'Value'}; $currAttrib .= "$key=\"$val\" "; } $currAttrib = sstrip($currAttrib); # start element name push @currElemPath, $name; $currElem = $name; $currText = ""; } sub end_element { my ($self, $el) = @_; # process element end event logger('DEBUG', "endelem: $self, $$el{'LocalName'}") if ($debugElem); # check element name my $name = $$el{'LocalName'}; $name = $$el{'Name'} unless ($name); my $lastag = $currElemPath[$#currElemPath]; if ($lastag ne $name) { logger('ERROR', "closing tag '$lastag' doesn't match '$name'!"); } # assemble element path my $elem = join "/", @currElemPath; # strip whitespace from element content $currText =~ s/^\s*//; $currText =~ s/\s*$//; if (($currText)||($currAttrib)) { # put pair in elements array push @elements, [$elem, $currText, $currAttrib]; logger('DEBUG', " elem: $elem = $currText ($currAttrib)") if ($debugCont); } # end element name pop @currElemPath; $currElem = ""; $currText = ""; $currAttrib =""; } sub characters { my ($self, $char) = @_; # process character data event logger('DEBUG', "characters: $self, $char") if ($debugElem); # add to current content $currText .= $$char{'Data'}; logger('DEBUG', " Text: $currText") if ($debugCont); } 1;