#
# SAX handler for harvestmeta
#
# Flattens the SAX event stream of one document into a list of
# [ path, text, attributes ] entries that can be fetched with getData().
#
# NOTE: the parse state lives in file-scoped variables, not in the handler
# object, so every handler instance in the same process shares it.
# start_document() resets the state.
#

package HarvestmetaHandler;

use strict;
use warnings;

use base qw(XML::SAX::Base);

use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

# names of the currently open elements, outermost first
my @currElemPath;
# name of the most recently opened element; "" once an end tag was seen
my $currElem;
# character data collected since the last start or end tag
my $currText;
# attribute string (name="value" pairs) of the most recently opened element
my $currAttrib;
# collected entries, each an array ref [ path, text, attributes ]
my @elements;

#
# getData()
#
# Returns the list of entries collected so far. Each entry is an array
# reference [ $path, $text, $attributes ]; $path is the chain of open
# element names joined with "/".
#
sub getData {
    return @elements;
}

#
# start_document($doc)
#
# SAX document start event: resets all parse state, including the entries
# collected from a previous document.
#
sub start_document {
    my ($self, $doc) = @_;
    #logger('DEBUG', "startdoc: $self, $doc");
    @currElemPath = ();
    $currElem     = "";
    $currText     = "";
    $currAttrib   = "";
    @elements     = ();
    return;
}

#
# start_element($el)
#
# SAX element start event: records the enclosing element if it is still
# pending, then makes $el the current element and builds its attribute
# string.
#
sub start_element {
    my ($self, $el) = @_;
    #logger('DEBUG', "startelem: $self, $el");

    # The previously opened element has not been closed yet, so it has a
    # child element. Record it now with empty text; any character data seen
    # before this child is discarded below.
    if ($currElem) {
        my $elem = join "/", @currElemPath;
        push @elements, [$elem, "", $currAttrib];
    }

    # element name is either LocalName or Name
    my $name = $el->{'LocalName'} || $el->{'Name'};
    #logger('DEBUG', "  name: $name");

    # assemble attributes string; iterate in sorted key order so the result
    # does not depend on Perl's hash ordering
    my $attributes = $el->{'Attributes'} || {};
    $currAttrib = "";
    foreach my $key (sort keys %{$attributes}) {
        my $attr = $attributes->{$key};
        my $n = $attr->{'LocalName'} || $attr->{'Name'};
        my $v = $attr->{'Value'};
        $n = "" unless defined $n;
        $v = "" unless defined $v;
        # NOTE(review): the value is inserted verbatim; a '"' inside it is
        # not escaped -- confirm the consumers of this string can cope.
        $currAttrib .= "$n=\"$v\" ";
    }

    # start element name
    push @currElemPath, $name;
    $currElem = $name;
    $currText = "";
    return;
}

#
# end_element($el)
#
# SAX element end event: stores the collected text of the closing element
# (if it has any after trimming whitespace) and removes the element from the
# path. A closing name that differs from the innermost open element is
# logged as an error; processing continues.
#
sub end_element {
    my ($self, $el) = @_;
    #logger('DEBUG', "endelem: $self, $el");

    # check element name
    my $name = $el->{'LocalName'} || $el->{'Name'};
    $name = "" unless defined $name;
    my $lastag = @currElemPath ? $currElemPath[-1] : "";
    if ($lastag ne $name) {
        logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
    }

    # assemble element path
    my $elem = join "/", @currElemPath;

    # strip whitespace from element content
    $currText =~ s/^\s+//;
    $currText =~ s/\s+$//;

    # Test the length, not the truth value: content "0" is valid data and
    # must not be dropped.
    if (length $currText) {
        # put pair in elements array
        push @elements, [$elem, $currText, $currAttrib];
        #logger('DEBUG', "  elem: $elem = $currText");
    }

    # end element name
    pop @currElemPath;
    $currElem   = "";
    $currText   = "";
    $currAttrib = "";
    return;
}

#
# characters($char)
#
# SAX character data event: appends the data to the content of the current
# element. Called possibly several times per text node.
#
sub characters {
    my ($self, $char) = @_;
    #logger('DEBUG', "characters: $self, $char");

    # add to current content
    my $data = $char->{'Data'};
    $currText .= $data if defined $data;
    #logger('DEBUG', " Text: $currText");
    return;
}

1;