view HarvestmetaHandler.pm @ 57:2208ed7370cb

updated to Ubuntu Perl paths.
author casties
date Thu, 16 Mar 2017 18:00:43 +0100
parents a3feffd94021
children
line wrap: on
line source

#
# SAX handler for harvestmeta
#

package HarvestmetaHandler;

use strict;

use base qw(XML::SAX::Base);

use lib '/usr/local/mpiwg/archive';
use MPIWGStor;

my $debugElem = 0;
my $debugCont = 0;

my @currElemPath;
my $currElem;
my $currText;
my $currAttrib;
my @elements;

sub getData {
    return @elements;
}

sub start_document {
    my ($self, $doc) = @_;
    # process document start event
    logger('DEBUG', "startdoc: $self, $doc") if ($debugElem);
    @currElemPath = ();
    $currElem = "";
    $currText = "";
    $currAttrib ="";
    @elements = ();
}

sub start_element {
    my ($self, $el) = @_;
    # process element start event
    logger('DEBUG', "startelem: $self, $$el{'LocalName'}") if ($debugElem);
    # check if the last element needs to be finished
    if ($currElem) {
	my $elem = join "/", @currElemPath;
	push @elements, [$elem, "", $currAttrib];
    }
    # element name is either LocalName or Name
    my $name = $$el{'LocalName'} or $$el{'Name'};
    #logger('DEBUG', "  name: $name");
    # assemble attributes string
    $currAttrib = "";
    foreach my $attr (values %{$$el{'Attributes'}}) {
	my $key = $$attr{'LocalName'} or $$attr{'Name'};
	my $val = $$attr{'Value'};
	$currAttrib .= "$key=\"$val\" ";
    }
    $currAttrib = sstrip($currAttrib);
    # start element name
    push @currElemPath, $name;
    $currElem = $name;
    $currText = "";
}

sub end_element {
    my ($self, $el) = @_;
    # process element end event
    logger('DEBUG', "endelem: $self, $$el{'LocalName'}") if ($debugElem);
    # check element name
    my $name = $$el{'LocalName'} or $$el{'Name'};
    my $lastag = $currElemPath[$#currElemPath];
    if ($lastag ne $name) {
	logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
    }
    # assemble element path
    my $elem = join "/", @currElemPath;
    # strip whitespace from element content
    $currText =~ s/^\s*//;
    $currText =~ s/\s*$//;
    if (($currText)||($currAttrib)) {
	# put pair in elements array
	push @elements, [$elem, $currText, $currAttrib];
	logger('DEBUG', "  elem: $elem = $currText ($currAttrib)") if ($debugCont);
    }
    # end element name
    pop @currElemPath;
    $currElem = "";
    $currText = "";
    $currAttrib ="";
}

sub characters {
    my ($self, $char) = @_;
    # process character data event
    logger('DEBUG', "characters: $self, $char") if ($debugElem > 1);
    # add to current content
    $currText .= $$char{'Data'};
    logger('DEBUG', " Text: $currText") if ($debugCont > 1);
}


1;