0
|
1 #
|
|
2 # SAX handler for harvestmeta
|
|
3 #
|
|
4
|
|
5 package HarvestmetaHandler;
|
|
6
|
|
7 use strict;
|
|
8
|
|
9 use base qw(XML::SAX::Base);
|
|
10
|
|
11 use lib '/usr/local/mpiwg/archive';
|
|
12 use MPIWGStor;
|
|
13
|
|
14 my @currElemPath;
|
|
15 my $currElem;
|
|
16 my $currText;
|
|
17 my $currAttrib;
|
|
18 my @elements;
|
|
19
|
|
20 sub getData {
|
|
21 return @elements;
|
|
22 }
|
|
23
|
|
24 sub start_document {
|
|
25 my ($self, $doc) = @_;
|
|
26 # process document start event
|
|
27 #logger('DEBUG', "startdoc: $self, $doc");
|
|
28 @currElemPath = ();
|
|
29 $currElem = "";
|
|
30 $currText = "";
|
|
31 $currAttrib ="";
|
|
32 @elements = ();
|
|
33 }
|
|
34
|
|
35 sub start_element {
|
|
36 my ($self, $el) = @_;
|
|
37 # process element start event
|
|
38 #logger('DEBUG', "startelem: $self, $el");
|
|
39 # check if the last element needs to be finished
|
|
40 if ($currElem) {
|
|
41 my $elem = join "/", @currElemPath;
|
|
42 push @elements, [$elem, "", $currAttrib];
|
|
43 }
|
|
44 # element name is either LocalName or Name
|
|
45 my $name = $$el{'LocalName'};
|
|
46 $name = $$el{'Name'} unless ($name);
|
|
47 #logger('DEBUG', " name: $name");
|
|
48 # assemble attributes string
|
|
49 $currAttrib ="";
|
|
50 foreach $a (values %{$$el{'Attributes'}}) {
|
|
51 my $n = $$a{'LocalName'};
|
|
52 $n = $$a{'Name'} unless ($n);
|
|
53 my $v = $$a{'Value'};
|
|
54 $currAttrib .= "$n=\"$v\" ";
|
|
55 }
|
|
56 # start element name
|
|
57 push @currElemPath, $name;
|
|
58 $currElem = $name;
|
|
59 $currText = "";
|
|
60 }
|
|
61
|
|
62 sub end_element {
|
|
63 my ($self, $el) = @_;
|
|
64 # process element end event
|
|
65 #logger('DEBUG', "endelem: $self, $el");
|
|
66 # check element name
|
|
67 my $name = $$el{'LocalName'};
|
|
68 $name = $$el{'Name'} unless ($name);
|
|
69 my $lastag = $currElemPath[$#currElemPath];
|
|
70 if ($lastag ne $name) {
|
|
71 logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
|
|
72 }
|
|
73 # assemble element path
|
|
74 my $elem = join "/", @currElemPath;
|
|
75 # strip whitespace from element content
|
|
76 $currText =~ s/^\s*//;
|
|
77 $currText =~ s/\s*$//;
|
|
78 if ($currText) {
|
|
79 # put pair in elements array
|
|
80 push @elements, [$elem, $currText, $currAttrib];
|
|
81 #logger('DEBUG', " elem: $elem = $currText");
|
|
82 }
|
|
83 # end element name
|
|
84 pop @currElemPath;
|
|
85 $currElem = "";
|
|
86 $currText = "";
|
|
87 $currAttrib ="";
|
|
88 }
|
|
89
|
|
90 sub characters {
|
|
91 my ($self, $char) = @_;
|
|
92 # process character data event
|
|
93 #logger('DEBUG', "characters: $self, $char");
|
|
94 # add to current content
|
|
95 $currText .= $$char{'Data'};
|
|
96 #logger('DEBUG', " Text: $currText");
|
|
97 }
|
|
98
|
|
99
|
|
100 1;
|