0
|
1 #
|
|
2 # SAX handler for harvestmeta
|
|
3 #
|
|
4
|
|
5 package HarvestmetaHandler;
|
|
6
|
|
7 use strict;
|
|
8
|
|
9 use base qw(XML::SAX::Base);
|
|
10
|
3
|
11 use lib '/usr/local/mpiwg/archive_devel';
|
0
|
12 use MPIWGStor;
|
|
13
|
3
|
14 my $debugElem = 0;
|
|
15 my $debugCont = 0;
|
|
16
|
0
|
17 my @currElemPath;
|
|
18 my $currElem;
|
|
19 my $currText;
|
|
20 my $currAttrib;
|
|
21 my @elements;
|
3
|
22
|
0
|
23 sub getData {
|
|
24 return @elements;
|
|
25 }
|
|
26
|
|
27 sub start_document {
|
|
28 my ($self, $doc) = @_;
|
|
29 # process document start event
|
3
|
30 logger('DEBUG', "startdoc: $self, $doc") if ($debugElem);
|
0
|
31 @currElemPath = ();
|
|
32 $currElem = "";
|
|
33 $currText = "";
|
|
34 $currAttrib ="";
|
|
35 @elements = ();
|
|
36 }
|
|
37
|
|
38 sub start_element {
|
|
39 my ($self, $el) = @_;
|
|
40 # process element start event
|
3
|
41 logger('DEBUG', "startelem: $self, $$el{'LocalName'}") if ($debugElem);
|
0
|
42 # check if the last element needs to be finished
|
|
43 if ($currElem) {
|
|
44 my $elem = join "/", @currElemPath;
|
|
45 push @elements, [$elem, "", $currAttrib];
|
|
46 }
|
|
47 # element name is either LocalName or Name
|
|
48 my $name = $$el{'LocalName'};
|
|
49 $name = $$el{'Name'} unless ($name);
|
|
50 #logger('DEBUG', " name: $name");
|
|
51 # assemble attributes string
|
|
52 $currAttrib ="";
|
3
|
53 foreach my $attr (values %{$$el{'Attributes'}}) {
|
|
54 my $key = $$attr{'LocalName'};
|
|
55 $key = $$attr{'Name'} unless ($key);
|
|
56 my $val = $$attr{'Value'};
|
|
57 $currAttrib .= "$key=\"$val\" ";
|
0
|
58 }
|
3
|
59 $currAttrib = sstrip($currAttrib);
|
0
|
60 # start element name
|
|
61 push @currElemPath, $name;
|
|
62 $currElem = $name;
|
|
63 $currText = "";
|
|
64 }
|
|
65
|
|
66 sub end_element {
|
|
67 my ($self, $el) = @_;
|
|
68 # process element end event
|
3
|
69 logger('DEBUG', "endelem: $self, $$el{'LocalName'}") if ($debugElem);
|
0
|
70 # check element name
|
|
71 my $name = $$el{'LocalName'};
|
|
72 $name = $$el{'Name'} unless ($name);
|
|
73 my $lastag = $currElemPath[$#currElemPath];
|
|
74 if ($lastag ne $name) {
|
|
75 logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
|
|
76 }
|
|
77 # assemble element path
|
|
78 my $elem = join "/", @currElemPath;
|
|
79 # strip whitespace from element content
|
|
80 $currText =~ s/^\s*//;
|
|
81 $currText =~ s/\s*$//;
|
3
|
82 if (($currText)||($currAttrib)) {
|
0
|
83 # put pair in elements array
|
|
84 push @elements, [$elem, $currText, $currAttrib];
|
3
|
85 logger('DEBUG', " elem: $elem = $currText ($currAttrib)") if ($debugCont);
|
0
|
86 }
|
|
87 # end element name
|
|
88 pop @currElemPath;
|
|
89 $currElem = "";
|
|
90 $currText = "";
|
|
91 $currAttrib ="";
|
|
92 }
|
|
93
|
|
94 sub characters {
|
|
95 my ($self, $char) = @_;
|
|
96 # process character data event
|
3
|
97 logger('DEBUG', "characters: $self, $char") if ($debugElem);
|
0
|
98 # add to current content
|
|
99 $currText .= $$char{'Data'};
|
3
|
100 logger('DEBUG', " Text: $currText") if ($debugCont);
|
0
|
101 }
|
|
102
|
|
103
|
|
104 1;
|