0
|
1 #
|
|
2 # SAX handler for harvestmeta
|
|
3 #
|
|
4
|
|
5 package HarvestmetaHandler;
|
|
6
|
|
7 use strict;
|
|
8
|
|
9 use base qw(XML::SAX::Base);
|
|
10
|
4
|
11 use lib '/usr/local/mpiwg/archive';
|
0
|
12 use MPIWGStor;
|
|
13
|
3
|
14 my $debugElem = 0;
|
|
15 my $debugCont = 0;
|
|
16
|
0
|
17 my @currElemPath;
|
|
18 my $currElem;
|
|
19 my $currText;
|
|
20 my $currAttrib;
|
|
21 my @elements;
|
3
|
22
|
0
|
23 sub getData {
|
|
24 return @elements;
|
|
25 }
|
|
26
|
|
27 sub start_document {
|
|
28 my ($self, $doc) = @_;
|
|
29 # process document start event
|
3
|
30 logger('DEBUG', "startdoc: $self, $doc") if ($debugElem);
|
0
|
31 @currElemPath = ();
|
|
32 $currElem = "";
|
|
33 $currText = "";
|
|
34 $currAttrib ="";
|
|
35 @elements = ();
|
|
36 }
|
|
37
|
|
38 sub start_element {
|
|
39 my ($self, $el) = @_;
|
|
40 # process element start event
|
3
|
41 logger('DEBUG', "startelem: $self, $$el{'LocalName'}") if ($debugElem);
|
0
|
42 # check if the last element needs to be finished
|
|
43 if ($currElem) {
|
|
44 my $elem = join "/", @currElemPath;
|
|
45 push @elements, [$elem, "", $currAttrib];
|
|
46 }
|
|
47 # element name is either LocalName or Name
|
6
|
48 my $name = $$el{'LocalName'} or $$el{'Name'};
|
0
|
49 #logger('DEBUG', " name: $name");
|
|
50 # assemble attributes string
|
6
|
51 $currAttrib = "";
|
3
|
52 foreach my $attr (values %{$$el{'Attributes'}}) {
|
6
|
53 my $key = $$attr{'LocalName'} or $$attr{'Name'};
|
3
|
54 my $val = $$attr{'Value'};
|
|
55 $currAttrib .= "$key=\"$val\" ";
|
0
|
56 }
|
3
|
57 $currAttrib = sstrip($currAttrib);
|
0
|
58 # start element name
|
|
59 push @currElemPath, $name;
|
|
60 $currElem = $name;
|
|
61 $currText = "";
|
|
62 }
|
|
63
|
|
64 sub end_element {
|
|
65 my ($self, $el) = @_;
|
|
66 # process element end event
|
3
|
67 logger('DEBUG', "endelem: $self, $$el{'LocalName'}") if ($debugElem);
|
0
|
68 # check element name
|
6
|
69 my $name = $$el{'LocalName'} or $$el{'Name'};
|
0
|
70 my $lastag = $currElemPath[$#currElemPath];
|
|
71 if ($lastag ne $name) {
|
|
72 logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
|
|
73 }
|
|
74 # assemble element path
|
|
75 my $elem = join "/", @currElemPath;
|
|
76 # strip whitespace from element content
|
|
77 $currText =~ s/^\s*//;
|
|
78 $currText =~ s/\s*$//;
|
3
|
79 if (($currText)||($currAttrib)) {
|
0
|
80 # put pair in elements array
|
|
81 push @elements, [$elem, $currText, $currAttrib];
|
3
|
82 logger('DEBUG', " elem: $elem = $currText ($currAttrib)") if ($debugCont);
|
0
|
83 }
|
|
84 # end element name
|
|
85 pop @currElemPath;
|
|
86 $currElem = "";
|
|
87 $currText = "";
|
|
88 $currAttrib ="";
|
|
89 }
|
|
90
|
|
91 sub characters {
|
|
92 my ($self, $char) = @_;
|
|
93 # process character data event
|
6
|
94 logger('DEBUG', "characters: $self, $char") if ($debugElem > 1);
|
0
|
95 # add to current content
|
|
96 $currText .= $$char{'Data'};
|
6
|
97 logger('DEBUG', " Text: $currText") if ($debugCont > 1);
|
0
|
98 }
|
|
99
|
|
100
|
|
101 1;
|