annotate HarvestmetaHandler.pm @ 57:2208ed7370cb

updated to Ubuntu Perl paths.
author casties
date Thu, 16 Mar 2017 18:00:43 +0100
parents a3feffd94021
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
1 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
2 # SAX handler for harvestmeta
30497c6a3eca Initial revision
casties
parents:
diff changeset
3 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
4
30497c6a3eca Initial revision
casties
parents:
diff changeset
5 package HarvestmetaHandler;
30497c6a3eca Initial revision
casties
parents:
diff changeset
6
30497c6a3eca Initial revision
casties
parents:
diff changeset
7 use strict;
30497c6a3eca Initial revision
casties
parents:
diff changeset
8
30497c6a3eca Initial revision
casties
parents:
diff changeset
9 use base qw(XML::SAX::Base);
30497c6a3eca Initial revision
casties
parents:
diff changeset
10
4
046d584ed7b3 forgot lib path...
casties
parents: 3
diff changeset
11 use lib '/usr/local/mpiwg/archive';
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
12 use MPIWGStor;
30497c6a3eca Initial revision
casties
parents:
diff changeset
13
3
1a51f94d5dbd new version also reads XML index feeds via HTTP
casties
parents: 0
diff changeset
# Debug switches. $debugElem enables logging of the SAX structure events
# (document/element start and end; a value above 1 also logs character
# events). $debugCont enables logging of collected element content (a
# value above 1 also logs the text buffer after every character event).
my ($debugElem, $debugCont) = (0, 0);

# Parser state shared by the SAX callbacks below. These are file-scoped
# lexicals, not per-object fields, so every handler instance in the same
# process works on the same state.
#   @currElemPath - names of the currently open elements, outermost first
#   @elements     - collected records, each [path, text, attribute string]
my (@currElemPath, @elements);
#   $currElem   - name of the element opened last, "" once it was closed
#   $currText   - character data collected for the current element
#   $currAttrib - attribute string of the current element
my ($currElem, $currText, $currAttrib);
3
1a51f94d5dbd new version also reads XML index feeds via HTTP
casties
parents: 0
diff changeset
22
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
# Returns the records collected so far as a list. Each record is an
# array reference of the form [element path, text, attribute string].
# The array references themselves are shared with the internal list.
sub getData {
    my @records = @elements;
    return @records;
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
26
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback for the start of a document: clears all parser state so
# that a new document starts with an empty element path and result list.
#
#   $self - the handler object (only used in the debug message)
#   $doc  - the document event data (only used in the debug message)
sub start_document {
    my ($self, $doc) = @_;
    if ($debugElem) {
        logger('DEBUG', "startdoc: $self, $doc");
    }
    # reset the scalar state in one go
    $currElem = $currText = $currAttrib = "";
    # forget the open element path and everything collected so far
    @currElemPath = ();
    @elements = ();
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
37
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback for an element start tag.
#
#   $self - the handler object (only used in the debug message)
#   $el   - element hash reference; the keys 'LocalName', 'Name' and
#           'Attributes' are read
#
# If another element is still open (i.e. this element is its child), a
# record [path, "", attributes] is stored for the parent first. Then the
# attribute string for the new element is built, its name is pushed on
# the element path and the text buffer is cleared.
sub start_element {
    my ($self, $el) = @_;
    # process element start event
    logger('DEBUG', "startelem: $self, $$el{'LocalName'}") if ($debugElem);
    # check if the last element needs to be finished
    if ($currElem) {
        my $elem = join "/", @currElemPath;
        push @elements, [$elem, "", $currAttrib];
    }
    # element name is either LocalName or Name.
    # This has to be '||': 'or' binds looser than '=', so with 'or' the
    # fallback to Name was evaluated but never assigned.
    my $name = $$el{'LocalName'} || $$el{'Name'};
    #logger('DEBUG', " name: $name");
    # assemble attributes string as key="value" pairs separated by blanks
    my $attrs = $$el{'Attributes'} || {};
    my @pairs;
    # iterate in sorted hash key order so the result does not depend on
    # Perl's randomized hash ordering
    foreach my $attrKey (sort keys %$attrs) {
        my $attr = $$attrs{$attrKey};
        my $key = $$attr{'LocalName'} || $$attr{'Name'};
        my $val = $$attr{'Value'};
        push @pairs, "$key=\"$val\"";
    }
    $currAttrib = sstrip(join(" ", @pairs));
    # start element name
    push @currElemPath, $name;
    $currElem = $name;
    $currText = "";
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
63
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback for an element end tag.
#
#   $self - the handler object (only used in the debug message)
#   $el   - element hash reference; the keys 'LocalName' and 'Name' are read
#
# Logs an error if the closing name differs from the innermost open
# element. If the element has text content or attributes, the record
# [path, text, attributes] is appended to the result list. Finally the
# element is removed from the path and the per-element state is cleared.
sub end_element {
    my ($self, $el) = @_;
    # process element end event
    logger('DEBUG', "endelem: $self, $$el{'LocalName'}") if ($debugElem);
    # check element name.
    # This has to be '||': 'or' binds looser than '=', so with 'or' the
    # fallback to Name was evaluated but never assigned.
    my $name = $$el{'LocalName'} || $$el{'Name'};
    my $lastag = $currElemPath[$#currElemPath];
    if ($lastag ne $name) {
        logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
    }
    # assemble element path
    my $elem = join "/", @currElemPath;
    # strip whitespace from element content
    $currText =~ s/^\s+//;
    $currText =~ s/\s+$//;
    # compare against the empty string instead of testing truthiness so
    # that a content of "0" is not mistaken for "no content"
    if (($currText ne '')||($currAttrib)) {
        # put pair in elements array
        push @elements, [$elem, $currText, $currAttrib];
        logger('DEBUG', " elem: $elem = $currText ($currAttrib)") if ($debugCont);
    }
    # end element name
    pop @currElemPath;
    $currElem = "";
    $currText = "";
    $currAttrib = "";
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
90
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback for character data: appends the 'Data' entry of the event
# to the text buffer of the current element.
#
#   $self - the handler object (only used in the debug message)
#   $char - character event hash reference; the key 'Data' is read
sub characters {
    my ($self, $char) = @_;
    if ($debugElem > 1) {
        logger('DEBUG', "characters: $self, $char");
    }
    # character data may arrive in several events, so collect it
    $currText = $currText . $char->{'Data'};
    if ($debugCont > 1) {
        logger('DEBUG', " Text: $currText");
    }
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
99
30497c6a3eca Initial revision
casties
parents:
diff changeset
100
30497c6a3eca Initial revision
casties
parents:
diff changeset
101 1;