annotate HarvestmetaHandler.pm @ 3:1a51f94d5dbd

new version also reads XML index feeds via HTTP
author casties
date Thu, 08 Jul 2004 23:22:04 +0200
parents 30497c6a3eca
children 046d584ed7b3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
1 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
2 # SAX handler for harvestmeta
30497c6a3eca Initial revision
casties
parents:
diff changeset
3 #
30497c6a3eca Initial revision
casties
parents:
diff changeset
4
30497c6a3eca Initial revision
casties
parents:
diff changeset
5 package HarvestmetaHandler;
30497c6a3eca Initial revision
casties
parents:
diff changeset
6
30497c6a3eca Initial revision
casties
parents:
diff changeset
7 use strict;
30497c6a3eca Initial revision
casties
parents:
diff changeset
8
30497c6a3eca Initial revision
casties
parents:
diff changeset
9 use base qw(XML::SAX::Base);
30497c6a3eca Initial revision
casties
parents:
diff changeset
10
3
1a51f94d5dbd new version also reads XML index feeds via HTTP
casties
parents: 0
diff changeset
11 use lib '/usr/local/mpiwg/archive_devel';
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
12 use MPIWGStor;
30497c6a3eca Initial revision
casties
parents:
diff changeset
13
3
1a51f94d5dbd new version also reads XML index feeds via HTTP
casties
parents: 0
diff changeset
# debug switches: $debugElem enables logging of the SAX events,
# $debugCont enables logging of the collected content
my ($debugElem, $debugCont) = (0, 0);

# parser state shared by all handler callbacks in this file
my @currElemPath;   # names of the currently open elements, outermost first
my $currElem;       # name of the element opened last ("" when none is pending)
my $currText;       # character data collected for the current element
my $currAttrib;     # attribute string of the current element
my @elements;       # result list of [path, text, attributes] records
3
1a51f94d5dbd new version also reads XML index feeds via HTTP
casties
parents: 0
diff changeset
22
0
30497c6a3eca Initial revision
casties
parents:
diff changeset
# Accessor for the parse result.
#
# Returns the list of element records collected by the SAX callbacks.
# Each record is an array ref [element path, text content, attribute
# string], in the order in which the callbacks stored them.
sub getData {
    my @collected = @elements;
    return @collected;
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
26
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback: process the document start event.
#
# Parameters:
#   $self - the handler object
#   $doc  - document argument passed by the parser (only used for logging)
#
# Resets the complete parser state so that records from a previous
# document do not leak into the new one.
sub start_document {
    my ($self, $doc) = @_;
    if ($debugElem) {
        logger('DEBUG', "startdoc: $self, $doc");
    }
    # no element is open and nothing has been collected yet
    $currAttrib = "";
    $currText = "";
    $currElem = "";
    @currElemPath = ();
    @elements = ();
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
37
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback: process an element start event.
#
# Parameters:
#   $self - the handler object
#   $el   - element hash ref; 'LocalName', 'Name' and 'Attributes' are read
#
# If another element is still pending (started but not yet ended), the new
# element is nested inside it, so the pending element is recorded first
# with empty text and its own attribute string. After that the attribute
# string of the new element is assembled and its name is appended to the
# element path.
sub start_element {
    my ($self, $el) = @_;
    # process element start event
    logger('DEBUG', "startelem: $self, $$el{'LocalName'}") if ($debugElem);
    # check if the last element needs to be finished
    if ($currElem) {
        my $elem = join "/", @currElemPath;
        push @elements, [$elem, "", $currAttrib];
    }
    # element name is either LocalName or Name
    my $name = $$el{'LocalName'};
    $name = $$el{'Name'} unless ($name);
    #logger('DEBUG', " name: $name");
    # assemble attributes string
    $currAttrib = "";
    # a missing attribute hash is treated like an empty one
    my $attribs = $$el{'Attributes'} || {};
    # walk the attributes in sorted key order: hash iteration order is
    # arbitrary, so without sorting the same element could yield
    # differently ordered attribute strings
    foreach my $attkey (sort keys %$attribs) {
        my $attr = $$attribs{$attkey};
        # attribute name is either LocalName or Name
        my $key = $$attr{'LocalName'};
        $key = $$attr{'Name'} unless ($key);
        my $val = $$attr{'Value'};
        $currAttrib .= "$key=\"$val\" ";
    }
    # sstrip is not defined in this file (presumably imported from
    # MPIWGStor); presumably it trims the trailing blank -- TODO confirm
    $currAttrib = sstrip($currAttrib);
    # start element name
    push @currElemPath, $name;
    $currElem = $name;
    $currText = "";
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
65
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback: process an element end event.
#
# Parameters:
#   $self - the handler object
#   $el   - element hash ref; 'LocalName' and 'Name' are read
#
# Logs an error if the closing name does not match the innermost open
# element. Stores a record [path, text, attributes] when the element has
# text content or attributes, then removes the element from the path and
# clears the per-element state.
sub end_element {
    my ($self, $el) = @_;
    # process element end event
    logger('DEBUG', "endelem: $self, $$el{'LocalName'}") if ($debugElem);
    # check element name (either LocalName or Name)
    my $name = $$el{'LocalName'};
    $name = $$el{'Name'} unless ($name);
    my $lastag = $currElemPath[-1];
    if ($lastag ne $name) {
        logger('ERROR', "closing tag '$lastag' doesn't match '$name'!");
    }
    # assemble element path
    my $elem = join "/", @currElemPath;
    # strip whitespace from element content
    $currText =~ s/^\s*//;
    $currText =~ s/\s*$//;
    # test the text by length, not by truth: the string "0" is false in
    # Perl, so a plain truth test would silently drop an element whose
    # whole content is "0"
    if (length($currText) || $currAttrib) {
        # put pair in elements array
        push @elements, [$elem, $currText, $currAttrib];
        logger('DEBUG', " elem: $elem = $currText ($currAttrib)") if ($debugCont);
    }
    # end element name
    pop @currElemPath;
    $currElem = "";
    $currText = "";
    $currAttrib = "";
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
93
30497c6a3eca Initial revision
casties
parents:
diff changeset
# SAX callback: process a character data event.
#
# Parameters:
#   $self - the handler object
#   $char - character data hash ref; its 'Data' entry is read
#
# The data is appended to the text collected for the current element.
sub characters {
    my ($self, $char) = @_;
    if ($debugElem) {
        logger('DEBUG', "characters: $self, $char");
    }
    # accumulate the data in the current content
    $currText = $currText . $char->{'Data'};
    logger('DEBUG', " Text: $currText") if $debugCont;
}
30497c6a3eca Initial revision
casties
parents:
diff changeset
102
30497c6a3eca Initial revision
casties
parents:
diff changeset
103
30497c6a3eca Initial revision
casties
parents:
diff changeset
104 1;