Mercurial > hg > foxridge-archiver
diff harvestmeta.pl @ 8:65895eec9e30
Added handling of modification date for HTTP feeds.
author | casties |
---|---|
date | Tue, 28 Sep 2004 14:20:26 +0200 |
parents | a3feffd94021 |
children | 2208ed7370cb |
line wrap: on
line diff
--- a/harvestmeta.pl	Thu Sep 23 21:55:44 2004 +0200
+++ b/harvestmeta.pl	Tue Sep 28 14:20:26 2004 +0200
@@ -17,7 +17,7 @@
 #
 
 # program version
-my $version = "0.2.1 (13.07.2004)";
+my $version = "0.3 (27.9.2004)";
 
 # read command line parameters
 my $args = MPIWGStor::parseargs;
@@ -229,6 +229,7 @@
     my ($tag, $val, $attr) = @$me;
     my $meta = "";
     my $file = "";
+    my $mtime = "";
     if ($tag =~ /index\/resource$/) {
         if ($attr =~ /metaLink=\"([^\"]+)\"/) {
             $meta = $1;
@@ -236,8 +237,11 @@
         if ($attr =~ /resourceLink=\"([^\"]+)\"/) {
             $file = $1;
         }
+        if ($attr =~ /modificationDate=\"([^\"]+)\"/) {
+            $mtime = $1;
+        }
         if ($meta =~ /^http:/) {
-            harvestURL($meta, $file);
+            harvestURL($meta, $file, $mtime);
         }
     }
 }
@@ -251,29 +255,29 @@
 # in the database (under $filepath)
 #
 sub harvestURL {
-    my ($metaurl, $fileurl) = @_;
+    my ($metaurl, $fileurl, $filetime) = @_;
     logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'");
-    # try to parse index file
-    my $ret = eval{$metaParser->parse_uri($metaurl)};
-    if ($@) {
-        my $errmsg = $@;
-        logger('ERROR', "error reading XML from '$metaurl' ($errmsg)");
-        $errcnt++;
-        return;
-    }
-    my @data = $metaParserHandler->getData();
-    logger('DEBUG', "parsed $#data+1 elements");
-    if (lc $data[0][0] eq "html") {
-        # oops, wrong
-        logger('WARNING', "invalid HTML content from $metaurl");
-        $warncnt++;
-        return;
-    }
-    # filetime is now
-    my $filetime = stime(time);
+    # if no filetime then now
+    $filetime = stime(time) unless ($filetime);
     # register file in db
     my $fid = registerFile("$fileurl", $filetime);
     if ($fid) {
+        # try to parse index file
+        my $ret = eval{$metaParser->parse_uri($metaurl)};
+        if ($@) {
+            my $errmsg = $@;
+            logger('ERROR', "error reading XML from '$metaurl' ($errmsg)");
+            $errcnt++;
+            return;
+        }
+        my @data = $metaParserHandler->getData();
+        logger('DEBUG', "parsed $#data+1 elements");
+        if (lc $data[0][0] eq "html") {
+            # oops, wrong
+            logger('WARNING', "invalid HTML content from $metaurl");
+            $warncnt++;
+            return;
+        }
         # file is new/modified
         registerMeta($fid, @data);
     }