--- foxridge-archiver/harvestmeta.pl 2004/07/13 18:32:49 1.4 +++ foxridge-archiver/harvestmeta.pl 2004/09/28 12:20:26 1.5 @@ -17,7 +17,7 @@ $|=1; # # program version -my $version = "0.2.1 (13.07.2004)"; +my $version = "0.3 (27.9.2004)"; # read command line parameters my $args = MPIWGStor::parseargs; @@ -229,6 +229,7 @@ sub readURLIndex { my ($tag, $val, $attr) = @$me; my $meta = ""; my $file = ""; + my $mtime = ""; if ($tag =~ /index\/resource$/) { if ($attr =~ /metaLink=\"([^\"]+)\"/) { $meta = $1; @@ -236,8 +237,11 @@ sub readURLIndex { if ($attr =~ /resourceLink=\"([^\"]+)\"/) { $file = $1; } + if ($attr =~ /modificationDate=\"([^\"]+)\"/) { + $mtime = $1; + } if ($meta =~ /^http:/) { - harvestURL($meta, $file); + harvestURL($meta, $file, $mtime); } } } @@ -251,29 +255,29 @@ sub readURLIndex { # in the database (under $filepath) # sub harvestURL { - my ($metaurl, $fileurl) = @_; + my ($metaurl, $fileurl, $filetime) = @_; logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'"); - # try to parse index file - my $ret = eval{$metaParser->parse_uri($metaurl)}; - if ($@) { - my $errmsg = $@; - logger('ERROR', "error reading XML from '$metaurl' ($errmsg)"); - $errcnt++; - return; - } - my @data = $metaParserHandler->getData(); - logger('DEBUG', "parsed $#data+1 elements"); - if (lc $data[0][0] eq "html") { - # oops, wrong - logger('WARNING', "invalid HTML content from $metaurl"); - $warncnt++; - return; - } - # filetime is now - my $filetime = stime(time); + # if no filetime then now + $filetime = stime(time) unless ($filetime); # register file in db my $fid = registerFile("$fileurl", $filetime); if ($fid) { + # try to parse index file + my $ret = eval{$metaParser->parse_uri($metaurl)}; + if ($@) { + my $errmsg = $@; + logger('ERROR', "error reading XML from '$metaurl' ($errmsg)"); + $errcnt++; + return; + } + my @data = $metaParserHandler->getData(); + logger('DEBUG', "parsed $#data+1 elements"); + if (lc $data[0][0] eq "html") { + # oops, wrong + logger('WARNING', "invalid HTML content from $metaurl"); + $warncnt++; + return; + } # file is new/modified registerMeta($fid, @data); }