--- foxridge-archiver/harvestmeta.pl 2004/07/08 21:22:04 1.2 +++ foxridge-archiver/harvestmeta.pl 2017/03/16 17:00:43 1.6 @@ -1,11 +1,11 @@ -#!/usr/local/bin/perl -w +#!/usr/bin/perl -w use strict; use XML::SAX; use XML::LibXML; use DBI; -use lib '/usr/local/mpiwg/archive_devel'; +use lib '/usr/local/mpiwg/archive'; use MPIWGStor; use HarvestmetaHandler; @@ -17,7 +17,7 @@ $|=1; # # program version -my $version = "0.2 (08.07.2004)"; +my $version = "0.3 (27.9.2004)"; # read command line parameters my $args = MPIWGStor::parseargs; @@ -227,8 +227,9 @@ sub readURLIndex { foreach my $me (@indexdata) { $cnt++; my ($tag, $val, $attr) = @$me; - my $meta; - my $file; + my $meta = ""; + my $file = ""; + my $mtime = ""; if ($tag =~ /index\/resource$/) { if ($attr =~ /metaLink=\"([^\"]+)\"/) { $meta = $1; @@ -236,8 +237,11 @@ sub readURLIndex { if ($attr =~ /resourceLink=\"([^\"]+)\"/) { $file = $1; } + if ($attr =~ /modificationDate=\"([^\"]+)\"/) { + $mtime = $1; + } if ($meta =~ /^http:/) { - harvestURL($meta, $file); + harvestURL($meta, $file, $mtime); } } } @@ -251,29 +255,29 @@ sub readURLIndex { # in the database (under $filepath) # sub harvestURL { - my ($metaurl, $fileurl) = @_; + my ($metaurl, $fileurl, $filetime) = @_; logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'"); - # try to parse index file - my $ret = eval{$metaParser->parse_uri($metaurl)}; - if ($@) { - my $errmsg = $@; - logger('ERROR', "error reading XML from '$metaurl' ($errmsg)"); - $errcnt++; - return; - } - my @data = $metaParserHandler->getData(); - logger('DEBUG', "parsed $#data+1 elements"); - if ($data[0][0] eq "html") { - # oops, wrong - logger('WARNING', "invalid HTML content from $metaurl"); - $warncnt++; - return; - } - # filetime is now - my $filetime = stime(time); + # if no filetime then now + $filetime = stime(time) unless ($filetime); # register file in db my $fid = registerFile("$fileurl", $filetime); if ($fid) { + # try to parse index file + my $ret = eval{$metaParser->parse_uri($metaurl)}; + if ($@) { + my $errmsg = $@; + logger('ERROR', "error reading XML from '$metaurl' ($errmsg)"); + $errcnt++; + return; + } + my @data = $metaParserHandler->getData(); + logger('DEBUG', "parsed $#data+1 elements"); + if (lc $data[0][0] eq "html") { + # oops, wrong + logger('WARNING', "invalid HTML content from $metaurl"); + $warncnt++; + return; + } # file is new/modified registerMeta($fid, @data); } @@ -384,14 +388,14 @@ sub initDB { logger("INFO", "harvestmeta $version"); initDB(); -my $fnum; +my $fnum = 0; if ($basedir) { # read and process all files under $basedir $fnum = readAllFiles($basedir, ""); # delete orphaned data (under $basedir) cleanUnmarkedFiles($basedir); -} elsif ($baseurl) { +} elsif ($indexurl) { # read and process XML index $fnum = readURLIndex($indexurl); if ($baseurl) {