changeset 8:65895eec9e30

Added handling of modification date for HTTP feeds.
author casties
date Tue, 28 Sep 2004 14:20:26 +0200
parents c4e6fc065b6d
children 9c61f624d802
files harvestmeta.pl
diffstat 1 files changed, 25 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/harvestmeta.pl	Thu Sep 23 21:55:44 2004 +0200
+++ b/harvestmeta.pl	Tue Sep 28 14:20:26 2004 +0200
@@ -17,7 +17,7 @@
 #
 
 # program version
-my $version = "0.2.1 (13.07.2004)";
+my $version = "0.3 (27.9.2004)";
 
 # read command line parameters
 my $args = MPIWGStor::parseargs;
@@ -229,6 +229,7 @@
 	my ($tag, $val, $attr) = @$me;
 	my $meta = "";
 	my $file = "";
+	my $mtime = "";
 	if ($tag =~ /index\/resource$/) {
 	    if ($attr =~ /metaLink=\"([^\"]+)\"/) {
 		$meta = $1;
@@ -236,8 +237,11 @@
 	    if ($attr =~ /resourceLink=\"([^\"]+)\"/) {
 		$file = $1;
 	    }
+	    if ($attr =~ /modificationDate=\"([^\"]+)\"/) {
+		$mtime = $1;
+	    }
 	    if ($meta =~ /^http:/) {
-		harvestURL($meta, $file);
+		harvestURL($meta, $file, $mtime);
 	    }
 	}
     }
@@ -251,29 +255,29 @@
 # in the database (under $filepath)
 #
 sub harvestURL {
-    my ($metaurl, $fileurl) = @_;
+    my ($metaurl, $fileurl, $filetime) = @_;
     logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'");
-    # try to parse index file
-    my $ret = eval{$metaParser->parse_uri($metaurl)};
-    if ($@) {
-	my $errmsg = $@;
-	logger('ERROR', "error reading XML from '$metaurl' ($errmsg)");
-	$errcnt++;
-	return;
-    }
-    my @data = $metaParserHandler->getData();
-    logger('DEBUG', "parsed $#data+1 elements");
-    if (lc $data[0][0] eq "html") {
-	# oops, wrong
-	logger('WARNING', "invalid HTML content from $metaurl");
-	$warncnt++;
-	return;
-    }
-    # filetime is now
-    my $filetime = stime(time);
+    # if no filetime was passed in, fall back to the current time
+    $filetime = stime(time) unless ($filetime);
     # register file in db
     my $fid = registerFile("$fileurl", $filetime);
     if ($fid) {
+	# try to parse index file
+	my $ret = eval{$metaParser->parse_uri($metaurl)};
+	if ($@) {
+	    my $errmsg = $@;
+	    logger('ERROR', "error reading XML from '$metaurl' ($errmsg)");
+	    $errcnt++;
+	    return;
+	}
+	my @data = $metaParserHandler->getData();
+	logger('DEBUG', "parsed $#data+1 elements");
+	if (lc $data[0][0] eq "html") {
+	    # oops, wrong
+	    logger('WARNING', "invalid HTML content from $metaurl");
+	    $warncnt++;
+	    return;
+	}
 	# file is new/modified
 	registerMeta($fid, @data);
     }