Diff for /foxridge-archiver/harvestmeta.pl between versions 1.2 and 1.6

version 1.2, 2004/07/08 21:22:04 | version 1.6, 2017/03/16 17:00:43
Line 1 Line 1
 #!/usr/local/bin/perl -w  #!/usr/bin/perl -w
   
 use strict;  use strict;
 use XML::SAX;  use XML::SAX;
 use XML::LibXML;  use XML::LibXML;
 use DBI;  use DBI;
   
 use lib '/usr/local/mpiwg/archive_devel';  use lib '/usr/local/mpiwg/archive';
 use MPIWGStor;  use MPIWGStor;
 use HarvestmetaHandler;  use HarvestmetaHandler;
   
Line 17  $|=1; Line 17  $|=1;
 #  #
   
 # program version  # program version
 my $version = "0.2 (08.07.2004)";  my $version = "0.3 (27.9.2004)";
   
 # read command line parameters  # read command line parameters
 my $args = MPIWGStor::parseargs;  my $args = MPIWGStor::parseargs;
Line 227  sub readURLIndex { Line 227  sub readURLIndex {
     foreach my $me (@indexdata) {      foreach my $me (@indexdata) {
     $cnt++;      $cnt++;
     my ($tag, $val, $attr) = @$me;      my ($tag, $val, $attr) = @$me;
     my $meta;      my $meta = "";
     my $file;      my $file = "";
       my $mtime = "";
     if ($tag =~ /index\/resource$/) {      if ($tag =~ /index\/resource$/) {
         if ($attr =~ /metaLink=\"([^\"]+)\"/) {          if ($attr =~ /metaLink=\"([^\"]+)\"/) {
         $meta = $1;          $meta = $1;
Line 236  sub readURLIndex { Line 237  sub readURLIndex {
         if ($attr =~ /resourceLink=\"([^\"]+)\"/) {          if ($attr =~ /resourceLink=\"([^\"]+)\"/) {
         $file = $1;          $file = $1;
         }          }
           if ($attr =~ /modificationDate=\"([^\"]+)\"/) {
           $mtime = $1;
           }
         if ($meta =~ /^http:/) {          if ($meta =~ /^http:/) {
         harvestURL($meta, $file);          harvestURL($meta, $file, $mtime);
         }          }
     }      }
     }      }
Line 251  sub readURLIndex { Line 255  sub readURLIndex {
 # in the database (under $filepath)  # in the database (under $filepath)
 #  #
 sub harvestURL {  sub harvestURL {
     my ($metaurl, $fileurl) = @_;      my ($metaurl, $fileurl, $filetime) = @_;
     logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'");      logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'");
       # if no filetime then now
       $filetime = stime(time) unless ($filetime);
       # register file in db
       my $fid = registerFile("$fileurl", $filetime);
       if ($fid) {
     # try to parse index file      # try to parse index file
     my $ret = eval{$metaParser->parse_uri($metaurl)};      my $ret = eval{$metaParser->parse_uri($metaurl)};
     if ($@) {      if ($@) {
Line 263  sub harvestURL { Line 272  sub harvestURL {
     }      }
     my @data = $metaParserHandler->getData();      my @data = $metaParserHandler->getData();
     logger('DEBUG', "parsed $#data+1 elements");      logger('DEBUG', "parsed $#data+1 elements");
     if ($data[0][0] eq "html") {      if (lc $data[0][0] eq "html") {
     # oops, wrong      # oops, wrong
     logger('WARNING', "invalid HTML content from $metaurl");      logger('WARNING', "invalid HTML content from $metaurl");
     $warncnt++;      $warncnt++;
     return;      return;
     }      }
     # filetime is now  
     my $filetime = stime(time);  
     # register file in db  
     my $fid = registerFile("$fileurl", $filetime);  
     if ($fid) {  
     # file is new/modified      # file is new/modified
     registerMeta($fid, @data);      registerMeta($fid, @data);
     }      }
Line 384  sub initDB { Line 388  sub initDB {
 logger("INFO", "harvestmeta $version");  logger("INFO", "harvestmeta $version");
     
 initDB();  initDB();
 my $fnum;  my $fnum = 0;
   
 if ($basedir) {  if ($basedir) {
     # read and process all files under $basedir      # read and process all files under $basedir
     $fnum = readAllFiles($basedir, "");      $fnum = readAllFiles($basedir, "");
     # delete orphaned data (under $basedir)      # delete orphaned data (under $basedir)
     cleanUnmarkedFiles($basedir);      cleanUnmarkedFiles($basedir);
 } elsif ($baseurl) {  } elsif ($indexurl) {
     # read and process XML index      # read and process XML index
     $fnum = readURLIndex($indexurl);      $fnum = readURLIndex($indexurl);
     if ($baseurl) {      if ($baseurl) {

Legend: Removed from v.1.2 | changed lines | Added in v.1.6


FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>