version 1.3, 2004/07/08 21:23:53
|
version 1.5, 2004/09/28 12:20:26
|
Line 17 $|=1;
|
Line 17 $|=1;
|
# |
# |
|
|
# program version |
# program version |
my $version = "0.2 (08.07.2004)"; |
my $version = "0.3 (27.9.2004)"; |
|
|
# read command line parameters |
# read command line parameters |
my $args = MPIWGStor::parseargs; |
my $args = MPIWGStor::parseargs; |
Line 227 sub readURLIndex {
|
Line 227 sub readURLIndex {
|
foreach my $me (@indexdata) { |
foreach my $me (@indexdata) { |
$cnt++; |
$cnt++; |
my ($tag, $val, $attr) = @$me; |
my ($tag, $val, $attr) = @$me; |
my $meta; |
my $meta = ""; |
my $file; |
my $file = ""; |
|
my $mtime = ""; |
if ($tag =~ /index\/resource$/) { |
if ($tag =~ /index\/resource$/) { |
if ($attr =~ /metaLink=\"([^\"]+)\"/) { |
if ($attr =~ /metaLink=\"([^\"]+)\"/) { |
$meta = $1; |
$meta = $1; |
Line 236 sub readURLIndex {
|
Line 237 sub readURLIndex {
|
if ($attr =~ /resourceLink=\"([^\"]+)\"/) { |
if ($attr =~ /resourceLink=\"([^\"]+)\"/) { |
$file = $1; |
$file = $1; |
} |
} |
|
if ($attr =~ /modificationDate=\"([^\"]+)\"/) { |
|
$mtime = $1; |
|
} |
if ($meta =~ /^http:/) { |
if ($meta =~ /^http:/) { |
harvestURL($meta, $file); |
harvestURL($meta, $file, $mtime); |
} |
} |
} |
} |
} |
} |
Line 251 sub readURLIndex {
|
Line 255 sub readURLIndex {
|
# in the database (under $filepath) |
# in the database (under $filepath) |
# |
# |
sub harvestURL { |
sub harvestURL { |
my ($metaurl, $fileurl) = @_; |
my ($metaurl, $fileurl, $filetime) = @_; |
logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'"); |
logger('DEBUG', "fetching from url '$metaurl' for '$fileurl'"); |
|
# if no filetime then now |
|
$filetime = stime(time) unless ($filetime); |
|
# register file in db |
|
my $fid = registerFile("$fileurl", $filetime); |
|
if ($fid) { |
# try to parse index file |
# try to parse index file |
my $ret = eval{$metaParser->parse_uri($metaurl)}; |
my $ret = eval{$metaParser->parse_uri($metaurl)}; |
if ($@) { |
if ($@) { |
Line 263 sub harvestURL {
|
Line 272 sub harvestURL {
|
} |
} |
my @data = $metaParserHandler->getData(); |
my @data = $metaParserHandler->getData(); |
logger('DEBUG', "parsed $#data+1 elements"); |
logger('DEBUG', "parsed $#data+1 elements"); |
if ($data[0][0] eq "html") { |
if (lc $data[0][0] eq "html") { |
# oops, wrong |
# oops, wrong |
logger('WARNING', "invalid HTML content from $metaurl"); |
logger('WARNING', "invalid HTML content from $metaurl"); |
$warncnt++; |
$warncnt++; |
return; |
return; |
} |
} |
# filetime is now |
|
my $filetime = stime(time); |
|
# register file in db |
|
my $fid = registerFile("$fileurl", $filetime); |
|
if ($fid) { |
|
# file is new/modified |
# file is new/modified |
registerMeta($fid, @data); |
registerMeta($fid, @data); |
} |
} |
Line 384 sub initDB {
|
Line 388 sub initDB {
|
logger("INFO", "harvestmeta $version"); |
logger("INFO", "harvestmeta $version"); |
|
|
initDB(); |
initDB(); |
my $fnum; |
my $fnum = 0; |
|
|
if ($basedir) { |
if ($basedir) { |
# read and process all files under $basedir |
# read and process all files under $basedir |
$fnum = readAllFiles($basedir, ""); |
$fnum = readAllFiles($basedir, ""); |
# delete orphaned data (under $basedir) |
# delete orphaned data (under $basedir) |
cleanUnmarkedFiles($basedir); |
cleanUnmarkedFiles($basedir); |
} elsif ($baseurl) { |
} elsif ($indexurl) { |
# read and process XML index |
# read and process XML index |
$fnum = readURLIndex($indexurl); |
$fnum = readURLIndex($indexurl); |
if ($baseurl) { |
if ($baseurl) { |