version 1.3, 2004/07/08 21:23:53
|
version 1.4, 2004/07/13 18:32:49
|
Line 17 $|=1;
|
Line 17 $|=1;
|
# |
# |
|
|
# program version |
# program version |
my $version = "0.2 (08.07.2004)"; |
my $version = "0.2.1 (13.07.2004)"; |
|
|
# read command line parameters |
# read command line parameters |
my $args = MPIWGStor::parseargs; |
my $args = MPIWGStor::parseargs; |
Line 227 sub readURLIndex {
|
Line 227 sub readURLIndex {
|
foreach my $me (@indexdata) { |
foreach my $me (@indexdata) { |
$cnt++; |
$cnt++; |
my ($tag, $val, $attr) = @$me; |
my ($tag, $val, $attr) = @$me; |
my $meta; |
my $meta = ""; |
my $file; |
my $file = ""; |
if ($tag =~ /index\/resource$/) { |
if ($tag =~ /index\/resource$/) { |
if ($attr =~ /metaLink=\"([^\"]+)\"/) { |
if ($attr =~ /metaLink=\"([^\"]+)\"/) { |
$meta = $1; |
$meta = $1; |
Line 263 sub harvestURL {
|
Line 263 sub harvestURL {
|
} |
} |
my @data = $metaParserHandler->getData(); |
my @data = $metaParserHandler->getData(); |
logger('DEBUG', "parsed $#data+1 elements"); |
logger('DEBUG', "parsed $#data+1 elements"); |
if ($data[0][0] eq "html") { |
if (lc $data[0][0] eq "html") { |
# oops, wrong |
# oops, wrong |
logger('WARNING', "invalid HTML content from $metaurl"); |
logger('WARNING', "invalid HTML content from $metaurl"); |
$warncnt++; |
$warncnt++; |
Line 384 sub initDB {
|
Line 384 sub initDB {
|
logger("INFO", "harvestmeta $version"); |
logger("INFO", "harvestmeta $version"); |
|
|
initDB(); |
initDB(); |
my $fnum; |
my $fnum = 0; |
|
|
if ($basedir) { |
if ($basedir) { |
# read and process all files under $basedir |
# read and process all files under $basedir |
$fnum = readAllFiles($basedir, ""); |
$fnum = readAllFiles($basedir, ""); |
# delete orphaned data (under $basedir) |
# delete orphaned data (under $basedir) |
cleanUnmarkedFiles($basedir); |
cleanUnmarkedFiles($basedir); |
} elsif ($baseurl) { |
} elsif ($indexurl) { |
# read and process XML index |
# read and process XML index |
$fnum = readURLIndex($indexurl); |
$fnum = readURLIndex($indexurl); |
if ($baseurl) { |
if ($baseurl) { |