version 1.6, 2005/08/31 10:50:00
|
version 1.12, 2006/12/11 18:07:43
|
Line 3
|
Line 3
|
use strict; |
use strict; |
use XML::LibXML; |
use XML::LibXML; |
|
|
use lib '/usr/local/mpiwg/archive_devel'; |
use lib '/usr/local/mpiwg/archive'; |
use MPIWGStor; |
use MPIWGStor; |
|
|
# make output unbuffered |
# make output unbuffered |
$|=1; |
$|=1; |
|
|
# program version |
# program version |
my $version = "0.2.2 (31.8.2005 ROC)"; |
my $version = "0.2.6 (11.12.2006 ROC)"; |
my $help = |
my $help = |
"use: makemeta-lib [options] file.xml |
"use: makemeta-lib [options] file.xml |
options: |
options: |
-debug show debugging info |
-debug show debugging info |
-dry-run simulate, dont'do anything |
-dry-run simulate, dont'do anything |
-online-mode mode for creating online/permanent files |
-online-mode mode for creating online/permanent files |
|
-online-base=dir base directory for online ids (for online mode) |
-cw-mode mode for copying einstein_cw archive documents |
-cw-mode mode for copying einstein_cw archive documents |
-digifiles-mode mode for copying files from digifiles |
-digifiles-mode mode for copying files from digifiles |
-map-file=mapfile.xml digilib mapping file (for digifiles mode) |
-map-file=mapfile.xml digilib mapping file (for digifiles mode) |
|
-access=free adds free access tag (use access=mpiwg for restricted access) |
"; |
"; |
logger("INFO", "makemeta-lib $version"); |
logger("INFO", "makemeta-lib $version"); |
|
|
Line 105 my %lang_map = (
|
Line 107 my %lang_map = (
|
'Latin' => 'la', |
'Latin' => 'la', |
'Japanese' => 'ja', |
'Japanese' => 'ja', |
'Dutch' => 'nl', |
'Dutch' => 'nl', |
'Spanish' => 'es' |
'Spanish' => 'es', |
|
'Swedish' => 'sv' |
); |
); |
# storage fields |
# storage fields |
my $arch_id_field = 'ID_Archive'; |
my $arch_id_field = 'ID_Archive'; |
Line 139 logger('DEBUG', "dry-run: $dry_run");
|
Line 142 logger('DEBUG', "dry-run: $dry_run");
|
# use online mode |
# use online mode |
my $online_mode = (exists $$args{'online-mode'}) ? $$args{'online-mode'} : 0; |
my $online_mode = (exists $$args{'online-mode'}) ? $$args{'online-mode'} : 0; |
logger('DEBUG', "online_mode: $online_mode"); |
logger('DEBUG', "online_mode: $online_mode"); |
|
# online base dir |
|
my $online_base_dir = (exists $$args{'online-base'}) ? $$args{'online-base'} : ""; |
|
logger('DEBUG', "online_base_dir: $online_base_dir"); |
|
|
# use einstein-cw mode |
# use einstein-cw mode |
my $cw_mode = (exists $$args{'cw-mode'}) ? $$args{'cw-mode'} : 0; |
my $cw_mode = (exists $$args{'cw-mode'}) ? $$args{'cw-mode'} : 0; |
Line 153 logger('DEBUG', "map_file_name: $map_file_name");
|
Line 159 logger('DEBUG', "map_file_name: $map_file_name");
|
my $mapping_doc; |
my $mapping_doc; |
my $mapping_root; |
my $mapping_root; |
|
|
|
# access type |
|
my $access_type = (exists $$args{'access'}) ? $$args{'access'} : ""; |
|
|
# index.meta namespace (not really implemented!) |
# index.meta namespace (not really implemented!) |
my $namespace = ""; |
my $namespace = ""; |
|
|
Line 170 if (! $infile) {
|
Line 179 if (! $infile) {
|
exit 1; |
exit 1; |
} |
} |
# strip double slashes |
# strip double slashes |
$infile =~ s/\/\//\//; |
$infile = sstrip($infile, 1); |
if (! -f $infile) { |
if (! -f $infile) { |
logger("ABORT", "input file \'$infile\' doesn't exist!"); |
logger("ABORT", "input file \'$infile\' doesn't exist!"); |
exit 1; |
exit 1; |
Line 273 sub find_cw_dir {
|
Line 282 sub find_cw_dir {
|
|
|
sub find_permanent_dir { |
sub find_permanent_dir { |
my ($input_node) = @_; |
my ($input_node) = @_; |
|
my $online_id = sstrip($input_node->findvalue("fm:$online_id_field")); |
|
# try online_base_dir + online_id first |
|
if (($online_base_dir)&&($online_id)) { |
|
my $dir = sstrip("$online_base_dir/$online_id", 1); |
|
return $dir; |
|
} |
|
# then online_url |
my $online_base = '/mpiwg/online/permanent'; |
my $online_base = '/mpiwg/online/permanent'; |
my $src_dir = find_online_dir($input_node, $online_base, 'pageimg'); |
my $online_dir = find_online_dir($input_node, $online_base, 'pageimg'); |
my $dest_id = sstrip($input_node->findvalue("fm:$online_id_field")); |
if ((! $online_dir)) { |
if (! $dest_id) { |
logger('ERROR', "no ID or URL for online permanent entry"); |
logger('ERROR', "no ID field for online permanent entry"); |
|
$errcnt++; |
$errcnt++; |
return; |
return; |
} |
} |
my $dir = "$online_base/$src_dir"; |
my $dir = sstrip("$online_base/$online_dir", 1); |
return $dir; |
return $dir; |
} |
} |
|
|
Line 310 sub find_online_dir {
|
Line 325 sub find_online_dir {
|
if ($online_dir) { |
if ($online_dir) { |
$online_dir =~ s/\/$//; # strip ending slashes |
$online_dir =~ s/\/$//; # strip ending slashes |
if ($page_dir) { |
if ($page_dir) { |
|
# strip page_dir |
$online_dir =~ s/\/${page_dir}$//; |
$online_dir =~ s/\/${page_dir}$//; |
} |
} |
#logger("DEBUG", "dir: $base_dir/$online_dir"); |
#logger("DEBUG", "dir: $base_dir/$online_dir"); |
Line 417 sub process_fm_entry {
|
Line 433 sub process_fm_entry {
|
$index_root->addChild($index_doc->createAttributeNS($namespace, 'version', '1.1')); |
$index_root->addChild($index_doc->createAttributeNS($namespace, 'version', '1.1')); |
$index_root->addChild($index_doc->createAttributeNS($namespace, 'type', 'MPIWG')); |
$index_root->addChild($index_doc->createAttributeNS($namespace, 'type', 'MPIWG')); |
$index_doc->setDocumentElement($index_root); |
$index_doc->setDocumentElement($index_root); |
|
my $derived_from = ""; |
|
|
# try to find the document directory |
# try to find the document directory |
my $doc_dir = ""; |
my $doc_dir = ""; |
if ($online_mode) { |
if ($online_mode) { |
$doc_dir = find_permanent_dir($input_node); |
$doc_dir = find_permanent_dir($input_node); |
|
$derived_from = find_arch_dir($input_node); |
} elsif ($cw_mode) { |
} elsif ($cw_mode) { |
$doc_dir = find_cw_dir($input_node); |
$doc_dir = find_cw_dir($input_node); |
} elsif ($digifiles_mode) { |
} elsif ($digifiles_mode) { |
Line 450 sub process_fm_entry {
|
Line 468 sub process_fm_entry {
|
# media |
# media |
create_text_path('media-type', 'image', $index_root, $namespace); |
create_text_path('media-type', 'image', $index_root, $namespace); |
create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
|
# derived-from |
|
if ($derived_from) { |
|
create_text_path('derived-from/archive-path', $derived_from, $index_root, $namespace); |
|
} |
|
# access |
|
if ($access_type) { |
|
if ($access_type eq "free") { |
|
create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace); |
|
} else { |
|
my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace); |
|
create_text_path('name', $access_type, $acc_tag, $namespace); |
|
} |
|
} |
|
|
# convert bib entries |
# convert bib entries |
my $cnt = convert_bib($input_node, $index_root, $index_doc); |
my $cnt = convert_bib($input_node, $index_root, $index_doc); |