version 1.4, 2006/06/20 16:23:26
|
version 1.9, 2010/08/27 17:10:28
|
Line 10 use MPIWGStor;
|
Line 10 use MPIWGStor;
|
$|=1; |
$|=1; |
|
|
# program version |
# program version |
my $version = "0.2.2 (20.6.2006 ROC)"; |
my $version = "0.2.7 (27.8.2010 ROC)"; |
my $help = |
my $help = |
"use: makemeta-vlp [options] file.xml |
"use: makemeta-vlp [options] file.xml |
options: |
options: |
Line 19 options:
|
Line 19 options:
|
-replace replace existing index files |
-replace replace existing index files |
-online-mode mode for creating online/permanent files |
-online-mode mode for creating online/permanent files |
-archive-mode mode for creating archive/data files |
-archive-mode mode for creating archive/data files |
|
-access=free adds free access tag for online-mode |
|
-texttool adds texttool tag for online-mode |
"; |
"; |
logger("INFO", "makemeta-vlp $version"); |
logger("INFO", "makemeta-vlp $version"); |
|
|
Line 27 logger("INFO", "makemeta-vlp $version");
|
Line 29 logger("INFO", "makemeta-vlp $version");
|
|
|
# generic mappings at top level |
# generic mappings at top level |
my %gen_map = ( |
my %gen_map = ( |
'Custom2_Language' => 'meta/lang' |
'Custom2_Language' => 'meta/lang', |
|
'productionComment' => 'meta/image-acquisition/production-comment', |
|
'derivedFrom' => 'derived-from/archive-path' |
); |
); |
# sub type switch tag |
# sub type switch tag |
my %type_map = ( |
my %type_map = ( |
Line 106 my %subtype_map = (
|
Line 110 my %subtype_map = (
|
'Number_Issue' => 'meta/bib/issue', |
'Number_Issue' => 'meta/bib/issue', |
'Pages' => 'meta/bib/pages' |
'Pages' => 'meta/bib/pages' |
}, |
}, |
|
'(JournalVolume)' => { |
|
'_name' => 'journal-volume', |
|
'SecondaryTitle' => 'meta/bib/title', |
|
'SecondaryAuthor' => 'meta/bib/editor', |
|
'Publisher' => 'meta/bib/publisher', |
|
'Place_Published' => 'meta/bib/city', |
|
'Year' => 'meta/bib/year', |
|
'Volume' => 'meta/bib/volume', |
|
'Pages' => 'meta/bib/number-of-pages', |
|
'#Cover pages only, articles have been extracted' => 'meta/bib/comment' |
|
}, |
|
'Journal' => { |
|
'_name' => 'report', |
|
'Title' => 'meta/bib/title', |
|
'SecondaryTitle' => 'meta/bib/institution', |
|
'Author' => 'meta/bib/author', |
|
'Place_Published' => 'meta/bib/city', |
|
'Year' => 'meta/bib/year', |
|
'Date' => 'meta/bib/date', |
|
'Pages' => 'meta/bib/pages', |
|
}, |
'Magazine Article' => { |
'Magazine Article' => { |
'_name' => 'magazine-article', |
'_name' => 'magazine-article', |
'Author' => 'meta/bib/author', |
'Author' => 'meta/bib/author', |
Line 116 my %subtype_map = (
|
Line 141 my %subtype_map = (
|
'Date' => 'meta/bib/issue-date', |
'Date' => 'meta/bib/issue-date', |
'Pages' => 'meta/bib/pages' |
'Pages' => 'meta/bib/pages' |
}, |
}, |
|
'Newspaper Article' => { |
|
'_name' => 'newspaper-article', |
|
'Author' => 'meta/bib/author', |
|
'Title' => 'meta/bib/title', |
|
'Year' => 'meta/bib/year', |
|
'Secondary_Title' => 'meta/bib/newspaper', |
|
'Date' => 'meta/bib/issue-date', |
|
'Pages' => 'meta/bib/pages' |
|
}, |
'Report' => { |
'Report' => { |
'_name' => 'report', |
'_name' => 'report', |
'Author' => 'meta/bib/author', |
'Author' => 'meta/bib/author', |
Line 169 my %lang_map = (
|
Line 203 my %lang_map = (
|
'Japanese' => 'ja', |
'Japanese' => 'ja', |
'Dutch' => 'nl', |
'Dutch' => 'nl', |
'Spanish' => 'es', |
'Spanish' => 'es', |
'Swedish' => 'sv' |
'Swedish' => 'sv', |
|
'Russian' => 'ru', |
|
'Polish' => 'pl', |
|
'Greek' => 'el' |
); |
); |
# storage fields |
# storage fields |
my $arch_id_field = 'ID'; |
my $arch_id_field = 'ID'; |
|
my $access_free_field = 'online'; |
|
|
####################################################### |
####################################################### |
# internal parameters |
# internal parameters |
Line 208 logger('DEBUG', "online_mode: $online_mo
|
Line 246 logger('DEBUG', "online_mode: $online_mo
|
my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0; |
my $archive_mode = (exists $$args{'archive-mode'}) ? $$args{'archive-mode'} : 0; |
logger('DEBUG', "archive_mode: $archive_mode"); |
logger('DEBUG', "archive_mode: $archive_mode"); |
|
|
|
# create texttool tag (online mode only) |
|
my $texttool = (exists $$args{'texttool'}) ? $$args{'texttool'} : 1; |
|
logger('DEBUG', "texttool: $texttool"); |
|
# image dir for texttool |
|
my $texttool_img_dir = "pages"; |
|
|
|
# access type |
|
my $access_type = (exists $$args{'access'}) ? $$args{'access'} : ""; |
|
|
# index.meta namespace (not really implemented!) |
# index.meta namespace (not really implemented!) |
my $namespace = ""; |
my $namespace = ""; |
|
|
Line 255 sub find_arch_dir {
|
Line 302 sub find_arch_dir {
|
|
|
sub find_permanent_dir { |
sub find_permanent_dir { |
my ($input_node) = @_; |
my ($input_node) = @_; |
my $online_base = '/mpiwg/online/permanent'; |
my $online_base = $lib_online_dir; |
my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field")); |
my $dest_id = sstrip($input_node->findvalue("fm:$arch_id_field")); |
if (! $dest_id) { |
if (! $dest_id) { |
logger('ERROR', "no ID field for online permanent entry"); |
logger('ERROR', "no ID field for online permanent entry"); |
Line 263 sub find_permanent_dir {
|
Line 310 sub find_permanent_dir {
|
return; |
return; |
} |
} |
my $dir = "$online_base/lit$dest_id"; |
my $dir = "$online_base/lit$dest_id"; |
|
if (-d $dir) { |
|
logger('DEBUG', "directory $dir exists"); |
return $dir; |
return $dir; |
} |
} |
|
return; |
|
} |
|
|
|
|
sub convert_bib { |
sub convert_bib { |
Line 396 sub process_fm_entry {
|
Line 447 sub process_fm_entry {
|
create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace); |
create_text_path('meta/acquisition/date', stime(time), $index_root, $namespace); |
create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace); |
create_text_path('meta/acquisition/provider/provider-id', 'vlp', $index_root, $namespace); |
create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace); |
create_text_path('meta/acquisition/provider/address', 'Max Planck Institute for the History of Science', $index_root, $namespace); |
# image acquisition |
|
create_text_path('meta/image-acquisition/device', 'Flatbed Scanner' , $index_root, $namespace); |
|
create_text_path('meta/image-acquisition/image-type', 'Greyscale' , $index_root, $namespace); |
|
create_text_path('meta/image-acquisition/production-comment', 'Raw scans in \'raw\' folder, cleaned pages in \'pages\' folder.' , $index_root, $namespace); |
|
} |
} |
# media |
# media |
create_text_path('media-type', 'image', $index_root, $namespace); |
create_text_path('media-type', 'image', $index_root, $namespace); |
create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
create_text_path('meta/content-type', 'scanned document', $index_root, $namespace); |
|
# access |
|
if ($access_type) { |
|
if ($access_type eq "free") { |
|
create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace); |
|
} else { |
|
my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace); |
|
create_text_path('name', $access_type, $acc_tag, $namespace); |
|
} |
|
} elsif ($online_mode) { |
|
# read access conditions from "online" field in DB dump |
|
my $online = sstrip($input_node->findvalue("fm:$access_free_field")); |
|
if ($online) { |
|
create_element_path('meta/access-conditions/access@type=free', $index_root, $namespace); |
|
} else { |
|
my $acc_tag = create_element_path('meta/access-conditions/access@type=institution', $index_root, $namespace); |
|
create_text_path('name', 'mpiwg', $acc_tag, $namespace); |
|
} |
|
} |
|
|
|
# texttool tag with image dir |
|
if ($online_mode && $texttool) { |
|
if ( -d "$doc_dir/$texttool_img_dir" ) { |
|
create_text_path('meta/texttool/image', $texttool_img_dir,$index_root, $namespace); |
|
} else { |
|
logger('WARNING', "page image directory missing!"); |
|
$warncnt++; |
|
} |
|
} |
|
|
# convert bib entries |
# convert bib entries |
my $cnt = convert_bib($input_node, $index_root, $index_doc); |
my $cnt = convert_bib($input_node, $index_root, $index_doc); |